xref: /petsc/src/mat/impls/maij/maij.c (revision 9b5a856f60f98cab05cfa6d8e9a84d5a165873e6)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
382b1193eSBarry Smith /*
4cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
5cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
6cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
7cd3bbe55SBarry Smith   independently.
8cd3bbe55SBarry Smith 
9cd3bbe55SBarry Smith      We provide:
10cd3bbe55SBarry Smith          MatMult()
11cd3bbe55SBarry Smith          MatMultTranspose()
12cd3bbe55SBarry Smith          MatMultTransposeAdd()
13cd3bbe55SBarry Smith          MatMultAdd()
14cd3bbe55SBarry Smith           and
15cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
16f4a53059SBarry Smith 
17f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1882b1193eSBarry Smith */
1982b1193eSBarry Smith 
20be911618SKris Buschelman #include "src/mat/impls/maij/maij.h"
217ba1a0bfSKris Buschelman #include "src/mat/utils/freespace.h"
221d8d5f9aSSatish Balay #include "private/vecimpl.h"
2382b1193eSBarry Smith 
244a2ae208SSatish Balay #undef __FUNCT__
254a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
26be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27b9b97703SBarry Smith {
28dfbe8321SBarry Smith   PetscErrorCode ierr;
29b9b97703SBarry Smith   PetscTruth     ismpimaij,isseqmaij;
30b9b97703SBarry Smith 
31b9b97703SBarry Smith   PetscFunctionBegin;
32b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
33b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
34b9b97703SBarry Smith   if (ismpimaij) {
35b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
36b9b97703SBarry Smith 
37b9b97703SBarry Smith     *B = b->A;
38b9b97703SBarry Smith   } else if (isseqmaij) {
39b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
40b9b97703SBarry Smith 
41b9b97703SBarry Smith     *B = b->AIJ;
42b9b97703SBarry Smith   } else {
43b9b97703SBarry Smith     *B = A;
44b9b97703SBarry Smith   }
45b9b97703SBarry Smith   PetscFunctionReturn(0);
46b9b97703SBarry Smith }
47b9b97703SBarry Smith 
484a2ae208SSatish Balay #undef __FUNCT__
494a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
50be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51b9b97703SBarry Smith {
52dfbe8321SBarry Smith   PetscErrorCode ierr;
53b9b97703SBarry Smith   Mat            Aij;
54b9b97703SBarry Smith 
55b9b97703SBarry Smith   PetscFunctionBegin;
56b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
57b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
58b9b97703SBarry Smith   PetscFunctionReturn(0);
59b9b97703SBarry Smith }
60b9b97703SBarry Smith 
614a2ae208SSatish Balay #undef __FUNCT__
624a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
63dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
6482b1193eSBarry Smith {
65dfbe8321SBarry Smith   PetscErrorCode ierr;
664cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
6782b1193eSBarry Smith 
6882b1193eSBarry Smith   PetscFunctionBegin;
69cd3bbe55SBarry Smith   if (b->AIJ) {
70cd3bbe55SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
7182b1193eSBarry Smith   }
724cb9d9b8SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
734cb9d9b8SBarry Smith   PetscFunctionReturn(0);
744cb9d9b8SBarry Smith }
754cb9d9b8SBarry Smith 
764a2ae208SSatish Balay #undef __FUNCT__
770fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
780fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
790fd73130SBarry Smith {
800fd73130SBarry Smith   PetscErrorCode ierr;
810fd73130SBarry Smith   Mat            B;
820fd73130SBarry Smith 
830fd73130SBarry Smith   PetscFunctionBegin;
84ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
850fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
860fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
870fd73130SBarry Smith   PetscFunctionReturn(0);
880fd73130SBarry Smith }
890fd73130SBarry Smith 
900fd73130SBarry Smith #undef __FUNCT__
910fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
920fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
930fd73130SBarry Smith {
940fd73130SBarry Smith   PetscErrorCode ierr;
950fd73130SBarry Smith   Mat            B;
960fd73130SBarry Smith 
970fd73130SBarry Smith   PetscFunctionBegin;
98ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
990fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1000fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
1010fd73130SBarry Smith   PetscFunctionReturn(0);
1020fd73130SBarry Smith }
1030fd73130SBarry Smith 
1040fd73130SBarry Smith #undef __FUNCT__
1054a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
106dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1074cb9d9b8SBarry Smith {
108dfbe8321SBarry Smith   PetscErrorCode ierr;
1094cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1104cb9d9b8SBarry Smith 
1114cb9d9b8SBarry Smith   PetscFunctionBegin;
1124cb9d9b8SBarry Smith   if (b->AIJ) {
1134cb9d9b8SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
1144cb9d9b8SBarry Smith   }
115f4a53059SBarry Smith   if (b->OAIJ) {
116f4a53059SBarry Smith     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
117f4a53059SBarry Smith   }
118b9b97703SBarry Smith   if (b->A) {
119b9b97703SBarry Smith     ierr = MatDestroy(b->A);CHKERRQ(ierr);
120b9b97703SBarry Smith   }
121f4a53059SBarry Smith   if (b->ctx) {
122f4a53059SBarry Smith     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
123f4a53059SBarry Smith   }
124f4a53059SBarry Smith   if (b->w) {
125f4a53059SBarry Smith     ierr = VecDestroy(b->w);CHKERRQ(ierr);
126f4a53059SBarry Smith   }
127cd3bbe55SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
128b9b97703SBarry Smith   PetscFunctionReturn(0);
129b9b97703SBarry Smith }
130b9b97703SBarry Smith 
1310bad9183SKris Buschelman /*MC
132fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1330bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1340bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1350bad9183SKris Buschelman 
1360bad9183SKris Buschelman   Operations provided:
1370bad9183SKris Buschelman . MatMult
1380bad9183SKris Buschelman . MatMultTranspose
1390bad9183SKris Buschelman . MatMultAdd
1400bad9183SKris Buschelman . MatMultTransposeAdd
1410bad9183SKris Buschelman 
1420bad9183SKris Buschelman   Level: advanced
1430bad9183SKris Buschelman 
1440bad9183SKris Buschelman .seealso: MatCreateSeqDense
1450bad9183SKris Buschelman M*/
1460bad9183SKris Buschelman 
14782b1193eSBarry Smith EXTERN_C_BEGIN
1484a2ae208SSatish Balay #undef __FUNCT__
1494a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
150be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
15182b1193eSBarry Smith {
152dfbe8321SBarry Smith   PetscErrorCode ierr;
1534cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
15482b1193eSBarry Smith 
15582b1193eSBarry Smith   PetscFunctionBegin;
156b0a32e0cSBarry Smith   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
157b0a32e0cSBarry Smith   A->data  = (void*)b;
158cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
159cd3bbe55SBarry Smith   A->factor           = 0;
160cd3bbe55SBarry Smith   A->mapping          = 0;
161f4a53059SBarry Smith 
162cd3bbe55SBarry Smith   b->AIJ  = 0;
163cd3bbe55SBarry Smith   b->dof  = 0;
164f4a53059SBarry Smith   b->OAIJ = 0;
165f4a53059SBarry Smith   b->ctx  = 0;
166f4a53059SBarry Smith   b->w    = 0;
16782b1193eSBarry Smith   PetscFunctionReturn(0);
16882b1193eSBarry Smith }
16982b1193eSBarry Smith EXTERN_C_END
17082b1193eSBarry Smith 
171cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1724a2ae208SSatish Balay #undef __FUNCT__
173b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
174dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
17582b1193eSBarry Smith {
1764cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
177bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
17887828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2;
179dfbe8321SBarry Smith   PetscErrorCode ierr;
180899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
181b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
18282b1193eSBarry Smith 
183bcc973b7SBarry Smith   PetscFunctionBegin;
1841ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1851ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
186bcc973b7SBarry Smith   idx  = a->j;
187bcc973b7SBarry Smith   v    = a->a;
188bcc973b7SBarry Smith   ii   = a->i;
189bcc973b7SBarry Smith 
190bcc973b7SBarry Smith   for (i=0; i<m; i++) {
191bcc973b7SBarry Smith     jrow = ii[i];
192bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
193bcc973b7SBarry Smith     sum1  = 0.0;
194bcc973b7SBarry Smith     sum2  = 0.0;
195bcc973b7SBarry Smith     for (j=0; j<n; j++) {
196bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
197bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
198bcc973b7SBarry Smith       jrow++;
199bcc973b7SBarry Smith      }
200bcc973b7SBarry Smith     y[2*i]   = sum1;
201bcc973b7SBarry Smith     y[2*i+1] = sum2;
202bcc973b7SBarry Smith   }
203bcc973b7SBarry Smith 
204efee365bSSatish Balay   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
2051ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2061ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
20782b1193eSBarry Smith   PetscFunctionReturn(0);
20882b1193eSBarry Smith }
209bcc973b7SBarry Smith 
2104a2ae208SSatish Balay #undef __FUNCT__
211b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
212dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
21382b1193eSBarry Smith {
2144cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
215bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
21687828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
217dfbe8321SBarry Smith   PetscErrorCode ierr;
218899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
21982b1193eSBarry Smith 
220bcc973b7SBarry Smith   PetscFunctionBegin;
2212dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
2221ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2231ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
224bfec09a0SHong Zhang 
225bcc973b7SBarry Smith   for (i=0; i<m; i++) {
226bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
227bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
228bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
229bcc973b7SBarry Smith     alpha1 = x[2*i];
230bcc973b7SBarry Smith     alpha2 = x[2*i+1];
231bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
232bcc973b7SBarry Smith   }
233899cda47SBarry Smith   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
2341ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2351ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23682b1193eSBarry Smith   PetscFunctionReturn(0);
23782b1193eSBarry Smith }
238bcc973b7SBarry Smith 
2394a2ae208SSatish Balay #undef __FUNCT__
240b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
241dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
24282b1193eSBarry Smith {
2434cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
244bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
24587828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2;
246dfbe8321SBarry Smith   PetscErrorCode ierr;
247899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
248b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
24982b1193eSBarry Smith 
250bcc973b7SBarry Smith   PetscFunctionBegin;
251f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2521ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2531ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
254bcc973b7SBarry Smith   idx  = a->j;
255bcc973b7SBarry Smith   v    = a->a;
256bcc973b7SBarry Smith   ii   = a->i;
257bcc973b7SBarry Smith 
258bcc973b7SBarry Smith   for (i=0; i<m; i++) {
259bcc973b7SBarry Smith     jrow = ii[i];
260bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
261bcc973b7SBarry Smith     sum1  = 0.0;
262bcc973b7SBarry Smith     sum2  = 0.0;
263bcc973b7SBarry Smith     for (j=0; j<n; j++) {
264bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
265bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
266bcc973b7SBarry Smith       jrow++;
267bcc973b7SBarry Smith      }
268bcc973b7SBarry Smith     y[2*i]   += sum1;
269bcc973b7SBarry Smith     y[2*i+1] += sum2;
270bcc973b7SBarry Smith   }
271bcc973b7SBarry Smith 
272efee365bSSatish Balay   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
2731ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2741ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
275bcc973b7SBarry Smith   PetscFunctionReturn(0);
27682b1193eSBarry Smith }
2774a2ae208SSatish Balay #undef __FUNCT__
278b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
279dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
28082b1193eSBarry Smith {
2814cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
282bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
28387828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2;
284dfbe8321SBarry Smith   PetscErrorCode ierr;
285899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
28682b1193eSBarry Smith 
287bcc973b7SBarry Smith   PetscFunctionBegin;
288f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2891ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2901ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
291bfec09a0SHong Zhang 
292bcc973b7SBarry Smith   for (i=0; i<m; i++) {
293bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
294bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
295bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
296bcc973b7SBarry Smith     alpha1 = x[2*i];
297bcc973b7SBarry Smith     alpha2 = x[2*i+1];
298bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
299bcc973b7SBarry Smith   }
300899cda47SBarry Smith   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
3011ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3021ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
303bcc973b7SBarry Smith   PetscFunctionReturn(0);
30482b1193eSBarry Smith }
305cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3064a2ae208SSatish Balay #undef __FUNCT__
307b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
308dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
309bcc973b7SBarry Smith {
3104cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
311bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
31287828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
313dfbe8321SBarry Smith   PetscErrorCode ierr;
314899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
315b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
31682b1193eSBarry Smith 
317bcc973b7SBarry Smith   PetscFunctionBegin;
3181ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3191ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
320bcc973b7SBarry Smith   idx  = a->j;
321bcc973b7SBarry Smith   v    = a->a;
322bcc973b7SBarry Smith   ii   = a->i;
323bcc973b7SBarry Smith 
324bcc973b7SBarry Smith   for (i=0; i<m; i++) {
325bcc973b7SBarry Smith     jrow = ii[i];
326bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
327bcc973b7SBarry Smith     sum1  = 0.0;
328bcc973b7SBarry Smith     sum2  = 0.0;
329bcc973b7SBarry Smith     sum3  = 0.0;
330bcc973b7SBarry Smith     for (j=0; j<n; j++) {
331bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
332bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
333bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
334bcc973b7SBarry Smith       jrow++;
335bcc973b7SBarry Smith      }
336bcc973b7SBarry Smith     y[3*i]   = sum1;
337bcc973b7SBarry Smith     y[3*i+1] = sum2;
338bcc973b7SBarry Smith     y[3*i+2] = sum3;
339bcc973b7SBarry Smith   }
340bcc973b7SBarry Smith 
341efee365bSSatish Balay   ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr);
3421ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3431ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
344bcc973b7SBarry Smith   PetscFunctionReturn(0);
345bcc973b7SBarry Smith }
346bcc973b7SBarry Smith 
3474a2ae208SSatish Balay #undef __FUNCT__
348b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
349dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
350bcc973b7SBarry Smith {
3514cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
352bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
35387828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
354dfbe8321SBarry Smith   PetscErrorCode ierr;
355899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
356bcc973b7SBarry Smith 
357bcc973b7SBarry Smith   PetscFunctionBegin;
3582dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
3591ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3601ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
361bfec09a0SHong Zhang 
362bcc973b7SBarry Smith   for (i=0; i<m; i++) {
363bfec09a0SHong Zhang     idx    = a->j + a->i[i];
364bfec09a0SHong Zhang     v      = a->a + a->i[i];
365bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
366bcc973b7SBarry Smith     alpha1 = x[3*i];
367bcc973b7SBarry Smith     alpha2 = x[3*i+1];
368bcc973b7SBarry Smith     alpha3 = x[3*i+2];
369bcc973b7SBarry Smith     while (n-->0) {
370bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
371bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
372bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
373bcc973b7SBarry Smith       idx++; v++;
374bcc973b7SBarry Smith     }
375bcc973b7SBarry Smith   }
376899cda47SBarry Smith   ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr);
3771ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3781ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
379bcc973b7SBarry Smith   PetscFunctionReturn(0);
380bcc973b7SBarry Smith }
381bcc973b7SBarry Smith 
3824a2ae208SSatish Balay #undef __FUNCT__
383b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
384dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
385bcc973b7SBarry Smith {
3864cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
387bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
38887828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
389dfbe8321SBarry Smith   PetscErrorCode ierr;
390899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
391b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
392bcc973b7SBarry Smith 
393bcc973b7SBarry Smith   PetscFunctionBegin;
394f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3951ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3961ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
397bcc973b7SBarry Smith   idx  = a->j;
398bcc973b7SBarry Smith   v    = a->a;
399bcc973b7SBarry Smith   ii   = a->i;
400bcc973b7SBarry Smith 
401bcc973b7SBarry Smith   for (i=0; i<m; i++) {
402bcc973b7SBarry Smith     jrow = ii[i];
403bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
404bcc973b7SBarry Smith     sum1  = 0.0;
405bcc973b7SBarry Smith     sum2  = 0.0;
406bcc973b7SBarry Smith     sum3  = 0.0;
407bcc973b7SBarry Smith     for (j=0; j<n; j++) {
408bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
409bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
410bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
411bcc973b7SBarry Smith       jrow++;
412bcc973b7SBarry Smith      }
413bcc973b7SBarry Smith     y[3*i]   += sum1;
414bcc973b7SBarry Smith     y[3*i+1] += sum2;
415bcc973b7SBarry Smith     y[3*i+2] += sum3;
416bcc973b7SBarry Smith   }
417bcc973b7SBarry Smith 
418efee365bSSatish Balay   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
4191ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4201ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
421bcc973b7SBarry Smith   PetscFunctionReturn(0);
422bcc973b7SBarry Smith }
4234a2ae208SSatish Balay #undef __FUNCT__
424b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
425dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
426bcc973b7SBarry Smith {
4274cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
428bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
42987828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
430dfbe8321SBarry Smith   PetscErrorCode ierr;
431899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
432bcc973b7SBarry Smith 
433bcc973b7SBarry Smith   PetscFunctionBegin;
434f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4351ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4361ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
437bcc973b7SBarry Smith   for (i=0; i<m; i++) {
438bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
439bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
440bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
441bcc973b7SBarry Smith     alpha1 = x[3*i];
442bcc973b7SBarry Smith     alpha2 = x[3*i+1];
443bcc973b7SBarry Smith     alpha3 = x[3*i+2];
444bcc973b7SBarry Smith     while (n-->0) {
445bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
446bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
447bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
448bcc973b7SBarry Smith       idx++; v++;
449bcc973b7SBarry Smith     }
450bcc973b7SBarry Smith   }
451efee365bSSatish Balay   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
4521ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4531ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
454bcc973b7SBarry Smith   PetscFunctionReturn(0);
455bcc973b7SBarry Smith }
456bcc973b7SBarry Smith 
457bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4584a2ae208SSatish Balay #undef __FUNCT__
459b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
460dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
461bcc973b7SBarry Smith {
4624cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
463bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
46487828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
465dfbe8321SBarry Smith   PetscErrorCode ierr;
466899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
467b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
468bcc973b7SBarry Smith 
469bcc973b7SBarry Smith   PetscFunctionBegin;
4701ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4711ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
472bcc973b7SBarry Smith   idx  = a->j;
473bcc973b7SBarry Smith   v    = a->a;
474bcc973b7SBarry Smith   ii   = a->i;
475bcc973b7SBarry Smith 
476bcc973b7SBarry Smith   for (i=0; i<m; i++) {
477bcc973b7SBarry Smith     jrow = ii[i];
478bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
479bcc973b7SBarry Smith     sum1  = 0.0;
480bcc973b7SBarry Smith     sum2  = 0.0;
481bcc973b7SBarry Smith     sum3  = 0.0;
482bcc973b7SBarry Smith     sum4  = 0.0;
483bcc973b7SBarry Smith     for (j=0; j<n; j++) {
484bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
485bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
486bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
487bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
488bcc973b7SBarry Smith       jrow++;
489bcc973b7SBarry Smith      }
490bcc973b7SBarry Smith     y[4*i]   = sum1;
491bcc973b7SBarry Smith     y[4*i+1] = sum2;
492bcc973b7SBarry Smith     y[4*i+2] = sum3;
493bcc973b7SBarry Smith     y[4*i+3] = sum4;
494bcc973b7SBarry Smith   }
495bcc973b7SBarry Smith 
496efee365bSSatish Balay   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
4971ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4981ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
499bcc973b7SBarry Smith   PetscFunctionReturn(0);
500bcc973b7SBarry Smith }
501bcc973b7SBarry Smith 
5024a2ae208SSatish Balay #undef __FUNCT__
503b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
504dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
505bcc973b7SBarry Smith {
5064cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
507bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
50887828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
509dfbe8321SBarry Smith   PetscErrorCode ierr;
510899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
511bcc973b7SBarry Smith 
512bcc973b7SBarry Smith   PetscFunctionBegin;
5132dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
5141ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5151ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
516bcc973b7SBarry Smith   for (i=0; i<m; i++) {
517bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
518bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
519bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
520bcc973b7SBarry Smith     alpha1 = x[4*i];
521bcc973b7SBarry Smith     alpha2 = x[4*i+1];
522bcc973b7SBarry Smith     alpha3 = x[4*i+2];
523bcc973b7SBarry Smith     alpha4 = x[4*i+3];
524bcc973b7SBarry Smith     while (n-->0) {
525bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
526bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
527bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
528bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
529bcc973b7SBarry Smith       idx++; v++;
530bcc973b7SBarry Smith     }
531bcc973b7SBarry Smith   }
532899cda47SBarry Smith   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
5331ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5341ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
535bcc973b7SBarry Smith   PetscFunctionReturn(0);
536bcc973b7SBarry Smith }
537bcc973b7SBarry Smith 
5384a2ae208SSatish Balay #undef __FUNCT__
539b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
540dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
541bcc973b7SBarry Smith {
5424cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
543f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
54487828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
545dfbe8321SBarry Smith   PetscErrorCode ierr;
546899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
547b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
548f9fae5adSBarry Smith 
549f9fae5adSBarry Smith   PetscFunctionBegin;
550f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5511ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5521ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
553f9fae5adSBarry Smith   idx  = a->j;
554f9fae5adSBarry Smith   v    = a->a;
555f9fae5adSBarry Smith   ii   = a->i;
556f9fae5adSBarry Smith 
557f9fae5adSBarry Smith   for (i=0; i<m; i++) {
558f9fae5adSBarry Smith     jrow = ii[i];
559f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
560f9fae5adSBarry Smith     sum1  = 0.0;
561f9fae5adSBarry Smith     sum2  = 0.0;
562f9fae5adSBarry Smith     sum3  = 0.0;
563f9fae5adSBarry Smith     sum4  = 0.0;
564f9fae5adSBarry Smith     for (j=0; j<n; j++) {
565f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
566f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
567f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
568f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
569f9fae5adSBarry Smith       jrow++;
570f9fae5adSBarry Smith      }
571f9fae5adSBarry Smith     y[4*i]   += sum1;
572f9fae5adSBarry Smith     y[4*i+1] += sum2;
573f9fae5adSBarry Smith     y[4*i+2] += sum3;
574f9fae5adSBarry Smith     y[4*i+3] += sum4;
575f9fae5adSBarry Smith   }
576f9fae5adSBarry Smith 
577efee365bSSatish Balay   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
5781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5791ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
580f9fae5adSBarry Smith   PetscFunctionReturn(0);
581bcc973b7SBarry Smith }
5824a2ae208SSatish Balay #undef __FUNCT__
583b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
584dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
585bcc973b7SBarry Smith {
5864cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
587f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
58887828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
589dfbe8321SBarry Smith   PetscErrorCode ierr;
590899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
591f9fae5adSBarry Smith 
592f9fae5adSBarry Smith   PetscFunctionBegin;
593f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5941ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5951ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
596bfec09a0SHong Zhang 
597f9fae5adSBarry Smith   for (i=0; i<m; i++) {
598bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
599bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
600f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
601f9fae5adSBarry Smith     alpha1 = x[4*i];
602f9fae5adSBarry Smith     alpha2 = x[4*i+1];
603f9fae5adSBarry Smith     alpha3 = x[4*i+2];
604f9fae5adSBarry Smith     alpha4 = x[4*i+3];
605f9fae5adSBarry Smith     while (n-->0) {
606f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
607f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
608f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
609f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
610f9fae5adSBarry Smith       idx++; v++;
611f9fae5adSBarry Smith     }
612f9fae5adSBarry Smith   }
613899cda47SBarry Smith   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
6141ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6151ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
616f9fae5adSBarry Smith   PetscFunctionReturn(0);
617f9fae5adSBarry Smith }
618f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6196cd98798SBarry Smith 
6204a2ae208SSatish Balay #undef __FUNCT__
621b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
622dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
623f9fae5adSBarry Smith {
6244cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
625f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
62687828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
627dfbe8321SBarry Smith   PetscErrorCode ierr;
628899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
629b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
630f9fae5adSBarry Smith 
631f9fae5adSBarry Smith   PetscFunctionBegin;
6321ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6331ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
634f9fae5adSBarry Smith   idx  = a->j;
635f9fae5adSBarry Smith   v    = a->a;
636f9fae5adSBarry Smith   ii   = a->i;
637f9fae5adSBarry Smith 
638f9fae5adSBarry Smith   for (i=0; i<m; i++) {
639f9fae5adSBarry Smith     jrow = ii[i];
640f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
641f9fae5adSBarry Smith     sum1  = 0.0;
642f9fae5adSBarry Smith     sum2  = 0.0;
643f9fae5adSBarry Smith     sum3  = 0.0;
644f9fae5adSBarry Smith     sum4  = 0.0;
645f9fae5adSBarry Smith     sum5  = 0.0;
646f9fae5adSBarry Smith     for (j=0; j<n; j++) {
647f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
648f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
649f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
650f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
651f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
652f9fae5adSBarry Smith       jrow++;
653f9fae5adSBarry Smith      }
654f9fae5adSBarry Smith     y[5*i]   = sum1;
655f9fae5adSBarry Smith     y[5*i+1] = sum2;
656f9fae5adSBarry Smith     y[5*i+2] = sum3;
657f9fae5adSBarry Smith     y[5*i+3] = sum4;
658f9fae5adSBarry Smith     y[5*i+4] = sum5;
659f9fae5adSBarry Smith   }
660f9fae5adSBarry Smith 
661efee365bSSatish Balay   ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr);
6621ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6631ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
664f9fae5adSBarry Smith   PetscFunctionReturn(0);
665f9fae5adSBarry Smith }
666f9fae5adSBarry Smith 
6674a2ae208SSatish Balay #undef __FUNCT__
668b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
669dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
670f9fae5adSBarry Smith {
6714cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
672f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
67387828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
674dfbe8321SBarry Smith   PetscErrorCode ierr;
675899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
676f9fae5adSBarry Smith 
677f9fae5adSBarry Smith   PetscFunctionBegin;
6782dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
6791ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6801ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
681bfec09a0SHong Zhang 
682f9fae5adSBarry Smith   for (i=0; i<m; i++) {
683bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
684bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
685f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
686f9fae5adSBarry Smith     alpha1 = x[5*i];
687f9fae5adSBarry Smith     alpha2 = x[5*i+1];
688f9fae5adSBarry Smith     alpha3 = x[5*i+2];
689f9fae5adSBarry Smith     alpha4 = x[5*i+3];
690f9fae5adSBarry Smith     alpha5 = x[5*i+4];
691f9fae5adSBarry Smith     while (n-->0) {
692f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
693f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
694f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
695f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
696f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
697f9fae5adSBarry Smith       idx++; v++;
698f9fae5adSBarry Smith     }
699f9fae5adSBarry Smith   }
700899cda47SBarry Smith   ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr);
7011ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7021ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
703f9fae5adSBarry Smith   PetscFunctionReturn(0);
704f9fae5adSBarry Smith }
705f9fae5adSBarry Smith 
7064a2ae208SSatish Balay #undef __FUNCT__
707b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
708dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
709f9fae5adSBarry Smith {
7104cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
711f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
71287828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
713dfbe8321SBarry Smith   PetscErrorCode ierr;
714899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
715b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
716f9fae5adSBarry Smith 
717f9fae5adSBarry Smith   PetscFunctionBegin;
718f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7191ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7201ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
721f9fae5adSBarry Smith   idx  = a->j;
722f9fae5adSBarry Smith   v    = a->a;
723f9fae5adSBarry Smith   ii   = a->i;
724f9fae5adSBarry Smith 
725f9fae5adSBarry Smith   for (i=0; i<m; i++) {
726f9fae5adSBarry Smith     jrow = ii[i];
727f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
728f9fae5adSBarry Smith     sum1  = 0.0;
729f9fae5adSBarry Smith     sum2  = 0.0;
730f9fae5adSBarry Smith     sum3  = 0.0;
731f9fae5adSBarry Smith     sum4  = 0.0;
732f9fae5adSBarry Smith     sum5  = 0.0;
733f9fae5adSBarry Smith     for (j=0; j<n; j++) {
734f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
735f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
736f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
737f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
738f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
739f9fae5adSBarry Smith       jrow++;
740f9fae5adSBarry Smith      }
741f9fae5adSBarry Smith     y[5*i]   += sum1;
742f9fae5adSBarry Smith     y[5*i+1] += sum2;
743f9fae5adSBarry Smith     y[5*i+2] += sum3;
744f9fae5adSBarry Smith     y[5*i+3] += sum4;
745f9fae5adSBarry Smith     y[5*i+4] += sum5;
746f9fae5adSBarry Smith   }
747f9fae5adSBarry Smith 
748efee365bSSatish Balay   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
7491ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7501ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
751f9fae5adSBarry Smith   PetscFunctionReturn(0);
752f9fae5adSBarry Smith }
753f9fae5adSBarry Smith 
7544a2ae208SSatish Balay #undef __FUNCT__
755b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
756dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
757f9fae5adSBarry Smith {
7584cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
759f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
76087828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
761dfbe8321SBarry Smith   PetscErrorCode ierr;
762899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
763f9fae5adSBarry Smith 
764f9fae5adSBarry Smith   PetscFunctionBegin;
765f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7661ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7671ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
768bfec09a0SHong Zhang 
769f9fae5adSBarry Smith   for (i=0; i<m; i++) {
770bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
771bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
772f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
773f9fae5adSBarry Smith     alpha1 = x[5*i];
774f9fae5adSBarry Smith     alpha2 = x[5*i+1];
775f9fae5adSBarry Smith     alpha3 = x[5*i+2];
776f9fae5adSBarry Smith     alpha4 = x[5*i+3];
777f9fae5adSBarry Smith     alpha5 = x[5*i+4];
778f9fae5adSBarry Smith     while (n-->0) {
779f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
780f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
781f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
782f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
783f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
784f9fae5adSBarry Smith       idx++; v++;
785f9fae5adSBarry Smith     }
786f9fae5adSBarry Smith   }
787efee365bSSatish Balay   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
7881ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7891ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
790f9fae5adSBarry Smith   PetscFunctionReturn(0);
791bcc973b7SBarry Smith }
792bcc973b7SBarry Smith 
7936cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
7944a2ae208SSatish Balay #undef __FUNCT__
795b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
796dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
7976cd98798SBarry Smith {
7986cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
7996cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
80087828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
801dfbe8321SBarry Smith   PetscErrorCode ierr;
802899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
803b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
8046cd98798SBarry Smith 
8056cd98798SBarry Smith   PetscFunctionBegin;
8061ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8071ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8086cd98798SBarry Smith   idx  = a->j;
8096cd98798SBarry Smith   v    = a->a;
8106cd98798SBarry Smith   ii   = a->i;
8116cd98798SBarry Smith 
8126cd98798SBarry Smith   for (i=0; i<m; i++) {
8136cd98798SBarry Smith     jrow = ii[i];
8146cd98798SBarry Smith     n    = ii[i+1] - jrow;
8156cd98798SBarry Smith     sum1  = 0.0;
8166cd98798SBarry Smith     sum2  = 0.0;
8176cd98798SBarry Smith     sum3  = 0.0;
8186cd98798SBarry Smith     sum4  = 0.0;
8196cd98798SBarry Smith     sum5  = 0.0;
8206cd98798SBarry Smith     sum6  = 0.0;
8216cd98798SBarry Smith     for (j=0; j<n; j++) {
8226cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8236cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8246cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8256cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8266cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8276cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8286cd98798SBarry Smith       jrow++;
8296cd98798SBarry Smith      }
8306cd98798SBarry Smith     y[6*i]   = sum1;
8316cd98798SBarry Smith     y[6*i+1] = sum2;
8326cd98798SBarry Smith     y[6*i+2] = sum3;
8336cd98798SBarry Smith     y[6*i+3] = sum4;
8346cd98798SBarry Smith     y[6*i+4] = sum5;
8356cd98798SBarry Smith     y[6*i+5] = sum6;
8366cd98798SBarry Smith   }
8376cd98798SBarry Smith 
838efee365bSSatish Balay   ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr);
8391ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8401ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8416cd98798SBarry Smith   PetscFunctionReturn(0);
8426cd98798SBarry Smith }
8436cd98798SBarry Smith 
8444a2ae208SSatish Balay #undef __FUNCT__
845b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
846dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8476cd98798SBarry Smith {
8486cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
8496cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
85087828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
851dfbe8321SBarry Smith   PetscErrorCode ierr;
852899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
8536cd98798SBarry Smith 
8546cd98798SBarry Smith   PetscFunctionBegin;
8552dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
8561ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8571ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
858bfec09a0SHong Zhang 
8596cd98798SBarry Smith   for (i=0; i<m; i++) {
860bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
861bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
8626cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8636cd98798SBarry Smith     alpha1 = x[6*i];
8646cd98798SBarry Smith     alpha2 = x[6*i+1];
8656cd98798SBarry Smith     alpha3 = x[6*i+2];
8666cd98798SBarry Smith     alpha4 = x[6*i+3];
8676cd98798SBarry Smith     alpha5 = x[6*i+4];
8686cd98798SBarry Smith     alpha6 = x[6*i+5];
8696cd98798SBarry Smith     while (n-->0) {
8706cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8716cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8726cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
8736cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
8746cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
8756cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
8766cd98798SBarry Smith       idx++; v++;
8776cd98798SBarry Smith     }
8786cd98798SBarry Smith   }
879899cda47SBarry Smith   ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr);
8801ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8811ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8826cd98798SBarry Smith   PetscFunctionReturn(0);
8836cd98798SBarry Smith }
8846cd98798SBarry Smith 
8854a2ae208SSatish Balay #undef __FUNCT__
886b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
887dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8886cd98798SBarry Smith {
8896cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
8906cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
89187828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
892dfbe8321SBarry Smith   PetscErrorCode ierr;
893899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
894b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
8956cd98798SBarry Smith 
8966cd98798SBarry Smith   PetscFunctionBegin;
8976cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9006cd98798SBarry Smith   idx  = a->j;
9016cd98798SBarry Smith   v    = a->a;
9026cd98798SBarry Smith   ii   = a->i;
9036cd98798SBarry Smith 
9046cd98798SBarry Smith   for (i=0; i<m; i++) {
9056cd98798SBarry Smith     jrow = ii[i];
9066cd98798SBarry Smith     n    = ii[i+1] - jrow;
9076cd98798SBarry Smith     sum1  = 0.0;
9086cd98798SBarry Smith     sum2  = 0.0;
9096cd98798SBarry Smith     sum3  = 0.0;
9106cd98798SBarry Smith     sum4  = 0.0;
9116cd98798SBarry Smith     sum5  = 0.0;
9126cd98798SBarry Smith     sum6  = 0.0;
9136cd98798SBarry Smith     for (j=0; j<n; j++) {
9146cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9156cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9166cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9176cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9186cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9196cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9206cd98798SBarry Smith       jrow++;
9216cd98798SBarry Smith      }
9226cd98798SBarry Smith     y[6*i]   += sum1;
9236cd98798SBarry Smith     y[6*i+1] += sum2;
9246cd98798SBarry Smith     y[6*i+2] += sum3;
9256cd98798SBarry Smith     y[6*i+3] += sum4;
9266cd98798SBarry Smith     y[6*i+4] += sum5;
9276cd98798SBarry Smith     y[6*i+5] += sum6;
9286cd98798SBarry Smith   }
9296cd98798SBarry Smith 
930efee365bSSatish Balay   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
9311ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
9321ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9336cd98798SBarry Smith   PetscFunctionReturn(0);
9346cd98798SBarry Smith }
9356cd98798SBarry Smith 
9364a2ae208SSatish Balay #undef __FUNCT__
937b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
938dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9396cd98798SBarry Smith {
9406cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9416cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
94287828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
943dfbe8321SBarry Smith   PetscErrorCode ierr;
944899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
9456cd98798SBarry Smith 
9466cd98798SBarry Smith   PetscFunctionBegin;
9476cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9481ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9491ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
950bfec09a0SHong Zhang 
9516cd98798SBarry Smith   for (i=0; i<m; i++) {
952bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
953bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9546cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9556cd98798SBarry Smith     alpha1 = x[6*i];
9566cd98798SBarry Smith     alpha2 = x[6*i+1];
9576cd98798SBarry Smith     alpha3 = x[6*i+2];
9586cd98798SBarry Smith     alpha4 = x[6*i+3];
9596cd98798SBarry Smith     alpha5 = x[6*i+4];
9606cd98798SBarry Smith     alpha6 = x[6*i+5];
9616cd98798SBarry Smith     while (n-->0) {
9626cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9636cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9646cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9656cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9666cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9676cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9686cd98798SBarry Smith       idx++; v++;
9696cd98798SBarry Smith     }
9706cd98798SBarry Smith   }
971efee365bSSatish Balay   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
9721ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
9731ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9746cd98798SBarry Smith   PetscFunctionReturn(0);
9756cd98798SBarry Smith }
9766cd98798SBarry Smith 
97766ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
97866ed3db0SBarry Smith #undef __FUNCT__
979ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
980ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
981ed8eea03SBarry Smith {
982ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
983ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
984ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
985ed8eea03SBarry Smith   PetscErrorCode ierr;
986899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
987ed8eea03SBarry Smith   PetscInt       n,i,jrow,j;
988ed8eea03SBarry Smith 
989ed8eea03SBarry Smith   PetscFunctionBegin;
990ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
991ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
992ed8eea03SBarry Smith   idx  = a->j;
993ed8eea03SBarry Smith   v    = a->a;
994ed8eea03SBarry Smith   ii   = a->i;
995ed8eea03SBarry Smith 
996ed8eea03SBarry Smith   for (i=0; i<m; i++) {
997ed8eea03SBarry Smith     jrow = ii[i];
998ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
999ed8eea03SBarry Smith     sum1  = 0.0;
1000ed8eea03SBarry Smith     sum2  = 0.0;
1001ed8eea03SBarry Smith     sum3  = 0.0;
1002ed8eea03SBarry Smith     sum4  = 0.0;
1003ed8eea03SBarry Smith     sum5  = 0.0;
1004ed8eea03SBarry Smith     sum6  = 0.0;
1005ed8eea03SBarry Smith     sum7  = 0.0;
1006ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1007ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1008ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1009ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1010ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1011ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1012ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1013ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1014ed8eea03SBarry Smith       jrow++;
1015ed8eea03SBarry Smith      }
1016ed8eea03SBarry Smith     y[7*i]   = sum1;
1017ed8eea03SBarry Smith     y[7*i+1] = sum2;
1018ed8eea03SBarry Smith     y[7*i+2] = sum3;
1019ed8eea03SBarry Smith     y[7*i+3] = sum4;
1020ed8eea03SBarry Smith     y[7*i+4] = sum5;
1021ed8eea03SBarry Smith     y[7*i+5] = sum6;
1022ed8eea03SBarry Smith     y[7*i+6] = sum7;
1023ed8eea03SBarry Smith   }
1024ed8eea03SBarry Smith 
1025efee365bSSatish Balay   ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr);
1026ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1027ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1028ed8eea03SBarry Smith   PetscFunctionReturn(0);
1029ed8eea03SBarry Smith }
1030ed8eea03SBarry Smith 
1031ed8eea03SBarry Smith #undef __FUNCT__
1032ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1033ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1034ed8eea03SBarry Smith {
1035ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1036ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1037ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1038ed8eea03SBarry Smith   PetscErrorCode ierr;
1039899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1040ed8eea03SBarry Smith 
1041ed8eea03SBarry Smith   PetscFunctionBegin;
10422dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1043ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1044ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1045ed8eea03SBarry Smith 
1046ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1047ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1048ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1049ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1050ed8eea03SBarry Smith     alpha1 = x[7*i];
1051ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1052ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1053ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1054ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1055ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1056ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1057ed8eea03SBarry Smith     while (n-->0) {
1058ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1059ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1060ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1061ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1062ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1063ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1064ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1065ed8eea03SBarry Smith       idx++; v++;
1066ed8eea03SBarry Smith     }
1067ed8eea03SBarry Smith   }
1068899cda47SBarry Smith   ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr);
1069ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1070ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1071ed8eea03SBarry Smith   PetscFunctionReturn(0);
1072ed8eea03SBarry Smith }
1073ed8eea03SBarry Smith 
1074ed8eea03SBarry Smith #undef __FUNCT__
1075ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1076ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1077ed8eea03SBarry Smith {
1078ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1079ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1080ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1081ed8eea03SBarry Smith   PetscErrorCode ierr;
1082899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1083ed8eea03SBarry Smith   PetscInt       n,i,jrow,j;
1084ed8eea03SBarry Smith 
1085ed8eea03SBarry Smith   PetscFunctionBegin;
1086ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1087ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1088ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1089ed8eea03SBarry Smith   idx  = a->j;
1090ed8eea03SBarry Smith   v    = a->a;
1091ed8eea03SBarry Smith   ii   = a->i;
1092ed8eea03SBarry Smith 
1093ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1094ed8eea03SBarry Smith     jrow = ii[i];
1095ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1096ed8eea03SBarry Smith     sum1  = 0.0;
1097ed8eea03SBarry Smith     sum2  = 0.0;
1098ed8eea03SBarry Smith     sum3  = 0.0;
1099ed8eea03SBarry Smith     sum4  = 0.0;
1100ed8eea03SBarry Smith     sum5  = 0.0;
1101ed8eea03SBarry Smith     sum6  = 0.0;
1102ed8eea03SBarry Smith     sum7  = 0.0;
1103ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1104ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1105ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1106ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1107ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1108ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1109ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1110ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1111ed8eea03SBarry Smith       jrow++;
1112ed8eea03SBarry Smith      }
1113ed8eea03SBarry Smith     y[7*i]   += sum1;
1114ed8eea03SBarry Smith     y[7*i+1] += sum2;
1115ed8eea03SBarry Smith     y[7*i+2] += sum3;
1116ed8eea03SBarry Smith     y[7*i+3] += sum4;
1117ed8eea03SBarry Smith     y[7*i+4] += sum5;
1118ed8eea03SBarry Smith     y[7*i+5] += sum6;
1119ed8eea03SBarry Smith     y[7*i+6] += sum7;
1120ed8eea03SBarry Smith   }
1121ed8eea03SBarry Smith 
1122efee365bSSatish Balay   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1123ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1124ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1125ed8eea03SBarry Smith   PetscFunctionReturn(0);
1126ed8eea03SBarry Smith }
1127ed8eea03SBarry Smith 
1128ed8eea03SBarry Smith #undef __FUNCT__
1129ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1130ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1131ed8eea03SBarry Smith {
1132ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1133ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1134ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1135ed8eea03SBarry Smith   PetscErrorCode ierr;
1136899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1137ed8eea03SBarry Smith 
1138ed8eea03SBarry Smith   PetscFunctionBegin;
1139ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1140ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1141ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1142ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1143ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1144ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1145ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1146ed8eea03SBarry Smith     alpha1 = x[7*i];
1147ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1148ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1149ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1150ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1151ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1152ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1153ed8eea03SBarry Smith     while (n-->0) {
1154ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1155ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1156ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1157ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1158ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1159ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1160ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1161ed8eea03SBarry Smith       idx++; v++;
1162ed8eea03SBarry Smith     }
1163ed8eea03SBarry Smith   }
1164efee365bSSatish Balay   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1165ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1166ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1167ed8eea03SBarry Smith   PetscFunctionReturn(0);
1168ed8eea03SBarry Smith }
1169ed8eea03SBarry Smith 
1170ed8eea03SBarry Smith #undef __FUNCT__
117166ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1172dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
117366ed3db0SBarry Smith {
117466ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
117566ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
117687828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1177dfbe8321SBarry Smith   PetscErrorCode ierr;
1178899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1179b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
118066ed3db0SBarry Smith 
118166ed3db0SBarry Smith   PetscFunctionBegin;
11821ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
11831ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
118466ed3db0SBarry Smith   idx  = a->j;
118566ed3db0SBarry Smith   v    = a->a;
118666ed3db0SBarry Smith   ii   = a->i;
118766ed3db0SBarry Smith 
118866ed3db0SBarry Smith   for (i=0; i<m; i++) {
118966ed3db0SBarry Smith     jrow = ii[i];
119066ed3db0SBarry Smith     n    = ii[i+1] - jrow;
119166ed3db0SBarry Smith     sum1  = 0.0;
119266ed3db0SBarry Smith     sum2  = 0.0;
119366ed3db0SBarry Smith     sum3  = 0.0;
119466ed3db0SBarry Smith     sum4  = 0.0;
119566ed3db0SBarry Smith     sum5  = 0.0;
119666ed3db0SBarry Smith     sum6  = 0.0;
119766ed3db0SBarry Smith     sum7  = 0.0;
119866ed3db0SBarry Smith     sum8  = 0.0;
119966ed3db0SBarry Smith     for (j=0; j<n; j++) {
120066ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
120166ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
120266ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
120366ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
120466ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
120566ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
120666ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
120766ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
120866ed3db0SBarry Smith       jrow++;
120966ed3db0SBarry Smith      }
121066ed3db0SBarry Smith     y[8*i]   = sum1;
121166ed3db0SBarry Smith     y[8*i+1] = sum2;
121266ed3db0SBarry Smith     y[8*i+2] = sum3;
121366ed3db0SBarry Smith     y[8*i+3] = sum4;
121466ed3db0SBarry Smith     y[8*i+4] = sum5;
121566ed3db0SBarry Smith     y[8*i+5] = sum6;
121666ed3db0SBarry Smith     y[8*i+6] = sum7;
121766ed3db0SBarry Smith     y[8*i+7] = sum8;
121866ed3db0SBarry Smith   }
121966ed3db0SBarry Smith 
1220efee365bSSatish Balay   ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr);
12211ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
12221ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
122366ed3db0SBarry Smith   PetscFunctionReturn(0);
122466ed3db0SBarry Smith }
122566ed3db0SBarry Smith 
122666ed3db0SBarry Smith #undef __FUNCT__
122766ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1228dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
122966ed3db0SBarry Smith {
123066ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
123166ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
123287828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1233dfbe8321SBarry Smith   PetscErrorCode ierr;
1234899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
123566ed3db0SBarry Smith 
123666ed3db0SBarry Smith   PetscFunctionBegin;
12372dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
12381ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12391ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1240bfec09a0SHong Zhang 
124166ed3db0SBarry Smith   for (i=0; i<m; i++) {
1242bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1243bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
124466ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
124566ed3db0SBarry Smith     alpha1 = x[8*i];
124666ed3db0SBarry Smith     alpha2 = x[8*i+1];
124766ed3db0SBarry Smith     alpha3 = x[8*i+2];
124866ed3db0SBarry Smith     alpha4 = x[8*i+3];
124966ed3db0SBarry Smith     alpha5 = x[8*i+4];
125066ed3db0SBarry Smith     alpha6 = x[8*i+5];
125166ed3db0SBarry Smith     alpha7 = x[8*i+6];
125266ed3db0SBarry Smith     alpha8 = x[8*i+7];
125366ed3db0SBarry Smith     while (n-->0) {
125466ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
125566ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
125666ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
125766ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
125866ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
125966ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
126066ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
126166ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
126266ed3db0SBarry Smith       idx++; v++;
126366ed3db0SBarry Smith     }
126466ed3db0SBarry Smith   }
1265899cda47SBarry Smith   ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr);
12661ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
12671ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
126866ed3db0SBarry Smith   PetscFunctionReturn(0);
126966ed3db0SBarry Smith }
127066ed3db0SBarry Smith 
127166ed3db0SBarry Smith #undef __FUNCT__
127266ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1273dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
127466ed3db0SBarry Smith {
127566ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
127666ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
127787828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1278dfbe8321SBarry Smith   PetscErrorCode ierr;
1279899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1280b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
128166ed3db0SBarry Smith 
128266ed3db0SBarry Smith   PetscFunctionBegin;
128366ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12841ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12851ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
128666ed3db0SBarry Smith   idx  = a->j;
128766ed3db0SBarry Smith   v    = a->a;
128866ed3db0SBarry Smith   ii   = a->i;
128966ed3db0SBarry Smith 
129066ed3db0SBarry Smith   for (i=0; i<m; i++) {
129166ed3db0SBarry Smith     jrow = ii[i];
129266ed3db0SBarry Smith     n    = ii[i+1] - jrow;
129366ed3db0SBarry Smith     sum1  = 0.0;
129466ed3db0SBarry Smith     sum2  = 0.0;
129566ed3db0SBarry Smith     sum3  = 0.0;
129666ed3db0SBarry Smith     sum4  = 0.0;
129766ed3db0SBarry Smith     sum5  = 0.0;
129866ed3db0SBarry Smith     sum6  = 0.0;
129966ed3db0SBarry Smith     sum7  = 0.0;
130066ed3db0SBarry Smith     sum8  = 0.0;
130166ed3db0SBarry Smith     for (j=0; j<n; j++) {
130266ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
130366ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
130466ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
130566ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
130666ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
130766ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
130866ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
130966ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
131066ed3db0SBarry Smith       jrow++;
131166ed3db0SBarry Smith      }
131266ed3db0SBarry Smith     y[8*i]   += sum1;
131366ed3db0SBarry Smith     y[8*i+1] += sum2;
131466ed3db0SBarry Smith     y[8*i+2] += sum3;
131566ed3db0SBarry Smith     y[8*i+3] += sum4;
131666ed3db0SBarry Smith     y[8*i+4] += sum5;
131766ed3db0SBarry Smith     y[8*i+5] += sum6;
131866ed3db0SBarry Smith     y[8*i+6] += sum7;
131966ed3db0SBarry Smith     y[8*i+7] += sum8;
132066ed3db0SBarry Smith   }
132166ed3db0SBarry Smith 
1322efee365bSSatish Balay   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
13231ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
13241ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
132566ed3db0SBarry Smith   PetscFunctionReturn(0);
132666ed3db0SBarry Smith }
132766ed3db0SBarry Smith 
132866ed3db0SBarry Smith #undef __FUNCT__
132966ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1330dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
133166ed3db0SBarry Smith {
133266ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
133366ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
133487828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1335dfbe8321SBarry Smith   PetscErrorCode ierr;
1336899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
133766ed3db0SBarry Smith 
133866ed3db0SBarry Smith   PetscFunctionBegin;
133966ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13401ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
13411ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
134266ed3db0SBarry Smith   for (i=0; i<m; i++) {
1343bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1344bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
134566ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
134666ed3db0SBarry Smith     alpha1 = x[8*i];
134766ed3db0SBarry Smith     alpha2 = x[8*i+1];
134866ed3db0SBarry Smith     alpha3 = x[8*i+2];
134966ed3db0SBarry Smith     alpha4 = x[8*i+3];
135066ed3db0SBarry Smith     alpha5 = x[8*i+4];
135166ed3db0SBarry Smith     alpha6 = x[8*i+5];
135266ed3db0SBarry Smith     alpha7 = x[8*i+6];
135366ed3db0SBarry Smith     alpha8 = x[8*i+7];
135466ed3db0SBarry Smith     while (n-->0) {
135566ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
135666ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
135766ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
135866ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
135966ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
136066ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
136166ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
136266ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
136366ed3db0SBarry Smith       idx++; v++;
136466ed3db0SBarry Smith     }
136566ed3db0SBarry Smith   }
1366efee365bSSatish Balay   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
13671ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
13681ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
13692f7816d4SBarry Smith   PetscFunctionReturn(0);
13702f7816d4SBarry Smith }
13712f7816d4SBarry Smith 
13722b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
13732b692628SMatthew Knepley #undef __FUNCT__
13742b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1375dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
13762b692628SMatthew Knepley {
13772b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
13782b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
13792b692628SMatthew Knepley   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1380dfbe8321SBarry Smith   PetscErrorCode ierr;
1381899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1382b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
13832b692628SMatthew Knepley 
13842b692628SMatthew Knepley   PetscFunctionBegin;
13851ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
13861ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
13872b692628SMatthew Knepley   idx  = a->j;
13882b692628SMatthew Knepley   v    = a->a;
13892b692628SMatthew Knepley   ii   = a->i;
13902b692628SMatthew Knepley 
13912b692628SMatthew Knepley   for (i=0; i<m; i++) {
13922b692628SMatthew Knepley     jrow = ii[i];
13932b692628SMatthew Knepley     n    = ii[i+1] - jrow;
13942b692628SMatthew Knepley     sum1  = 0.0;
13952b692628SMatthew Knepley     sum2  = 0.0;
13962b692628SMatthew Knepley     sum3  = 0.0;
13972b692628SMatthew Knepley     sum4  = 0.0;
13982b692628SMatthew Knepley     sum5  = 0.0;
13992b692628SMatthew Knepley     sum6  = 0.0;
14002b692628SMatthew Knepley     sum7  = 0.0;
14012b692628SMatthew Knepley     sum8  = 0.0;
14022b692628SMatthew Knepley     sum9  = 0.0;
14032b692628SMatthew Knepley     for (j=0; j<n; j++) {
14042b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14052b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14062b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14072b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14082b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14092b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14102b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14112b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14122b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14132b692628SMatthew Knepley       jrow++;
14142b692628SMatthew Knepley      }
14152b692628SMatthew Knepley     y[9*i]   = sum1;
14162b692628SMatthew Knepley     y[9*i+1] = sum2;
14172b692628SMatthew Knepley     y[9*i+2] = sum3;
14182b692628SMatthew Knepley     y[9*i+3] = sum4;
14192b692628SMatthew Knepley     y[9*i+4] = sum5;
14202b692628SMatthew Knepley     y[9*i+5] = sum6;
14212b692628SMatthew Knepley     y[9*i+6] = sum7;
14222b692628SMatthew Knepley     y[9*i+7] = sum8;
14232b692628SMatthew Knepley     y[9*i+8] = sum9;
14242b692628SMatthew Knepley   }
14252b692628SMatthew Knepley 
1426efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr);
14271ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
14281ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14292b692628SMatthew Knepley   PetscFunctionReturn(0);
14302b692628SMatthew Knepley }
14312b692628SMatthew Knepley 
1432b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1433b9cda22cSBarry Smith 
14342b692628SMatthew Knepley #undef __FUNCT__
14352b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1436dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14372b692628SMatthew Knepley {
14382b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
14392b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
14402b692628SMatthew Knepley   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1441dfbe8321SBarry Smith   PetscErrorCode ierr;
1442899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
14432b692628SMatthew Knepley 
14442b692628SMatthew Knepley   PetscFunctionBegin;
14452dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
14461ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14471ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14482b692628SMatthew Knepley 
14492b692628SMatthew Knepley   for (i=0; i<m; i++) {
14502b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
14512b692628SMatthew Knepley     v      = a->a + a->i[i] ;
14522b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14532b692628SMatthew Knepley     alpha1 = x[9*i];
14542b692628SMatthew Knepley     alpha2 = x[9*i+1];
14552b692628SMatthew Knepley     alpha3 = x[9*i+2];
14562b692628SMatthew Knepley     alpha4 = x[9*i+3];
14572b692628SMatthew Knepley     alpha5 = x[9*i+4];
14582b692628SMatthew Knepley     alpha6 = x[9*i+5];
14592b692628SMatthew Knepley     alpha7 = x[9*i+6];
14602b692628SMatthew Knepley     alpha8 = x[9*i+7];
14612f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
14622b692628SMatthew Knepley     while (n-->0) {
14632b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
14642b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
14652b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
14662b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
14672b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
14682b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
14692b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
14702b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
14712b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
14722b692628SMatthew Knepley       idx++; v++;
14732b692628SMatthew Knepley     }
14742b692628SMatthew Knepley   }
1475899cda47SBarry Smith   ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr);
14761ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
14771ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14782b692628SMatthew Knepley   PetscFunctionReturn(0);
14792b692628SMatthew Knepley }
14802b692628SMatthew Knepley 
14812b692628SMatthew Knepley #undef __FUNCT__
14822b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1483dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
14842b692628SMatthew Knepley {
14852b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
14862b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
14872b692628SMatthew Knepley   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1488dfbe8321SBarry Smith   PetscErrorCode ierr;
1489899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1490b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
14912b692628SMatthew Knepley 
14922b692628SMatthew Knepley   PetscFunctionBegin;
14932b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
14941ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14951ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
14962b692628SMatthew Knepley   idx  = a->j;
14972b692628SMatthew Knepley   v    = a->a;
14982b692628SMatthew Knepley   ii   = a->i;
14992b692628SMatthew Knepley 
15002b692628SMatthew Knepley   for (i=0; i<m; i++) {
15012b692628SMatthew Knepley     jrow = ii[i];
15022b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15032b692628SMatthew Knepley     sum1  = 0.0;
15042b692628SMatthew Knepley     sum2  = 0.0;
15052b692628SMatthew Knepley     sum3  = 0.0;
15062b692628SMatthew Knepley     sum4  = 0.0;
15072b692628SMatthew Knepley     sum5  = 0.0;
15082b692628SMatthew Knepley     sum6  = 0.0;
15092b692628SMatthew Knepley     sum7  = 0.0;
15102b692628SMatthew Knepley     sum8  = 0.0;
15112b692628SMatthew Knepley     sum9  = 0.0;
15122b692628SMatthew Knepley     for (j=0; j<n; j++) {
15132b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15142b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15152b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15162b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15172b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15182b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15192b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15202b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15212b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15222b692628SMatthew Knepley       jrow++;
15232b692628SMatthew Knepley      }
15242b692628SMatthew Knepley     y[9*i]   += sum1;
15252b692628SMatthew Knepley     y[9*i+1] += sum2;
15262b692628SMatthew Knepley     y[9*i+2] += sum3;
15272b692628SMatthew Knepley     y[9*i+3] += sum4;
15282b692628SMatthew Knepley     y[9*i+4] += sum5;
15292b692628SMatthew Knepley     y[9*i+5] += sum6;
15302b692628SMatthew Knepley     y[9*i+6] += sum7;
15312b692628SMatthew Knepley     y[9*i+7] += sum8;
15322b692628SMatthew Knepley     y[9*i+8] += sum9;
15332b692628SMatthew Knepley   }
15342b692628SMatthew Knepley 
1535efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
15361ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
15371ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15382b692628SMatthew Knepley   PetscFunctionReturn(0);
15392b692628SMatthew Knepley }
15402b692628SMatthew Knepley 
15412b692628SMatthew Knepley #undef __FUNCT__
15422b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1543dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15442b692628SMatthew Knepley {
15452b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
15462b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
15472b692628SMatthew Knepley   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1548dfbe8321SBarry Smith   PetscErrorCode ierr;
1549899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
15502b692628SMatthew Knepley 
15512b692628SMatthew Knepley   PetscFunctionBegin;
15522b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15531ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
15541ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15552b692628SMatthew Knepley   for (i=0; i<m; i++) {
15562b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
15572b692628SMatthew Knepley     v      = a->a + a->i[i] ;
15582b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15592b692628SMatthew Knepley     alpha1 = x[9*i];
15602b692628SMatthew Knepley     alpha2 = x[9*i+1];
15612b692628SMatthew Knepley     alpha3 = x[9*i+2];
15622b692628SMatthew Knepley     alpha4 = x[9*i+3];
15632b692628SMatthew Knepley     alpha5 = x[9*i+4];
15642b692628SMatthew Knepley     alpha6 = x[9*i+5];
15652b692628SMatthew Knepley     alpha7 = x[9*i+6];
15662b692628SMatthew Knepley     alpha8 = x[9*i+7];
15672b692628SMatthew Knepley     alpha9 = x[9*i+8];
15682b692628SMatthew Knepley     while (n-->0) {
15692b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15702b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15712b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15722b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15732b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15742b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15752b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15762b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15772b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15782b692628SMatthew Knepley       idx++; v++;
15792b692628SMatthew Knepley     }
15802b692628SMatthew Knepley   }
1581efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
15821ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
15831ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15842b692628SMatthew Knepley   PetscFunctionReturn(0);
15852b692628SMatthew Knepley }
1586b9cda22cSBarry Smith /*--------------------------------------------------------------------------------------------*/
1587b9cda22cSBarry Smith #undef __FUNCT__
1588b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1589b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1590b9cda22cSBarry Smith {
1591b9cda22cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1592b9cda22cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1593b9cda22cSBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1594b9cda22cSBarry Smith   PetscErrorCode ierr;
1595899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1596b9cda22cSBarry Smith   PetscInt       n,i,jrow,j;
1597b9cda22cSBarry Smith 
1598b9cda22cSBarry Smith   PetscFunctionBegin;
1599b9cda22cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1600b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1601b9cda22cSBarry Smith   idx  = a->j;
1602b9cda22cSBarry Smith   v    = a->a;
1603b9cda22cSBarry Smith   ii   = a->i;
1604b9cda22cSBarry Smith 
1605b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1606b9cda22cSBarry Smith     jrow = ii[i];
1607b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1608b9cda22cSBarry Smith     sum1  = 0.0;
1609b9cda22cSBarry Smith     sum2  = 0.0;
1610b9cda22cSBarry Smith     sum3  = 0.0;
1611b9cda22cSBarry Smith     sum4  = 0.0;
1612b9cda22cSBarry Smith     sum5  = 0.0;
1613b9cda22cSBarry Smith     sum6  = 0.0;
1614b9cda22cSBarry Smith     sum7  = 0.0;
1615b9cda22cSBarry Smith     sum8  = 0.0;
1616b9cda22cSBarry Smith     sum9  = 0.0;
1617b9cda22cSBarry Smith     sum10 = 0.0;
1618b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1619b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1620b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1621b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1622b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1623b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1624b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1625b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1626b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1627b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1628b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1629b9cda22cSBarry Smith       jrow++;
1630b9cda22cSBarry Smith      }
1631b9cda22cSBarry Smith     y[10*i]   = sum1;
1632b9cda22cSBarry Smith     y[10*i+1] = sum2;
1633b9cda22cSBarry Smith     y[10*i+2] = sum3;
1634b9cda22cSBarry Smith     y[10*i+3] = sum4;
1635b9cda22cSBarry Smith     y[10*i+4] = sum5;
1636b9cda22cSBarry Smith     y[10*i+5] = sum6;
1637b9cda22cSBarry Smith     y[10*i+6] = sum7;
1638b9cda22cSBarry Smith     y[10*i+7] = sum8;
1639b9cda22cSBarry Smith     y[10*i+8] = sum9;
1640b9cda22cSBarry Smith     y[10*i+9] = sum10;
1641b9cda22cSBarry Smith   }
1642b9cda22cSBarry Smith 
1643b9cda22cSBarry Smith   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1644b9cda22cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1645b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1646b9cda22cSBarry Smith   PetscFunctionReturn(0);
1647b9cda22cSBarry Smith }
1648b9cda22cSBarry Smith 
1649b9cda22cSBarry Smith #undef __FUNCT__
1650b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1651b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1652b9cda22cSBarry Smith {
1653b9cda22cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1654b9cda22cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1655b9cda22cSBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1656b9cda22cSBarry Smith   PetscErrorCode ierr;
1657899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1658b9cda22cSBarry Smith   PetscInt       n,i,jrow,j;
1659b9cda22cSBarry Smith 
1660b9cda22cSBarry Smith   PetscFunctionBegin;
1661b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1662b9cda22cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1663b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1664b9cda22cSBarry Smith   idx  = a->j;
1665b9cda22cSBarry Smith   v    = a->a;
1666b9cda22cSBarry Smith   ii   = a->i;
1667b9cda22cSBarry Smith 
1668b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1669b9cda22cSBarry Smith     jrow = ii[i];
1670b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1671b9cda22cSBarry Smith     sum1  = 0.0;
1672b9cda22cSBarry Smith     sum2  = 0.0;
1673b9cda22cSBarry Smith     sum3  = 0.0;
1674b9cda22cSBarry Smith     sum4  = 0.0;
1675b9cda22cSBarry Smith     sum5  = 0.0;
1676b9cda22cSBarry Smith     sum6  = 0.0;
1677b9cda22cSBarry Smith     sum7  = 0.0;
1678b9cda22cSBarry Smith     sum8  = 0.0;
1679b9cda22cSBarry Smith     sum9  = 0.0;
1680b9cda22cSBarry Smith     sum10 = 0.0;
1681b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1682b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1683b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1684b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1685b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1686b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1687b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1688b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1689b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1690b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1691b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1692b9cda22cSBarry Smith       jrow++;
1693b9cda22cSBarry Smith      }
1694b9cda22cSBarry Smith     y[10*i]   += sum1;
1695b9cda22cSBarry Smith     y[10*i+1] += sum2;
1696b9cda22cSBarry Smith     y[10*i+2] += sum3;
1697b9cda22cSBarry Smith     y[10*i+3] += sum4;
1698b9cda22cSBarry Smith     y[10*i+4] += sum5;
1699b9cda22cSBarry Smith     y[10*i+5] += sum6;
1700b9cda22cSBarry Smith     y[10*i+6] += sum7;
1701b9cda22cSBarry Smith     y[10*i+7] += sum8;
1702b9cda22cSBarry Smith     y[10*i+8] += sum9;
1703b9cda22cSBarry Smith     y[10*i+9] += sum10;
1704b9cda22cSBarry Smith   }
1705b9cda22cSBarry Smith 
1706b9cda22cSBarry Smith   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1707b9cda22cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1708b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1709b9cda22cSBarry Smith   PetscFunctionReturn(0);
1710b9cda22cSBarry Smith }
1711b9cda22cSBarry Smith 
1712b9cda22cSBarry Smith #undef __FUNCT__
1713b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1714b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1715b9cda22cSBarry Smith {
1716b9cda22cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1717b9cda22cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1718b9cda22cSBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1719b9cda22cSBarry Smith   PetscErrorCode ierr;
1720899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1721b9cda22cSBarry Smith 
1722b9cda22cSBarry Smith   PetscFunctionBegin;
1723b9cda22cSBarry Smith   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1724b9cda22cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1725b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1726b9cda22cSBarry Smith 
1727b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1728b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1729b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1730b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1731e29fdc22SBarry Smith     alpha1 = x[10*i];
1732e29fdc22SBarry Smith     alpha2 = x[10*i+1];
1733e29fdc22SBarry Smith     alpha3 = x[10*i+2];
1734e29fdc22SBarry Smith     alpha4 = x[10*i+3];
1735e29fdc22SBarry Smith     alpha5 = x[10*i+4];
1736e29fdc22SBarry Smith     alpha6 = x[10*i+5];
1737e29fdc22SBarry Smith     alpha7 = x[10*i+6];
1738e29fdc22SBarry Smith     alpha8 = x[10*i+7];
1739e29fdc22SBarry Smith     alpha9 = x[10*i+8];
1740b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1741b9cda22cSBarry Smith     while (n-->0) {
1742e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1743e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1744e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1745e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1746e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1747e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1748e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1749e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1750e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1751b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1752b9cda22cSBarry Smith       idx++; v++;
1753b9cda22cSBarry Smith     }
1754b9cda22cSBarry Smith   }
1755899cda47SBarry Smith   ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr);
1756b9cda22cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1757b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1758b9cda22cSBarry Smith   PetscFunctionReturn(0);
1759b9cda22cSBarry Smith }
1760b9cda22cSBarry Smith 
1761b9cda22cSBarry Smith #undef __FUNCT__
1762b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1763b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1764b9cda22cSBarry Smith {
1765b9cda22cSBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1766b9cda22cSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1767b9cda22cSBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1768b9cda22cSBarry Smith   PetscErrorCode ierr;
1769899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1770b9cda22cSBarry Smith 
1771b9cda22cSBarry Smith   PetscFunctionBegin;
1772b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1773b9cda22cSBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1774b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1775b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1776b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1777b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1778b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1779b9cda22cSBarry Smith     alpha1 = x[10*i];
1780b9cda22cSBarry Smith     alpha2 = x[10*i+1];
1781b9cda22cSBarry Smith     alpha3 = x[10*i+2];
1782b9cda22cSBarry Smith     alpha4 = x[10*i+3];
1783b9cda22cSBarry Smith     alpha5 = x[10*i+4];
1784b9cda22cSBarry Smith     alpha6 = x[10*i+5];
1785b9cda22cSBarry Smith     alpha7 = x[10*i+6];
1786b9cda22cSBarry Smith     alpha8 = x[10*i+7];
1787b9cda22cSBarry Smith     alpha9 = x[10*i+8];
1788b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1789b9cda22cSBarry Smith     while (n-->0) {
1790b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1791b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1792b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1793b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1794b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1795b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1796b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1797b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1798b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1799b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1800b9cda22cSBarry Smith       idx++; v++;
1801b9cda22cSBarry Smith     }
1802b9cda22cSBarry Smith   }
1803b9cda22cSBarry Smith   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1804b9cda22cSBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1805b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1806b9cda22cSBarry Smith   PetscFunctionReturn(0);
1807b9cda22cSBarry Smith }
1808b9cda22cSBarry Smith 
18092b692628SMatthew Knepley 
18102f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
18112f7816d4SBarry Smith #undef __FUNCT__
18122f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
1813dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
18142f7816d4SBarry Smith {
18152f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
18162f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
18172f7816d4SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
18182f7816d4SBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1819dfbe8321SBarry Smith   PetscErrorCode ierr;
1820899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1821b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
18222f7816d4SBarry Smith 
18232f7816d4SBarry Smith   PetscFunctionBegin;
18241ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
18251ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
18262f7816d4SBarry Smith   idx  = a->j;
18272f7816d4SBarry Smith   v    = a->a;
18282f7816d4SBarry Smith   ii   = a->i;
18292f7816d4SBarry Smith 
18302f7816d4SBarry Smith   for (i=0; i<m; i++) {
18312f7816d4SBarry Smith     jrow = ii[i];
18322f7816d4SBarry Smith     n    = ii[i+1] - jrow;
18332f7816d4SBarry Smith     sum1  = 0.0;
18342f7816d4SBarry Smith     sum2  = 0.0;
18352f7816d4SBarry Smith     sum3  = 0.0;
18362f7816d4SBarry Smith     sum4  = 0.0;
18372f7816d4SBarry Smith     sum5  = 0.0;
18382f7816d4SBarry Smith     sum6  = 0.0;
18392f7816d4SBarry Smith     sum7  = 0.0;
18402f7816d4SBarry Smith     sum8  = 0.0;
18412f7816d4SBarry Smith     sum9  = 0.0;
18422f7816d4SBarry Smith     sum10 = 0.0;
18432f7816d4SBarry Smith     sum11 = 0.0;
18442f7816d4SBarry Smith     sum12 = 0.0;
18452f7816d4SBarry Smith     sum13 = 0.0;
18462f7816d4SBarry Smith     sum14 = 0.0;
18472f7816d4SBarry Smith     sum15 = 0.0;
18482f7816d4SBarry Smith     sum16 = 0.0;
18492f7816d4SBarry Smith     for (j=0; j<n; j++) {
18502f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
18512f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
18522f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
18532f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
18542f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
18552f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
18562f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
18572f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
18582f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
18592f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
18602f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
18612f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
18622f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
18632f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
18642f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
18652f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
18662f7816d4SBarry Smith       jrow++;
18672f7816d4SBarry Smith      }
18682f7816d4SBarry Smith     y[16*i]    = sum1;
18692f7816d4SBarry Smith     y[16*i+1]  = sum2;
18702f7816d4SBarry Smith     y[16*i+2]  = sum3;
18712f7816d4SBarry Smith     y[16*i+3]  = sum4;
18722f7816d4SBarry Smith     y[16*i+4]  = sum5;
18732f7816d4SBarry Smith     y[16*i+5]  = sum6;
18742f7816d4SBarry Smith     y[16*i+6]  = sum7;
18752f7816d4SBarry Smith     y[16*i+7]  = sum8;
18762f7816d4SBarry Smith     y[16*i+8]  = sum9;
18772f7816d4SBarry Smith     y[16*i+9]  = sum10;
18782f7816d4SBarry Smith     y[16*i+10] = sum11;
18792f7816d4SBarry Smith     y[16*i+11] = sum12;
18802f7816d4SBarry Smith     y[16*i+12] = sum13;
18812f7816d4SBarry Smith     y[16*i+13] = sum14;
18822f7816d4SBarry Smith     y[16*i+14] = sum15;
18832f7816d4SBarry Smith     y[16*i+15] = sum16;
18842f7816d4SBarry Smith   }
18852f7816d4SBarry Smith 
1886efee365bSSatish Balay   ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr);
18871ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
18881ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
18892f7816d4SBarry Smith   PetscFunctionReturn(0);
18902f7816d4SBarry Smith }
18912f7816d4SBarry Smith 
18922f7816d4SBarry Smith #undef __FUNCT__
18932f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1894dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
18952f7816d4SBarry Smith {
18962f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
18972f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
18982f7816d4SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
18992f7816d4SBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1900dfbe8321SBarry Smith   PetscErrorCode ierr;
1901899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
19022f7816d4SBarry Smith 
19032f7816d4SBarry Smith   PetscFunctionBegin;
19042dcb1b2aSMatthew Knepley   ierr = VecSet(yy,zero);CHKERRQ(ierr);
19051ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
19061ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1907bfec09a0SHong Zhang 
19082f7816d4SBarry Smith   for (i=0; i<m; i++) {
1909bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1910bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
19112f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
19122f7816d4SBarry Smith     alpha1  = x[16*i];
19132f7816d4SBarry Smith     alpha2  = x[16*i+1];
19142f7816d4SBarry Smith     alpha3  = x[16*i+2];
19152f7816d4SBarry Smith     alpha4  = x[16*i+3];
19162f7816d4SBarry Smith     alpha5  = x[16*i+4];
19172f7816d4SBarry Smith     alpha6  = x[16*i+5];
19182f7816d4SBarry Smith     alpha7  = x[16*i+6];
19192f7816d4SBarry Smith     alpha8  = x[16*i+7];
19202f7816d4SBarry Smith     alpha9  = x[16*i+8];
19212f7816d4SBarry Smith     alpha10 = x[16*i+9];
19222f7816d4SBarry Smith     alpha11 = x[16*i+10];
19232f7816d4SBarry Smith     alpha12 = x[16*i+11];
19242f7816d4SBarry Smith     alpha13 = x[16*i+12];
19252f7816d4SBarry Smith     alpha14 = x[16*i+13];
19262f7816d4SBarry Smith     alpha15 = x[16*i+14];
19272f7816d4SBarry Smith     alpha16 = x[16*i+15];
19282f7816d4SBarry Smith     while (n-->0) {
19292f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
19302f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
19312f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
19322f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
19332f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
19342f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
19352f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
19362f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
19372f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
19382f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
19392f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
19402f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
19412f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
19422f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
19432f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
19442f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
19452f7816d4SBarry Smith       idx++; v++;
19462f7816d4SBarry Smith     }
19472f7816d4SBarry Smith   }
1948899cda47SBarry Smith   ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr);
19491ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
19501ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
19512f7816d4SBarry Smith   PetscFunctionReturn(0);
19522f7816d4SBarry Smith }
19532f7816d4SBarry Smith 
19542f7816d4SBarry Smith #undef __FUNCT__
19552f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1956dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
19572f7816d4SBarry Smith {
19582f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
19592f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
19602f7816d4SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
19612f7816d4SBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1962dfbe8321SBarry Smith   PetscErrorCode ierr;
1963899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1964b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
19652f7816d4SBarry Smith 
19662f7816d4SBarry Smith   PetscFunctionBegin;
19672f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19681ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
19691ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
19702f7816d4SBarry Smith   idx  = a->j;
19712f7816d4SBarry Smith   v    = a->a;
19722f7816d4SBarry Smith   ii   = a->i;
19732f7816d4SBarry Smith 
19742f7816d4SBarry Smith   for (i=0; i<m; i++) {
19752f7816d4SBarry Smith     jrow = ii[i];
19762f7816d4SBarry Smith     n    = ii[i+1] - jrow;
19772f7816d4SBarry Smith     sum1  = 0.0;
19782f7816d4SBarry Smith     sum2  = 0.0;
19792f7816d4SBarry Smith     sum3  = 0.0;
19802f7816d4SBarry Smith     sum4  = 0.0;
19812f7816d4SBarry Smith     sum5  = 0.0;
19822f7816d4SBarry Smith     sum6  = 0.0;
19832f7816d4SBarry Smith     sum7  = 0.0;
19842f7816d4SBarry Smith     sum8  = 0.0;
19852f7816d4SBarry Smith     sum9  = 0.0;
19862f7816d4SBarry Smith     sum10 = 0.0;
19872f7816d4SBarry Smith     sum11 = 0.0;
19882f7816d4SBarry Smith     sum12 = 0.0;
19892f7816d4SBarry Smith     sum13 = 0.0;
19902f7816d4SBarry Smith     sum14 = 0.0;
19912f7816d4SBarry Smith     sum15 = 0.0;
19922f7816d4SBarry Smith     sum16 = 0.0;
19932f7816d4SBarry Smith     for (j=0; j<n; j++) {
19942f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
19952f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
19962f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
19972f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
19982f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
19992f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
20002f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
20012f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
20022f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
20032f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
20042f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
20052f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
20062f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
20072f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
20082f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
20092f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
20102f7816d4SBarry Smith       jrow++;
20112f7816d4SBarry Smith      }
20122f7816d4SBarry Smith     y[16*i]    += sum1;
20132f7816d4SBarry Smith     y[16*i+1]  += sum2;
20142f7816d4SBarry Smith     y[16*i+2]  += sum3;
20152f7816d4SBarry Smith     y[16*i+3]  += sum4;
20162f7816d4SBarry Smith     y[16*i+4]  += sum5;
20172f7816d4SBarry Smith     y[16*i+5]  += sum6;
20182f7816d4SBarry Smith     y[16*i+6]  += sum7;
20192f7816d4SBarry Smith     y[16*i+7]  += sum8;
20202f7816d4SBarry Smith     y[16*i+8]  += sum9;
20212f7816d4SBarry Smith     y[16*i+9]  += sum10;
20222f7816d4SBarry Smith     y[16*i+10] += sum11;
20232f7816d4SBarry Smith     y[16*i+11] += sum12;
20242f7816d4SBarry Smith     y[16*i+12] += sum13;
20252f7816d4SBarry Smith     y[16*i+13] += sum14;
20262f7816d4SBarry Smith     y[16*i+14] += sum15;
20272f7816d4SBarry Smith     y[16*i+15] += sum16;
20282f7816d4SBarry Smith   }
20292f7816d4SBarry Smith 
2030efee365bSSatish Balay   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
20311ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
20321ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
20332f7816d4SBarry Smith   PetscFunctionReturn(0);
20342f7816d4SBarry Smith }
20352f7816d4SBarry Smith 
20362f7816d4SBarry Smith #undef __FUNCT__
20372f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2038dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
20392f7816d4SBarry Smith {
20402f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
20412f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
20422f7816d4SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
20432f7816d4SBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2044dfbe8321SBarry Smith   PetscErrorCode ierr;
2045899cda47SBarry Smith   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
20462f7816d4SBarry Smith 
20472f7816d4SBarry Smith   PetscFunctionBegin;
20482f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
20491ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
20501ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
20512f7816d4SBarry Smith   for (i=0; i<m; i++) {
2052bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2053bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
20542f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
20552f7816d4SBarry Smith     alpha1 = x[16*i];
20562f7816d4SBarry Smith     alpha2 = x[16*i+1];
20572f7816d4SBarry Smith     alpha3 = x[16*i+2];
20582f7816d4SBarry Smith     alpha4 = x[16*i+3];
20592f7816d4SBarry Smith     alpha5 = x[16*i+4];
20602f7816d4SBarry Smith     alpha6 = x[16*i+5];
20612f7816d4SBarry Smith     alpha7 = x[16*i+6];
20622f7816d4SBarry Smith     alpha8 = x[16*i+7];
20632f7816d4SBarry Smith     alpha9  = x[16*i+8];
20642f7816d4SBarry Smith     alpha10 = x[16*i+9];
20652f7816d4SBarry Smith     alpha11 = x[16*i+10];
20662f7816d4SBarry Smith     alpha12 = x[16*i+11];
20672f7816d4SBarry Smith     alpha13 = x[16*i+12];
20682f7816d4SBarry Smith     alpha14 = x[16*i+13];
20692f7816d4SBarry Smith     alpha15 = x[16*i+14];
20702f7816d4SBarry Smith     alpha16 = x[16*i+15];
20712f7816d4SBarry Smith     while (n-->0) {
20722f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
20732f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
20742f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
20752f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
20762f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
20772f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
20782f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
20792f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
20802f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
20812f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
20822f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
20832f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
20842f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
20852f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
20862f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
20872f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
20882f7816d4SBarry Smith       idx++; v++;
20892f7816d4SBarry Smith     }
20902f7816d4SBarry Smith   }
2091efee365bSSatish Balay   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
20921ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
20931ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
209466ed3db0SBarry Smith   PetscFunctionReturn(0);
209566ed3db0SBarry Smith }
209666ed3db0SBarry Smith 
2097f4a53059SBarry Smith /*===================================================================================*/
20984a2ae208SSatish Balay #undef __FUNCT__
20994a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2100dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2101f4a53059SBarry Smith {
21024cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2103dfbe8321SBarry Smith   PetscErrorCode ierr;
2104f4a53059SBarry Smith 
2105b24ad042SBarry Smith   PetscFunctionBegin;
2106f4a53059SBarry Smith   /* start the scatter */
2107f4a53059SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
21084cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2109f4a53059SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
21104cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2111f4a53059SBarry Smith   PetscFunctionReturn(0);
2112f4a53059SBarry Smith }
2113f4a53059SBarry Smith 
21144a2ae208SSatish Balay #undef __FUNCT__
21154a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2116dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
21174cb9d9b8SBarry Smith {
21184cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2119dfbe8321SBarry Smith   PetscErrorCode ierr;
2120b24ad042SBarry Smith 
21214cb9d9b8SBarry Smith   PetscFunctionBegin;
21224cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
21234cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2124a5ff213dSBarry Smith   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
21254cb9d9b8SBarry Smith   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
21264cb9d9b8SBarry Smith   PetscFunctionReturn(0);
21274cb9d9b8SBarry Smith }
21284cb9d9b8SBarry Smith 
21294a2ae208SSatish Balay #undef __FUNCT__
21304a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2131dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
21324cb9d9b8SBarry Smith {
21334cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2134dfbe8321SBarry Smith   PetscErrorCode ierr;
21354cb9d9b8SBarry Smith 
2136b24ad042SBarry Smith   PetscFunctionBegin;
21374cb9d9b8SBarry Smith   /* start the scatter */
21384cb9d9b8SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2139d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
21404cb9d9b8SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2141717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
21424cb9d9b8SBarry Smith   PetscFunctionReturn(0);
21434cb9d9b8SBarry Smith }
21444cb9d9b8SBarry Smith 
21454a2ae208SSatish Balay #undef __FUNCT__
21464a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2147dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
21484cb9d9b8SBarry Smith {
21494cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2150dfbe8321SBarry Smith   PetscErrorCode ierr;
2151b24ad042SBarry Smith 
21524cb9d9b8SBarry Smith   PetscFunctionBegin;
21534cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2154d72c5c08SBarry Smith   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2155d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2156d72c5c08SBarry Smith   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
21574cb9d9b8SBarry Smith   PetscFunctionReturn(0);
21584cb9d9b8SBarry Smith }
21594cb9d9b8SBarry Smith 
21609c4fc4c7SBarry Smith #undef __FUNCT__
21617ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
21627ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
21637ba1a0bfSKris Buschelman {
21647ba1a0bfSKris Buschelman   /* This routine requires testing -- but it's getting better. */
21657ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
2166a1a86e44SBarry Smith   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
21677ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
21687ba1a0bfSKris Buschelman   Mat                P=pp->AIJ;
21697ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
21707ba1a0bfSKris Buschelman   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
21717ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2172899cda47SBarry Smith   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
21737ba1a0bfSKris Buschelman   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
21747ba1a0bfSKris Buschelman   MatScalar          *ca;
21757ba1a0bfSKris Buschelman 
21767ba1a0bfSKris Buschelman   PetscFunctionBegin;
21777ba1a0bfSKris Buschelman   /* Start timer */
21787ba1a0bfSKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
21797ba1a0bfSKris Buschelman 
21807ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
21817ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
21827ba1a0bfSKris Buschelman 
21837ba1a0bfSKris Buschelman   cn = pn*ppdof;
21847ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
21857ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
21867ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
21877ba1a0bfSKris Buschelman   ci[0] = 0;
21887ba1a0bfSKris Buschelman 
21897ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
21907ba1a0bfSKris Buschelman   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
21917ba1a0bfSKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
21927ba1a0bfSKris Buschelman   ptasparserow = ptadenserow  + an;
21937ba1a0bfSKris Buschelman   denserow     = ptasparserow + an;
21947ba1a0bfSKris Buschelman   sparserow    = denserow     + cn;
21957ba1a0bfSKris Buschelman 
21967ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
21977ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
21987ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2199a1a86e44SBarry Smith   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
22007ba1a0bfSKris Buschelman   current_space = free_space;
22017ba1a0bfSKris Buschelman 
22027ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
22037ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
22047ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
22057ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
22067ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
22077ba1a0bfSKris Buschelman       ptanzi = 0;
22087ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
22097ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
22107ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
22117ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
22127ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
22137ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
22147ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
22157ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
22167ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
22177ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
22187ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
22197ba1a0bfSKris Buschelman           }
22207ba1a0bfSKris Buschelman         }
22217ba1a0bfSKris Buschelman       }
22227ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
22237ba1a0bfSKris Buschelman       ptaj = ptasparserow;
22247ba1a0bfSKris Buschelman       cnzi   = 0;
22257ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
22267ba1a0bfSKris Buschelman         /* Get offset within block of P */
22277ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
22287ba1a0bfSKris Buschelman         /* Get block row of P */
22297ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
22307ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
22317ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
22327ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
22337ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
22347ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
22357ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
22367ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
22377ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
22387ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
22397ba1a0bfSKris Buschelman           }
22407ba1a0bfSKris Buschelman         }
22417ba1a0bfSKris Buschelman       }
22427ba1a0bfSKris Buschelman 
22437ba1a0bfSKris Buschelman       /* sort sparserow */
22447ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
22457ba1a0bfSKris Buschelman 
22467ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
22477ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
22487ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
2249a1a86e44SBarry Smith         ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
22507ba1a0bfSKris Buschelman       }
22517ba1a0bfSKris Buschelman 
22527ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
22537ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
22547ba1a0bfSKris Buschelman       current_space->array           += cnzi;
22557ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
22567ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
22577ba1a0bfSKris Buschelman 
22587ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
22597ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
22607ba1a0bfSKris Buschelman       }
22617ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
22627ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
22637ba1a0bfSKris Buschelman       }
22647ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
22657ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
22667ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
22677ba1a0bfSKris Buschelman     }
22687ba1a0bfSKris Buschelman   }
22697ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
22707ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
22717ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
22727ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
2273a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
22747ba1a0bfSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
22757ba1a0bfSKris Buschelman 
22767ba1a0bfSKris Buschelman   /* Allocate space for ca */
22777ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
22787ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
22797ba1a0bfSKris Buschelman 
22807ba1a0bfSKris Buschelman   /* put together the new matrix */
22817ba1a0bfSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
22827ba1a0bfSKris Buschelman 
22837ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
22847ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
22857ba1a0bfSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
22867ba1a0bfSKris Buschelman   c->freedata = PETSC_TRUE;
22877ba1a0bfSKris Buschelman   c->nonew    = 0;
22887ba1a0bfSKris Buschelman 
22897ba1a0bfSKris Buschelman   /* Clean up. */
22907ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
22917ba1a0bfSKris Buschelman 
22927ba1a0bfSKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
22937ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
22947ba1a0bfSKris Buschelman }
22957ba1a0bfSKris Buschelman 
22967ba1a0bfSKris Buschelman #undef __FUNCT__
22977ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
22987ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
22997ba1a0bfSKris Buschelman {
23007ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
23017ba1a0bfSKris Buschelman   PetscErrorCode ierr;
23027ba1a0bfSKris Buschelman   PetscInt       flops=0;
23037ba1a0bfSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
23047ba1a0bfSKris Buschelman   Mat            P=pp->AIJ;
23057ba1a0bfSKris Buschelman   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
23067ba1a0bfSKris Buschelman   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
23077ba1a0bfSKris Buschelman   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
23087ba1a0bfSKris Buschelman   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
23097ba1a0bfSKris Buschelman   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2310899cda47SBarry Smith   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
23117ba1a0bfSKris Buschelman   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
23127ba1a0bfSKris Buschelman   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
23137ba1a0bfSKris Buschelman 
23147ba1a0bfSKris Buschelman   PetscFunctionBegin;
23157ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
23167ba1a0bfSKris Buschelman   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
23177ba1a0bfSKris Buschelman   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
23187ba1a0bfSKris Buschelman 
23197ba1a0bfSKris Buschelman   apj      = (PetscInt *)(apa + cn);
23207ba1a0bfSKris Buschelman   apjdense = apj + cn;
23217ba1a0bfSKris Buschelman 
23227ba1a0bfSKris Buschelman   /* Clear old values in C */
23237ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
23247ba1a0bfSKris Buschelman 
23257ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
23267ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
23277ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
23287ba1a0bfSKris Buschelman     apnzj = 0;
23297ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
23307ba1a0bfSKris Buschelman       /* Get offset within block of P */
23317ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
23327ba1a0bfSKris Buschelman       /* Get block row of P */
23337ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
23347ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
23357ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
23367ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
23377ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
23387ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
23397ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
23407ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
23417ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
23427ba1a0bfSKris Buschelman         }
23437ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
23447ba1a0bfSKris Buschelman       }
23457ba1a0bfSKris Buschelman       flops += 2*pnzj;
23467ba1a0bfSKris Buschelman       aa++;
23477ba1a0bfSKris Buschelman     }
23487ba1a0bfSKris Buschelman 
23497ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
23507ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
23517ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
23527ba1a0bfSKris Buschelman 
23537ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
23547ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
23557ba1a0bfSKris Buschelman     pshift  = i%ppdof;
23567ba1a0bfSKris Buschelman     poffset = pi[prow];
23577ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
23587ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
23597ba1a0bfSKris Buschelman     pJ   = pj+poffset;
23607ba1a0bfSKris Buschelman     pA   = pa+poffset;
23617ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
23627ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
23637ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
23647ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
23657ba1a0bfSKris Buschelman       pJ++;
23667ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
23677ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
23687ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
23697ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
23707ba1a0bfSKris Buschelman         }
23717ba1a0bfSKris Buschelman       }
23727ba1a0bfSKris Buschelman       flops += 2*apnzj;
23737ba1a0bfSKris Buschelman       pA++;
23747ba1a0bfSKris Buschelman     }
23757ba1a0bfSKris Buschelman 
23767ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
23777ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
23787ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
23797ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
23807ba1a0bfSKris Buschelman     }
23817ba1a0bfSKris Buschelman   }
23827ba1a0bfSKris Buschelman 
23837ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
23847ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
23857ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
23867ba1a0bfSKris Buschelman   ierr = PetscFree(apa);CHKERRQ(ierr);
23877ba1a0bfSKris Buschelman   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
23887ba1a0bfSKris Buschelman 
23897ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
23907ba1a0bfSKris Buschelman }
23917ba1a0bfSKris Buschelman 
2392be1d678aSKris Buschelman EXTERN_C_BEGIN
23937ba1a0bfSKris Buschelman #undef __FUNCT__
23940fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2395f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
23969c4fc4c7SBarry Smith {
23979c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2398ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
23999c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
24009c4fc4c7SBarry Smith   PetscErrorCode    ierr;
24010fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2402ba8c8a56SBarry Smith   PetscInt          *cols;
2403ba8c8a56SBarry Smith   PetscScalar       *vals;
24049c4fc4c7SBarry Smith 
24059c4fc4c7SBarry Smith   PetscFunctionBegin;
24069c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
24077c307921SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
24089c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
24099c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
24100fd73130SBarry Smith     for (j=0; j<dof; j++) {
24110fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
24129c4fc4c7SBarry Smith     }
24139c4fc4c7SBarry Smith   }
2414ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
2415ceb03754SKris Buschelman   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
24169c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
24179c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
24189c4fc4c7SBarry Smith   ii   = 0;
24199c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
2420ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
24210fd73130SBarry Smith     for (j=0; j<dof; j++) {
24229c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
24230fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
24249c4fc4c7SBarry Smith       }
2425ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
24269c4fc4c7SBarry Smith       ii++;
24279c4fc4c7SBarry Smith     }
2428ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
24299c4fc4c7SBarry Smith   }
24309c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
2431ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2432ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2433ceb03754SKris Buschelman 
2434ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
24358ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2436c3d102feSKris Buschelman   } else {
2437ceb03754SKris Buschelman     *newmat = B;
2438c3d102feSKris Buschelman   }
24399c4fc4c7SBarry Smith   PetscFunctionReturn(0);
24409c4fc4c7SBarry Smith }
2441be1d678aSKris Buschelman EXTERN_C_END
24429c4fc4c7SBarry Smith 
24430fd73130SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
2444be1d678aSKris Buschelman 
2445be1d678aSKris Buschelman EXTERN_C_BEGIN
24460fd73130SBarry Smith #undef __FUNCT__
24470fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2448f69a0ea3SMatthew Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
24490fd73130SBarry Smith {
24500fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2451ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
24520fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
24530fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
24540fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
24550fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
24567a1a7badSBarry Smith   PetscInt          dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols;
24570fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
24580fd73130SBarry Smith   PetscErrorCode    ierr;
24590fd73130SBarry Smith   PetscScalar       *vals,*ovals;
24600fd73130SBarry Smith 
24610fd73130SBarry Smith   PetscFunctionBegin;
2462899cda47SBarry Smith   ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr);
2463899cda47SBarry Smith   for (i=0; i<A->rmap.n/dof; i++) {
24640fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
24650fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
24660fd73130SBarry Smith     for (j=0; j<dof; j++) {
24670fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
24680fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
24690fd73130SBarry Smith     }
24700fd73130SBarry Smith   }
2471899cda47SBarry Smith   ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2472ceb03754SKris Buschelman   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
24730fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
24740fd73130SBarry Smith 
24757a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
2476*9b5a856fSSatish Balay   rstart = dof*maij->A->rmap.rstart;
2477*9b5a856fSSatish Balay   cstart = dof*maij->A->cmap.rstart;
24780fd73130SBarry Smith   garray = mpiaij->garray;
24790fd73130SBarry Smith 
24800fd73130SBarry Smith   ii = rstart;
2481899cda47SBarry Smith   for (i=0; i<A->rmap.n/dof; i++) {
24820fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
24830fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
24840fd73130SBarry Smith     for (j=0; j<dof; j++) {
24850fd73130SBarry Smith       for (k=0; k<ncols; k++) {
24860fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
24870fd73130SBarry Smith       }
24880fd73130SBarry Smith       for (k=0; k<oncols; k++) {
24890fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
24900fd73130SBarry Smith       }
2491ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2492ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
24930fd73130SBarry Smith       ii++;
24940fd73130SBarry Smith     }
24950fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
24960fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
24970fd73130SBarry Smith   }
24980fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
24990fd73130SBarry Smith 
2500ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2501ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2502ceb03754SKris Buschelman 
2503ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
25048ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2505c3d102feSKris Buschelman   } else {
2506ceb03754SKris Buschelman     *newmat = B;
2507c3d102feSKris Buschelman   }
25080fd73130SBarry Smith   PetscFunctionReturn(0);
25090fd73130SBarry Smith }
2510be1d678aSKris Buschelman EXTERN_C_END
25110fd73130SBarry Smith 
25120fd73130SBarry Smith 
2513bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
25145983afb6SSatish Balay /*MC
25150bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
25160bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
25170bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
25180bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
25190bad9183SKris Buschelman 
25200bad9183SKris Buschelman   Operations provided:
25210fd73130SBarry Smith + MatMult
25220bad9183SKris Buschelman . MatMultTranspose
25230bad9183SKris Buschelman . MatMultAdd
25240bad9183SKris Buschelman . MatMultTransposeAdd
25250fd73130SBarry Smith - MatView
25260bad9183SKris Buschelman 
25270bad9183SKris Buschelman   Level: advanced
25280bad9183SKris Buschelman 
25290bad9183SKris Buschelman M*/
25304a2ae208SSatish Balay #undef __FUNCT__
25314a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ"
2532be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
253382b1193eSBarry Smith {
2534dfbe8321SBarry Smith   PetscErrorCode ierr;
2535b24ad042SBarry Smith   PetscMPIInt    size;
2536b24ad042SBarry Smith   PetscInt       n;
25374cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
253882b1193eSBarry Smith   Mat            B;
253982b1193eSBarry Smith 
254082b1193eSBarry Smith   PetscFunctionBegin;
2541d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2542d72c5c08SBarry Smith 
2543d72c5c08SBarry Smith   if (dof == 1) {
2544d72c5c08SBarry Smith     *maij = A;
2545d72c5c08SBarry Smith   } else {
2546f69a0ea3SMatthew Knepley     ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
2547899cda47SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr);
2548cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
2549d72c5c08SBarry Smith 
2550f4a53059SBarry Smith     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2551f4a53059SBarry Smith     if (size == 1) {
2552b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
25534cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
25540fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
2555b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
2556b9b97703SBarry Smith       b->dof = dof;
25574cb9d9b8SBarry Smith       b->AIJ = A;
2558d72c5c08SBarry Smith       if (dof == 2) {
2559cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
2560cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2561cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2562cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2563bcc973b7SBarry Smith       } else if (dof == 3) {
2564bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
2565bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2566bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2567bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2568bcc973b7SBarry Smith       } else if (dof == 4) {
2569bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
2570bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2571bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2572bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2573f9fae5adSBarry Smith       } else if (dof == 5) {
2574f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
2575f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2576f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2577f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
25786cd98798SBarry Smith       } else if (dof == 6) {
25796cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
25806cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
25816cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
25826cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2583ed8eea03SBarry Smith       } else if (dof == 7) {
2584ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
2585ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2586ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2587ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
258866ed3db0SBarry Smith       } else if (dof == 8) {
258966ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
259066ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
259166ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
259266ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
25932b692628SMatthew Knepley       } else if (dof == 9) {
25942b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
25952b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
25962b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
25972b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2598b9cda22cSBarry Smith       } else if (dof == 10) {
2599b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
2600b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2601b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2602b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
26032f7816d4SBarry Smith       } else if (dof == 16) {
26042f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
26052f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
26062f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
26072f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
260882b1193eSBarry Smith       } else {
260977431f27SBarry Smith         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
261082b1193eSBarry Smith       }
26117ba1a0bfSKris Buschelman       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
26127ba1a0bfSKris Buschelman       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
26139c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2614f4a53059SBarry Smith     } else {
2615f4a53059SBarry Smith       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2616f4a53059SBarry Smith       IS         from,to;
2617f4a53059SBarry Smith       Vec        gvec;
2618b24ad042SBarry Smith       PetscInt   *garray,i;
2619f4a53059SBarry Smith 
2620b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2621d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
26220fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
2623b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
2624b9b97703SBarry Smith       b->dof = dof;
2625b9b97703SBarry Smith       b->A   = A;
26264cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
26274cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
26284cb9d9b8SBarry Smith 
2629f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2630f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
263113288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2632f4a53059SBarry Smith 
2633f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
2634b24ad042SBarry Smith       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2635f4a53059SBarry Smith       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2636f4a53059SBarry Smith       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
2637f4a53059SBarry Smith       ierr = PetscFree(garray);CHKERRQ(ierr);
2638f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2639f4a53059SBarry Smith 
2640f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
2641899cda47SBarry Smith       ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr);
264213288a74SBarry Smith       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2643f4a53059SBarry Smith 
2644f4a53059SBarry Smith       /* generate the scatter context */
2645f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
2646f4a53059SBarry Smith 
2647f4a53059SBarry Smith       ierr = ISDestroy(from);CHKERRQ(ierr);
2648f4a53059SBarry Smith       ierr = ISDestroy(to);CHKERRQ(ierr);
2649f4a53059SBarry Smith       ierr = VecDestroy(gvec);CHKERRQ(ierr);
2650f4a53059SBarry Smith 
2651f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
26524cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
26534cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
26544cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
26550fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
2656f4a53059SBarry Smith     }
2657cd3bbe55SBarry Smith     *maij = B;
26580fd73130SBarry Smith     ierr = MatView_Private(B);CHKERRQ(ierr);
2659d72c5c08SBarry Smith   }
266082b1193eSBarry Smith   PetscFunctionReturn(0);
266182b1193eSBarry Smith }
266282b1193eSBarry Smith 
266382b1193eSBarry Smith 
266482b1193eSBarry Smith 
266582b1193eSBarry Smith 
266682b1193eSBarry Smith 
266782b1193eSBarry Smith 
266882b1193eSBarry Smith 
266982b1193eSBarry Smith 
267082b1193eSBarry Smith 
267182b1193eSBarry Smith 
267282b1193eSBarry Smith 
267382b1193eSBarry Smith 
2674