xref: /petsc/src/mat/impls/maij/maij.c (revision b45d2f2cb7e031d9c0de5873eca80614ca7b863b)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
21*b45d2f2cSJed Brown #include <petsc-private/vecimpl.h>
2282b1193eSBarry Smith 
234a2ae208SSatish Balay #undef __FUNCT__
244a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
25ff585edeSJed Brown /*@C
26ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
27ff585edeSJed Brown 
28ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
29ff585edeSJed Brown 
30ff585edeSJed Brown    Input Parameter:
31ff585edeSJed Brown .  A - the MAIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Output Parameter:
34ff585edeSJed Brown .  B - the AIJ matrix
35ff585edeSJed Brown 
36ff585edeSJed Brown    Level: advanced
37ff585edeSJed Brown 
38ff585edeSJed Brown    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
39ff585edeSJed Brown 
40ff585edeSJed Brown .seealso: MatCreateMAIJ()
41ff585edeSJed Brown @*/
427087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
43b9b97703SBarry Smith {
44dfbe8321SBarry Smith   PetscErrorCode ierr;
45ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
46b9b97703SBarry Smith 
47b9b97703SBarry Smith   PetscFunctionBegin;
48b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
49b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
50b9b97703SBarry Smith   if (ismpimaij) {
51b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith     *B = b->A;
54b9b97703SBarry Smith   } else if (isseqmaij) {
55b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
56b9b97703SBarry Smith 
57b9b97703SBarry Smith     *B = b->AIJ;
58b9b97703SBarry Smith   } else {
59b9b97703SBarry Smith     *B = A;
60b9b97703SBarry Smith   }
61b9b97703SBarry Smith   PetscFunctionReturn(0);
62b9b97703SBarry Smith }
63b9b97703SBarry Smith 
644a2ae208SSatish Balay #undef __FUNCT__
654a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
66ff585edeSJed Brown /*@C
67ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
68ff585edeSJed Brown 
693f9fe445SBarry Smith    Logically Collective
70ff585edeSJed Brown 
71ff585edeSJed Brown    Input Parameter:
72ff585edeSJed Brown +  A - the MAIJ matrix
73ff585edeSJed Brown -  dof - the block size for the new matrix
74ff585edeSJed Brown 
75ff585edeSJed Brown    Output Parameter:
76ff585edeSJed Brown .  B - the new MAIJ matrix
77ff585edeSJed Brown 
78ff585edeSJed Brown    Level: advanced
79ff585edeSJed Brown 
80ff585edeSJed Brown .seealso: MatCreateMAIJ()
81ff585edeSJed Brown @*/
827087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83b9b97703SBarry Smith {
84dfbe8321SBarry Smith   PetscErrorCode ierr;
853b98c0a2SBarry Smith   Mat            Aij = PETSC_NULL;
86b9b97703SBarry Smith 
87b9b97703SBarry Smith   PetscFunctionBegin;
88c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
89b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91b9b97703SBarry Smith   PetscFunctionReturn(0);
92b9b97703SBarry Smith }
93b9b97703SBarry Smith 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
96dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9782b1193eSBarry Smith {
98dfbe8321SBarry Smith   PetscErrorCode ierr;
994cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
10082b1193eSBarry Smith 
10182b1193eSBarry Smith   PetscFunctionBegin;
1026bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
103bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
1044cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1054cb9d9b8SBarry Smith }
1064cb9d9b8SBarry Smith 
1074a2ae208SSatish Balay #undef __FUNCT__
1080fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
1090fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1100fd73130SBarry Smith {
1110fd73130SBarry Smith   PetscErrorCode ierr;
1120fd73130SBarry Smith   Mat            B;
1130fd73130SBarry Smith 
1140fd73130SBarry Smith   PetscFunctionBegin;
115ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1160fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1176bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1180fd73130SBarry Smith   PetscFunctionReturn(0);
1190fd73130SBarry Smith }
1200fd73130SBarry Smith 
1210fd73130SBarry Smith #undef __FUNCT__
1220fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
1230fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1240fd73130SBarry Smith {
1250fd73130SBarry Smith   PetscErrorCode ierr;
1260fd73130SBarry Smith   Mat            B;
1270fd73130SBarry Smith 
1280fd73130SBarry Smith   PetscFunctionBegin;
129ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
1300fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1316bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1320fd73130SBarry Smith   PetscFunctionReturn(0);
1330fd73130SBarry Smith }
1340fd73130SBarry Smith 
1350fd73130SBarry Smith #undef __FUNCT__
1364a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
137dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1384cb9d9b8SBarry Smith {
139dfbe8321SBarry Smith   PetscErrorCode ierr;
1404cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1414cb9d9b8SBarry Smith 
1424cb9d9b8SBarry Smith   PetscFunctionBegin;
1436bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1446bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1456bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1466bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1476bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
148bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
149dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
150b9b97703SBarry Smith   PetscFunctionReturn(0);
151b9b97703SBarry Smith }
152b9b97703SBarry Smith 
1530bad9183SKris Buschelman /*MC
154fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1550bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1560bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1570bad9183SKris Buschelman 
1580bad9183SKris Buschelman   Operations provided:
1590bad9183SKris Buschelman . MatMult
1600bad9183SKris Buschelman . MatMultTranspose
1610bad9183SKris Buschelman . MatMultAdd
1620bad9183SKris Buschelman . MatMultTransposeAdd
1630bad9183SKris Buschelman 
1640bad9183SKris Buschelman   Level: advanced
1650bad9183SKris Buschelman 
166b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1670bad9183SKris Buschelman M*/
1680bad9183SKris Buschelman 
16982b1193eSBarry Smith EXTERN_C_BEGIN
1704a2ae208SSatish Balay #undef __FUNCT__
1714a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
1727087cfbeSBarry Smith PetscErrorCode  MatCreate_MAIJ(Mat A)
17382b1193eSBarry Smith {
174dfbe8321SBarry Smith   PetscErrorCode ierr;
1754cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
17664b52464SHong Zhang   PetscMPIInt    size;
17782b1193eSBarry Smith 
17882b1193eSBarry Smith   PetscFunctionBegin;
17938f2d2fdSLisandro Dalcin   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
180b0a32e0cSBarry Smith   A->data  = (void*)b;
181cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
182f4a53059SBarry Smith 
183cd3bbe55SBarry Smith   b->AIJ  = 0;
184cd3bbe55SBarry Smith   b->dof  = 0;
185f4a53059SBarry Smith   b->OAIJ = 0;
186f4a53059SBarry Smith   b->ctx  = 0;
187f4a53059SBarry Smith   b->w    = 0;
1887adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
18964b52464SHong Zhang   if (size == 1){
19064b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
19164b52464SHong Zhang   } else {
19264b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
19364b52464SHong Zhang   }
19482b1193eSBarry Smith   PetscFunctionReturn(0);
19582b1193eSBarry Smith }
19682b1193eSBarry Smith EXTERN_C_END
19782b1193eSBarry Smith 
198cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1994a2ae208SSatish Balay #undef __FUNCT__
200b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
201dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
20282b1193eSBarry Smith {
2034cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
204bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
205d2840e09SBarry Smith   const PetscScalar *x,*v;
206d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
207dfbe8321SBarry Smith   PetscErrorCode    ierr;
208d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
209d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
21082b1193eSBarry Smith 
211bcc973b7SBarry Smith   PetscFunctionBegin;
2123649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2131ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
214bcc973b7SBarry Smith   idx  = a->j;
215bcc973b7SBarry Smith   v    = a->a;
216bcc973b7SBarry Smith   ii   = a->i;
217bcc973b7SBarry Smith 
218bcc973b7SBarry Smith   for (i=0; i<m; i++) {
219bcc973b7SBarry Smith     jrow = ii[i];
220bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
221bcc973b7SBarry Smith     sum1  = 0.0;
222bcc973b7SBarry Smith     sum2  = 0.0;
223b3c51c66SMatthew Knepley     nonzerorow += (n>0);
224bcc973b7SBarry Smith     for (j=0; j<n; j++) {
225bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
226bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
227bcc973b7SBarry Smith       jrow++;
228bcc973b7SBarry Smith      }
229bcc973b7SBarry Smith     y[2*i]   = sum1;
230bcc973b7SBarry Smith     y[2*i+1] = sum2;
231bcc973b7SBarry Smith   }
232bcc973b7SBarry Smith 
233dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2343649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2351ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23682b1193eSBarry Smith   PetscFunctionReturn(0);
23782b1193eSBarry Smith }
238bcc973b7SBarry Smith 
2394a2ae208SSatish Balay #undef __FUNCT__
240b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
241dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
24282b1193eSBarry Smith {
2434cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
244bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
245d2840e09SBarry Smith   const PetscScalar *x,*v;
246d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
247dfbe8321SBarry Smith   PetscErrorCode    ierr;
248d2840e09SBarry Smith   PetscInt          n,i;
249d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
25082b1193eSBarry Smith 
251bcc973b7SBarry Smith   PetscFunctionBegin;
252d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2533649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2541ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
255bfec09a0SHong Zhang 
256bcc973b7SBarry Smith   for (i=0; i<m; i++) {
257bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
258bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
259bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
260bcc973b7SBarry Smith     alpha1 = x[2*i];
261bcc973b7SBarry Smith     alpha2 = x[2*i+1];
262bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
263bcc973b7SBarry Smith   }
264dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2653649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2661ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
26782b1193eSBarry Smith   PetscFunctionReturn(0);
26882b1193eSBarry Smith }
269bcc973b7SBarry Smith 
2704a2ae208SSatish Balay #undef __FUNCT__
271b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
272dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
27382b1193eSBarry Smith {
2744cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
275bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
276d2840e09SBarry Smith   const PetscScalar *x,*v;
277d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
278dfbe8321SBarry Smith   PetscErrorCode    ierr;
279b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
280d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28182b1193eSBarry Smith 
282bcc973b7SBarry Smith   PetscFunctionBegin;
283f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2843649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2851ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
286bcc973b7SBarry Smith   idx  = a->j;
287bcc973b7SBarry Smith   v    = a->a;
288bcc973b7SBarry Smith   ii   = a->i;
289bcc973b7SBarry Smith 
290bcc973b7SBarry Smith   for (i=0; i<m; i++) {
291bcc973b7SBarry Smith     jrow = ii[i];
292bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
293bcc973b7SBarry Smith     sum1  = 0.0;
294bcc973b7SBarry Smith     sum2  = 0.0;
295bcc973b7SBarry Smith     for (j=0; j<n; j++) {
296bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
297bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
298bcc973b7SBarry Smith       jrow++;
299bcc973b7SBarry Smith      }
300bcc973b7SBarry Smith     y[2*i]   += sum1;
301bcc973b7SBarry Smith     y[2*i+1] += sum2;
302bcc973b7SBarry Smith   }
303bcc973b7SBarry Smith 
304dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3061ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
307bcc973b7SBarry Smith   PetscFunctionReturn(0);
30882b1193eSBarry Smith }
3094a2ae208SSatish Balay #undef __FUNCT__
310b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
311dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
31282b1193eSBarry Smith {
3134cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
314bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
315d2840e09SBarry Smith   const PetscScalar *x,*v;
316d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
317dfbe8321SBarry Smith   PetscErrorCode    ierr;
318d2840e09SBarry Smith   PetscInt          n,i;
319d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
32082b1193eSBarry Smith 
321bcc973b7SBarry Smith   PetscFunctionBegin;
322f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3241ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
325bfec09a0SHong Zhang 
326bcc973b7SBarry Smith   for (i=0; i<m; i++) {
327bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
328bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
329bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
330bcc973b7SBarry Smith     alpha1 = x[2*i];
331bcc973b7SBarry Smith     alpha2 = x[2*i+1];
332bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
333bcc973b7SBarry Smith   }
334dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3353649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3361ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
337bcc973b7SBarry Smith   PetscFunctionReturn(0);
33882b1193eSBarry Smith }
339cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3404a2ae208SSatish Balay #undef __FUNCT__
341b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
342dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
343bcc973b7SBarry Smith {
3444cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
345bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
346d2840e09SBarry Smith   const PetscScalar *x,*v;
347d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
348dfbe8321SBarry Smith   PetscErrorCode    ierr;
349d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
350d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
35182b1193eSBarry Smith 
352bcc973b7SBarry Smith   PetscFunctionBegin;
3533649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3541ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
355bcc973b7SBarry Smith   idx  = a->j;
356bcc973b7SBarry Smith   v    = a->a;
357bcc973b7SBarry Smith   ii   = a->i;
358bcc973b7SBarry Smith 
359bcc973b7SBarry Smith   for (i=0; i<m; i++) {
360bcc973b7SBarry Smith     jrow = ii[i];
361bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
362bcc973b7SBarry Smith     sum1  = 0.0;
363bcc973b7SBarry Smith     sum2  = 0.0;
364bcc973b7SBarry Smith     sum3  = 0.0;
365b3c51c66SMatthew Knepley     nonzerorow += (n>0);
366bcc973b7SBarry Smith     for (j=0; j<n; j++) {
367bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
368bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
369bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
370bcc973b7SBarry Smith       jrow++;
371bcc973b7SBarry Smith      }
372bcc973b7SBarry Smith     y[3*i]   = sum1;
373bcc973b7SBarry Smith     y[3*i+1] = sum2;
374bcc973b7SBarry Smith     y[3*i+2] = sum3;
375bcc973b7SBarry Smith   }
376bcc973b7SBarry Smith 
377dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3783649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3791ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
380bcc973b7SBarry Smith   PetscFunctionReturn(0);
381bcc973b7SBarry Smith }
382bcc973b7SBarry Smith 
3834a2ae208SSatish Balay #undef __FUNCT__
384b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
385dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
386bcc973b7SBarry Smith {
3874cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
388bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
389d2840e09SBarry Smith   const PetscScalar *x,*v;
390d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
391dfbe8321SBarry Smith   PetscErrorCode    ierr;
392d2840e09SBarry Smith   PetscInt          n,i;
393d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
394bcc973b7SBarry Smith 
395bcc973b7SBarry Smith   PetscFunctionBegin;
396d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
3973649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3981ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
399bfec09a0SHong Zhang 
400bcc973b7SBarry Smith   for (i=0; i<m; i++) {
401bfec09a0SHong Zhang     idx    = a->j + a->i[i];
402bfec09a0SHong Zhang     v      = a->a + a->i[i];
403bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
404bcc973b7SBarry Smith     alpha1 = x[3*i];
405bcc973b7SBarry Smith     alpha2 = x[3*i+1];
406bcc973b7SBarry Smith     alpha3 = x[3*i+2];
407bcc973b7SBarry Smith     while (n-->0) {
408bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
409bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
410bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
411bcc973b7SBarry Smith       idx++; v++;
412bcc973b7SBarry Smith     }
413bcc973b7SBarry Smith   }
414dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4161ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
417bcc973b7SBarry Smith   PetscFunctionReturn(0);
418bcc973b7SBarry Smith }
419bcc973b7SBarry Smith 
4204a2ae208SSatish Balay #undef __FUNCT__
421b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
422dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
423bcc973b7SBarry Smith {
4244cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
425bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
426d2840e09SBarry Smith   const PetscScalar *x,*v;
427d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
428dfbe8321SBarry Smith   PetscErrorCode    ierr;
429b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
430d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
431bcc973b7SBarry Smith 
432bcc973b7SBarry Smith   PetscFunctionBegin;
433f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4343649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4351ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
436bcc973b7SBarry Smith   idx  = a->j;
437bcc973b7SBarry Smith   v    = a->a;
438bcc973b7SBarry Smith   ii   = a->i;
439bcc973b7SBarry Smith 
440bcc973b7SBarry Smith   for (i=0; i<m; i++) {
441bcc973b7SBarry Smith     jrow = ii[i];
442bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
443bcc973b7SBarry Smith     sum1  = 0.0;
444bcc973b7SBarry Smith     sum2  = 0.0;
445bcc973b7SBarry Smith     sum3  = 0.0;
446bcc973b7SBarry Smith     for (j=0; j<n; j++) {
447bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
448bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
449bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
450bcc973b7SBarry Smith       jrow++;
451bcc973b7SBarry Smith      }
452bcc973b7SBarry Smith     y[3*i]   += sum1;
453bcc973b7SBarry Smith     y[3*i+1] += sum2;
454bcc973b7SBarry Smith     y[3*i+2] += sum3;
455bcc973b7SBarry Smith   }
456bcc973b7SBarry Smith 
457dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4583649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4591ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
460bcc973b7SBarry Smith   PetscFunctionReturn(0);
461bcc973b7SBarry Smith }
4624a2ae208SSatish Balay #undef __FUNCT__
463b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
464dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
465bcc973b7SBarry Smith {
4664cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
467bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
468d2840e09SBarry Smith   const PetscScalar *x,*v;
469d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
470dfbe8321SBarry Smith   PetscErrorCode    ierr;
471d2840e09SBarry Smith   PetscInt          n,i;
472d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
473bcc973b7SBarry Smith 
474bcc973b7SBarry Smith   PetscFunctionBegin;
475f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4763649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4771ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
478bcc973b7SBarry Smith   for (i=0; i<m; i++) {
479bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
480bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
481bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
482bcc973b7SBarry Smith     alpha1 = x[3*i];
483bcc973b7SBarry Smith     alpha2 = x[3*i+1];
484bcc973b7SBarry Smith     alpha3 = x[3*i+2];
485bcc973b7SBarry Smith     while (n-->0) {
486bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
487bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
488bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
489bcc973b7SBarry Smith       idx++; v++;
490bcc973b7SBarry Smith     }
491bcc973b7SBarry Smith   }
492dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4933649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4941ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
495bcc973b7SBarry Smith   PetscFunctionReturn(0);
496bcc973b7SBarry Smith }
497bcc973b7SBarry Smith 
498bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4994a2ae208SSatish Balay #undef __FUNCT__
500b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
501dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
502bcc973b7SBarry Smith {
5034cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
504bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
505d2840e09SBarry Smith   const PetscScalar *x,*v;
506d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
507dfbe8321SBarry Smith   PetscErrorCode    ierr;
508d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
509d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
510bcc973b7SBarry Smith 
511bcc973b7SBarry Smith   PetscFunctionBegin;
5123649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5131ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
514bcc973b7SBarry Smith   idx  = a->j;
515bcc973b7SBarry Smith   v    = a->a;
516bcc973b7SBarry Smith   ii   = a->i;
517bcc973b7SBarry Smith 
518bcc973b7SBarry Smith   for (i=0; i<m; i++) {
519bcc973b7SBarry Smith     jrow = ii[i];
520bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
521bcc973b7SBarry Smith     sum1  = 0.0;
522bcc973b7SBarry Smith     sum2  = 0.0;
523bcc973b7SBarry Smith     sum3  = 0.0;
524bcc973b7SBarry Smith     sum4  = 0.0;
525b3c51c66SMatthew Knepley     nonzerorow += (n>0);
526bcc973b7SBarry Smith     for (j=0; j<n; j++) {
527bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
528bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
529bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
530bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
531bcc973b7SBarry Smith       jrow++;
532bcc973b7SBarry Smith      }
533bcc973b7SBarry Smith     y[4*i]   = sum1;
534bcc973b7SBarry Smith     y[4*i+1] = sum2;
535bcc973b7SBarry Smith     y[4*i+2] = sum3;
536bcc973b7SBarry Smith     y[4*i+3] = sum4;
537bcc973b7SBarry Smith   }
538bcc973b7SBarry Smith 
539dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5403649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5411ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
542bcc973b7SBarry Smith   PetscFunctionReturn(0);
543bcc973b7SBarry Smith }
544bcc973b7SBarry Smith 
5454a2ae208SSatish Balay #undef __FUNCT__
546b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
547dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
548bcc973b7SBarry Smith {
5494cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
550bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
551d2840e09SBarry Smith   const PetscScalar *x,*v;
552d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
553dfbe8321SBarry Smith   PetscErrorCode    ierr;
554d2840e09SBarry Smith   PetscInt          n,i;
555d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
556bcc973b7SBarry Smith 
557bcc973b7SBarry Smith   PetscFunctionBegin;
558d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5593649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5601ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
561bcc973b7SBarry Smith   for (i=0; i<m; i++) {
562bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
563bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
564bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
565bcc973b7SBarry Smith     alpha1 = x[4*i];
566bcc973b7SBarry Smith     alpha2 = x[4*i+1];
567bcc973b7SBarry Smith     alpha3 = x[4*i+2];
568bcc973b7SBarry Smith     alpha4 = x[4*i+3];
569bcc973b7SBarry Smith     while (n-->0) {
570bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
571bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
572bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
573bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
574bcc973b7SBarry Smith       idx++; v++;
575bcc973b7SBarry Smith     }
576bcc973b7SBarry Smith   }
577dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5783649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5791ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
580bcc973b7SBarry Smith   PetscFunctionReturn(0);
581bcc973b7SBarry Smith }
582bcc973b7SBarry Smith 
5834a2ae208SSatish Balay #undef __FUNCT__
584b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
585dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
586bcc973b7SBarry Smith {
5874cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
588f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
589d2840e09SBarry Smith   const PetscScalar *x,*v;
590d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
591dfbe8321SBarry Smith   PetscErrorCode    ierr;
592b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
593d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
594f9fae5adSBarry Smith 
595f9fae5adSBarry Smith   PetscFunctionBegin;
596f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5973649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5981ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
599f9fae5adSBarry Smith   idx  = a->j;
600f9fae5adSBarry Smith   v    = a->a;
601f9fae5adSBarry Smith   ii   = a->i;
602f9fae5adSBarry Smith 
603f9fae5adSBarry Smith   for (i=0; i<m; i++) {
604f9fae5adSBarry Smith     jrow = ii[i];
605f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
606f9fae5adSBarry Smith     sum1  = 0.0;
607f9fae5adSBarry Smith     sum2  = 0.0;
608f9fae5adSBarry Smith     sum3  = 0.0;
609f9fae5adSBarry Smith     sum4  = 0.0;
610f9fae5adSBarry Smith     for (j=0; j<n; j++) {
611f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
612f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
613f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
614f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
615f9fae5adSBarry Smith       jrow++;
616f9fae5adSBarry Smith      }
617f9fae5adSBarry Smith     y[4*i]   += sum1;
618f9fae5adSBarry Smith     y[4*i+1] += sum2;
619f9fae5adSBarry Smith     y[4*i+2] += sum3;
620f9fae5adSBarry Smith     y[4*i+3] += sum4;
621f9fae5adSBarry Smith   }
622f9fae5adSBarry Smith 
623dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6243649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6251ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
626f9fae5adSBarry Smith   PetscFunctionReturn(0);
627bcc973b7SBarry Smith }
6284a2ae208SSatish Balay #undef __FUNCT__
629b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
630dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
631bcc973b7SBarry Smith {
6324cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
633f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
634d2840e09SBarry Smith   const PetscScalar *x,*v;
635d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
636dfbe8321SBarry Smith   PetscErrorCode    ierr;
637d2840e09SBarry Smith   PetscInt          n,i;
638d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
639f9fae5adSBarry Smith 
640f9fae5adSBarry Smith   PetscFunctionBegin;
641f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6423649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6431ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
644bfec09a0SHong Zhang 
645f9fae5adSBarry Smith   for (i=0; i<m; i++) {
646bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
647bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
648f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
649f9fae5adSBarry Smith     alpha1 = x[4*i];
650f9fae5adSBarry Smith     alpha2 = x[4*i+1];
651f9fae5adSBarry Smith     alpha3 = x[4*i+2];
652f9fae5adSBarry Smith     alpha4 = x[4*i+3];
653f9fae5adSBarry Smith     while (n-->0) {
654f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
655f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
656f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
657f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
658f9fae5adSBarry Smith       idx++; v++;
659f9fae5adSBarry Smith     }
660f9fae5adSBarry Smith   }
661dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6623649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6631ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
664f9fae5adSBarry Smith   PetscFunctionReturn(0);
665f9fae5adSBarry Smith }
666f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6676cd98798SBarry Smith 
6684a2ae208SSatish Balay #undef __FUNCT__
669b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
670dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
671f9fae5adSBarry Smith {
6724cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
673f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
674d2840e09SBarry Smith   const PetscScalar *x,*v;
675d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
676dfbe8321SBarry Smith   PetscErrorCode    ierr;
677d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
678d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
679f9fae5adSBarry Smith 
680f9fae5adSBarry Smith   PetscFunctionBegin;
6813649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6821ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
683f9fae5adSBarry Smith   idx  = a->j;
684f9fae5adSBarry Smith   v    = a->a;
685f9fae5adSBarry Smith   ii   = a->i;
686f9fae5adSBarry Smith 
687f9fae5adSBarry Smith   for (i=0; i<m; i++) {
688f9fae5adSBarry Smith     jrow = ii[i];
689f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
690f9fae5adSBarry Smith     sum1  = 0.0;
691f9fae5adSBarry Smith     sum2  = 0.0;
692f9fae5adSBarry Smith     sum3  = 0.0;
693f9fae5adSBarry Smith     sum4  = 0.0;
694f9fae5adSBarry Smith     sum5  = 0.0;
695b3c51c66SMatthew Knepley     nonzerorow += (n>0);
696f9fae5adSBarry Smith     for (j=0; j<n; j++) {
697f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
698f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
699f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
700f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
701f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
702f9fae5adSBarry Smith       jrow++;
703f9fae5adSBarry Smith      }
704f9fae5adSBarry Smith     y[5*i]   = sum1;
705f9fae5adSBarry Smith     y[5*i+1] = sum2;
706f9fae5adSBarry Smith     y[5*i+2] = sum3;
707f9fae5adSBarry Smith     y[5*i+3] = sum4;
708f9fae5adSBarry Smith     y[5*i+4] = sum5;
709f9fae5adSBarry Smith   }
710f9fae5adSBarry Smith 
711dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
7123649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7131ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
714f9fae5adSBarry Smith   PetscFunctionReturn(0);
715f9fae5adSBarry Smith }
716f9fae5adSBarry Smith 
7174a2ae208SSatish Balay #undef __FUNCT__
718b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
719dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
720f9fae5adSBarry Smith {
7214cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
722f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
723d2840e09SBarry Smith   const PetscScalar *x,*v;
724d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
725dfbe8321SBarry Smith   PetscErrorCode    ierr;
726d2840e09SBarry Smith   PetscInt          n,i;
727d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
728f9fae5adSBarry Smith 
729f9fae5adSBarry Smith   PetscFunctionBegin;
730d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7313649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7321ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
733bfec09a0SHong Zhang 
734f9fae5adSBarry Smith   for (i=0; i<m; i++) {
735bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
736bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
737f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
738f9fae5adSBarry Smith     alpha1 = x[5*i];
739f9fae5adSBarry Smith     alpha2 = x[5*i+1];
740f9fae5adSBarry Smith     alpha3 = x[5*i+2];
741f9fae5adSBarry Smith     alpha4 = x[5*i+3];
742f9fae5adSBarry Smith     alpha5 = x[5*i+4];
743f9fae5adSBarry Smith     while (n-->0) {
744f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
745f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
746f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
747f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
748f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
749f9fae5adSBarry Smith       idx++; v++;
750f9fae5adSBarry Smith     }
751f9fae5adSBarry Smith   }
752dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7541ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
755f9fae5adSBarry Smith   PetscFunctionReturn(0);
756f9fae5adSBarry Smith }
757f9fae5adSBarry Smith 
7584a2ae208SSatish Balay #undef __FUNCT__
759b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
760dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
761f9fae5adSBarry Smith {
7624cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
763f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
764d2840e09SBarry Smith   const PetscScalar *x,*v;
765d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
766dfbe8321SBarry Smith   PetscErrorCode    ierr;
767b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
768d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
769f9fae5adSBarry Smith 
770f9fae5adSBarry Smith   PetscFunctionBegin;
771f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7731ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
774f9fae5adSBarry Smith   idx  = a->j;
775f9fae5adSBarry Smith   v    = a->a;
776f9fae5adSBarry Smith   ii   = a->i;
777f9fae5adSBarry Smith 
778f9fae5adSBarry Smith   for (i=0; i<m; i++) {
779f9fae5adSBarry Smith     jrow = ii[i];
780f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
781f9fae5adSBarry Smith     sum1  = 0.0;
782f9fae5adSBarry Smith     sum2  = 0.0;
783f9fae5adSBarry Smith     sum3  = 0.0;
784f9fae5adSBarry Smith     sum4  = 0.0;
785f9fae5adSBarry Smith     sum5  = 0.0;
786f9fae5adSBarry Smith     for (j=0; j<n; j++) {
787f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
788f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
789f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
790f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
791f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
792f9fae5adSBarry Smith       jrow++;
793f9fae5adSBarry Smith      }
794f9fae5adSBarry Smith     y[5*i]   += sum1;
795f9fae5adSBarry Smith     y[5*i+1] += sum2;
796f9fae5adSBarry Smith     y[5*i+2] += sum3;
797f9fae5adSBarry Smith     y[5*i+3] += sum4;
798f9fae5adSBarry Smith     y[5*i+4] += sum5;
799f9fae5adSBarry Smith   }
800f9fae5adSBarry Smith 
801dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8031ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
804f9fae5adSBarry Smith   PetscFunctionReturn(0);
805f9fae5adSBarry Smith }
806f9fae5adSBarry Smith 
8074a2ae208SSatish Balay #undef __FUNCT__
808b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
809dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
810f9fae5adSBarry Smith {
8114cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
812f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
813d2840e09SBarry Smith   const PetscScalar *x,*v;
814d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
815dfbe8321SBarry Smith   PetscErrorCode    ierr;
816d2840e09SBarry Smith   PetscInt          n,i;
817d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
818f9fae5adSBarry Smith 
819f9fae5adSBarry Smith   PetscFunctionBegin;
820f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8221ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
823bfec09a0SHong Zhang 
824f9fae5adSBarry Smith   for (i=0; i<m; i++) {
825bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
826bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
827f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
828f9fae5adSBarry Smith     alpha1 = x[5*i];
829f9fae5adSBarry Smith     alpha2 = x[5*i+1];
830f9fae5adSBarry Smith     alpha3 = x[5*i+2];
831f9fae5adSBarry Smith     alpha4 = x[5*i+3];
832f9fae5adSBarry Smith     alpha5 = x[5*i+4];
833f9fae5adSBarry Smith     while (n-->0) {
834f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
835f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
836f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
837f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
838f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
839f9fae5adSBarry Smith       idx++; v++;
840f9fae5adSBarry Smith     }
841f9fae5adSBarry Smith   }
842dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8433649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8441ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
845f9fae5adSBarry Smith   PetscFunctionReturn(0);
846bcc973b7SBarry Smith }
847bcc973b7SBarry Smith 
8486cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
8494a2ae208SSatish Balay #undef __FUNCT__
850b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
851dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8526cd98798SBarry Smith {
8536cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8546cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
855d2840e09SBarry Smith   const PetscScalar *x,*v;
856d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
857dfbe8321SBarry Smith   PetscErrorCode    ierr;
858d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
859d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
8606cd98798SBarry Smith 
8616cd98798SBarry Smith   PetscFunctionBegin;
8623649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8631ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8646cd98798SBarry Smith   idx  = a->j;
8656cd98798SBarry Smith   v    = a->a;
8666cd98798SBarry Smith   ii   = a->i;
8676cd98798SBarry Smith 
8686cd98798SBarry Smith   for (i=0; i<m; i++) {
8696cd98798SBarry Smith     jrow = ii[i];
8706cd98798SBarry Smith     n    = ii[i+1] - jrow;
8716cd98798SBarry Smith     sum1  = 0.0;
8726cd98798SBarry Smith     sum2  = 0.0;
8736cd98798SBarry Smith     sum3  = 0.0;
8746cd98798SBarry Smith     sum4  = 0.0;
8756cd98798SBarry Smith     sum5  = 0.0;
8766cd98798SBarry Smith     sum6  = 0.0;
877b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8786cd98798SBarry Smith     for (j=0; j<n; j++) {
8796cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8806cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8816cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8826cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8836cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8846cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8856cd98798SBarry Smith       jrow++;
8866cd98798SBarry Smith      }
8876cd98798SBarry Smith     y[6*i]   = sum1;
8886cd98798SBarry Smith     y[6*i+1] = sum2;
8896cd98798SBarry Smith     y[6*i+2] = sum3;
8906cd98798SBarry Smith     y[6*i+3] = sum4;
8916cd98798SBarry Smith     y[6*i+4] = sum5;
8926cd98798SBarry Smith     y[6*i+5] = sum6;
8936cd98798SBarry Smith   }
8946cd98798SBarry Smith 
895dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
8963649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8971ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8986cd98798SBarry Smith   PetscFunctionReturn(0);
8996cd98798SBarry Smith }
9006cd98798SBarry Smith 
9014a2ae208SSatish Balay #undef __FUNCT__
902b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
903dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
9046cd98798SBarry Smith {
9056cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9066cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
907d2840e09SBarry Smith   const PetscScalar *x,*v;
908d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
909dfbe8321SBarry Smith   PetscErrorCode    ierr;
910d2840e09SBarry Smith   PetscInt          n,i;
911d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9126cd98798SBarry Smith 
9136cd98798SBarry Smith   PetscFunctionBegin;
914d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
9153649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9161ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
917bfec09a0SHong Zhang 
9186cd98798SBarry Smith   for (i=0; i<m; i++) {
919bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
920bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9216cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9226cd98798SBarry Smith     alpha1 = x[6*i];
9236cd98798SBarry Smith     alpha2 = x[6*i+1];
9246cd98798SBarry Smith     alpha3 = x[6*i+2];
9256cd98798SBarry Smith     alpha4 = x[6*i+3];
9266cd98798SBarry Smith     alpha5 = x[6*i+4];
9276cd98798SBarry Smith     alpha6 = x[6*i+5];
9286cd98798SBarry Smith     while (n-->0) {
9296cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9306cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9316cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9326cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9336cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9346cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9356cd98798SBarry Smith       idx++; v++;
9366cd98798SBarry Smith     }
9376cd98798SBarry Smith   }
938dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9401ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9416cd98798SBarry Smith   PetscFunctionReturn(0);
9426cd98798SBarry Smith }
9436cd98798SBarry Smith 
9444a2ae208SSatish Balay #undef __FUNCT__
945b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
946dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9476cd98798SBarry Smith {
9486cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9496cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
950d2840e09SBarry Smith   const PetscScalar *x,*v;
951d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
952dfbe8321SBarry Smith   PetscErrorCode    ierr;
953b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
954d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9556cd98798SBarry Smith 
9566cd98798SBarry Smith   PetscFunctionBegin;
9576cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9583649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9591ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9606cd98798SBarry Smith   idx  = a->j;
9616cd98798SBarry Smith   v    = a->a;
9626cd98798SBarry Smith   ii   = a->i;
9636cd98798SBarry Smith 
9646cd98798SBarry Smith   for (i=0; i<m; i++) {
9656cd98798SBarry Smith     jrow = ii[i];
9666cd98798SBarry Smith     n    = ii[i+1] - jrow;
9676cd98798SBarry Smith     sum1  = 0.0;
9686cd98798SBarry Smith     sum2  = 0.0;
9696cd98798SBarry Smith     sum3  = 0.0;
9706cd98798SBarry Smith     sum4  = 0.0;
9716cd98798SBarry Smith     sum5  = 0.0;
9726cd98798SBarry Smith     sum6  = 0.0;
9736cd98798SBarry Smith     for (j=0; j<n; j++) {
9746cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9756cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9766cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9776cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9786cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9796cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9806cd98798SBarry Smith       jrow++;
9816cd98798SBarry Smith      }
9826cd98798SBarry Smith     y[6*i]   += sum1;
9836cd98798SBarry Smith     y[6*i+1] += sum2;
9846cd98798SBarry Smith     y[6*i+2] += sum3;
9856cd98798SBarry Smith     y[6*i+3] += sum4;
9866cd98798SBarry Smith     y[6*i+4] += sum5;
9876cd98798SBarry Smith     y[6*i+5] += sum6;
9886cd98798SBarry Smith   }
9896cd98798SBarry Smith 
990dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9913649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9921ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9936cd98798SBarry Smith   PetscFunctionReturn(0);
9946cd98798SBarry Smith }
9956cd98798SBarry Smith 
9964a2ae208SSatish Balay #undef __FUNCT__
997b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
998dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9996cd98798SBarry Smith {
10006cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
10016cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1002d2840e09SBarry Smith   const PetscScalar *x,*v;
1003d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1004dfbe8321SBarry Smith   PetscErrorCode    ierr;
1005d2840e09SBarry Smith   PetscInt          n,i;
1006d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
10076cd98798SBarry Smith 
10086cd98798SBarry Smith   PetscFunctionBegin;
10096cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10103649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
10111ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1012bfec09a0SHong Zhang 
10136cd98798SBarry Smith   for (i=0; i<m; i++) {
1014bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1015bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
10166cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
10176cd98798SBarry Smith     alpha1 = x[6*i];
10186cd98798SBarry Smith     alpha2 = x[6*i+1];
10196cd98798SBarry Smith     alpha3 = x[6*i+2];
10206cd98798SBarry Smith     alpha4 = x[6*i+3];
10216cd98798SBarry Smith     alpha5 = x[6*i+4];
10226cd98798SBarry Smith     alpha6 = x[6*i+5];
10236cd98798SBarry Smith     while (n-->0) {
10246cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
10256cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
10266cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10276cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10286cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10296cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10306cd98798SBarry Smith       idx++; v++;
10316cd98798SBarry Smith     }
10326cd98798SBarry Smith   }
1033dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10343649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10351ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10366cd98798SBarry Smith   PetscFunctionReturn(0);
10376cd98798SBarry Smith }
10386cd98798SBarry Smith 
103966ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
104066ed3db0SBarry Smith #undef __FUNCT__
1041ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
1042ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1043ed8eea03SBarry Smith {
1044ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1045ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1046d2840e09SBarry Smith   const PetscScalar *x,*v;
1047d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1048ed8eea03SBarry Smith   PetscErrorCode    ierr;
1049d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1050d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1051ed8eea03SBarry Smith 
1052ed8eea03SBarry Smith   PetscFunctionBegin;
10533649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1054ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1055ed8eea03SBarry Smith   idx  = a->j;
1056ed8eea03SBarry Smith   v    = a->a;
1057ed8eea03SBarry Smith   ii   = a->i;
1058ed8eea03SBarry Smith 
1059ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1060ed8eea03SBarry Smith     jrow = ii[i];
1061ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1062ed8eea03SBarry Smith     sum1  = 0.0;
1063ed8eea03SBarry Smith     sum2  = 0.0;
1064ed8eea03SBarry Smith     sum3  = 0.0;
1065ed8eea03SBarry Smith     sum4  = 0.0;
1066ed8eea03SBarry Smith     sum5  = 0.0;
1067ed8eea03SBarry Smith     sum6  = 0.0;
1068ed8eea03SBarry Smith     sum7  = 0.0;
1069b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1070ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1071ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1072ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1073ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1074ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1075ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1076ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1077ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1078ed8eea03SBarry Smith       jrow++;
1079ed8eea03SBarry Smith      }
1080ed8eea03SBarry Smith     y[7*i]   = sum1;
1081ed8eea03SBarry Smith     y[7*i+1] = sum2;
1082ed8eea03SBarry Smith     y[7*i+2] = sum3;
1083ed8eea03SBarry Smith     y[7*i+3] = sum4;
1084ed8eea03SBarry Smith     y[7*i+4] = sum5;
1085ed8eea03SBarry Smith     y[7*i+5] = sum6;
1086ed8eea03SBarry Smith     y[7*i+6] = sum7;
1087ed8eea03SBarry Smith   }
1088ed8eea03SBarry Smith 
1089dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
10903649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1091ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1092ed8eea03SBarry Smith   PetscFunctionReturn(0);
1093ed8eea03SBarry Smith }
1094ed8eea03SBarry Smith 
1095ed8eea03SBarry Smith #undef __FUNCT__
1096ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1097ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1098ed8eea03SBarry Smith {
1099ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1100ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1101d2840e09SBarry Smith   const PetscScalar *x,*v;
1102d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1103ed8eea03SBarry Smith   PetscErrorCode    ierr;
1104d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1105d2840e09SBarry Smith   PetscInt          n,i;
1106ed8eea03SBarry Smith 
1107ed8eea03SBarry Smith   PetscFunctionBegin;
1108d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
11093649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1110ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1111ed8eea03SBarry Smith 
1112ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1113ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1114ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1115ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1116ed8eea03SBarry Smith     alpha1 = x[7*i];
1117ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1118ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1119ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1120ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1121ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1122ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1123ed8eea03SBarry Smith     while (n-->0) {
1124ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1125ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1126ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1127ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1128ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1129ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1130ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1131ed8eea03SBarry Smith       idx++; v++;
1132ed8eea03SBarry Smith     }
1133ed8eea03SBarry Smith   }
1134dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11353649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1136ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1137ed8eea03SBarry Smith   PetscFunctionReturn(0);
1138ed8eea03SBarry Smith }
1139ed8eea03SBarry Smith 
1140ed8eea03SBarry Smith #undef __FUNCT__
1141ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1142ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1143ed8eea03SBarry Smith {
1144ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1145ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1146d2840e09SBarry Smith   const PetscScalar *x,*v;
1147d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1148ed8eea03SBarry Smith   PetscErrorCode    ierr;
1149d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1150ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1151ed8eea03SBarry Smith 
1152ed8eea03SBarry Smith   PetscFunctionBegin;
1153ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11543649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1155ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1156ed8eea03SBarry Smith   idx  = a->j;
1157ed8eea03SBarry Smith   v    = a->a;
1158ed8eea03SBarry Smith   ii   = a->i;
1159ed8eea03SBarry Smith 
1160ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1161ed8eea03SBarry Smith     jrow = ii[i];
1162ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1163ed8eea03SBarry Smith     sum1  = 0.0;
1164ed8eea03SBarry Smith     sum2  = 0.0;
1165ed8eea03SBarry Smith     sum3  = 0.0;
1166ed8eea03SBarry Smith     sum4  = 0.0;
1167ed8eea03SBarry Smith     sum5  = 0.0;
1168ed8eea03SBarry Smith     sum6  = 0.0;
1169ed8eea03SBarry Smith     sum7  = 0.0;
1170ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1171ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1172ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1173ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1174ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1175ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1176ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1177ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1178ed8eea03SBarry Smith       jrow++;
1179ed8eea03SBarry Smith      }
1180ed8eea03SBarry Smith     y[7*i]   += sum1;
1181ed8eea03SBarry Smith     y[7*i+1] += sum2;
1182ed8eea03SBarry Smith     y[7*i+2] += sum3;
1183ed8eea03SBarry Smith     y[7*i+3] += sum4;
1184ed8eea03SBarry Smith     y[7*i+4] += sum5;
1185ed8eea03SBarry Smith     y[7*i+5] += sum6;
1186ed8eea03SBarry Smith     y[7*i+6] += sum7;
1187ed8eea03SBarry Smith   }
1188ed8eea03SBarry Smith 
1189dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11903649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1191ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1192ed8eea03SBarry Smith   PetscFunctionReturn(0);
1193ed8eea03SBarry Smith }
1194ed8eea03SBarry Smith 
1195ed8eea03SBarry Smith #undef __FUNCT__
1196ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1197ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1198ed8eea03SBarry Smith {
1199ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1200ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1201d2840e09SBarry Smith   const PetscScalar *x,*v;
1202d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1203ed8eea03SBarry Smith   PetscErrorCode    ierr;
1204d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1205d2840e09SBarry Smith   PetscInt          n,i;
1206ed8eea03SBarry Smith 
1207ed8eea03SBarry Smith   PetscFunctionBegin;
1208ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12093649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1210ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1211ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1212ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1213ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1214ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1215ed8eea03SBarry Smith     alpha1 = x[7*i];
1216ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1217ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1218ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1219ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1220ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1221ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1222ed8eea03SBarry Smith     while (n-->0) {
1223ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1224ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1225ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1226ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1227ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1228ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1229ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1230ed8eea03SBarry Smith       idx++; v++;
1231ed8eea03SBarry Smith     }
1232ed8eea03SBarry Smith   }
1233dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12343649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1235ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1236ed8eea03SBarry Smith   PetscFunctionReturn(0);
1237ed8eea03SBarry Smith }
1238ed8eea03SBarry Smith 
1239ed8eea03SBarry Smith #undef __FUNCT__
124066ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1241dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
124266ed3db0SBarry Smith {
124366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
124466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1245d2840e09SBarry Smith   const PetscScalar *x,*v;
1246d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1247dfbe8321SBarry Smith   PetscErrorCode    ierr;
1248d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1249d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
125066ed3db0SBarry Smith 
125166ed3db0SBarry Smith   PetscFunctionBegin;
12523649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12531ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
125466ed3db0SBarry Smith   idx  = a->j;
125566ed3db0SBarry Smith   v    = a->a;
125666ed3db0SBarry Smith   ii   = a->i;
125766ed3db0SBarry Smith 
125866ed3db0SBarry Smith   for (i=0; i<m; i++) {
125966ed3db0SBarry Smith     jrow = ii[i];
126066ed3db0SBarry Smith     n    = ii[i+1] - jrow;
126166ed3db0SBarry Smith     sum1  = 0.0;
126266ed3db0SBarry Smith     sum2  = 0.0;
126366ed3db0SBarry Smith     sum3  = 0.0;
126466ed3db0SBarry Smith     sum4  = 0.0;
126566ed3db0SBarry Smith     sum5  = 0.0;
126666ed3db0SBarry Smith     sum6  = 0.0;
126766ed3db0SBarry Smith     sum7  = 0.0;
126866ed3db0SBarry Smith     sum8  = 0.0;
1269b3c51c66SMatthew Knepley     nonzerorow += (n>0);
127066ed3db0SBarry Smith     for (j=0; j<n; j++) {
127166ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
127266ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
127366ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
127466ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
127566ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
127666ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
127766ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
127866ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
127966ed3db0SBarry Smith       jrow++;
128066ed3db0SBarry Smith      }
128166ed3db0SBarry Smith     y[8*i]   = sum1;
128266ed3db0SBarry Smith     y[8*i+1] = sum2;
128366ed3db0SBarry Smith     y[8*i+2] = sum3;
128466ed3db0SBarry Smith     y[8*i+3] = sum4;
128566ed3db0SBarry Smith     y[8*i+4] = sum5;
128666ed3db0SBarry Smith     y[8*i+5] = sum6;
128766ed3db0SBarry Smith     y[8*i+6] = sum7;
128866ed3db0SBarry Smith     y[8*i+7] = sum8;
128966ed3db0SBarry Smith   }
129066ed3db0SBarry Smith 
1291dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
12923649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
12931ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
129466ed3db0SBarry Smith   PetscFunctionReturn(0);
129566ed3db0SBarry Smith }
129666ed3db0SBarry Smith 
129766ed3db0SBarry Smith #undef __FUNCT__
129866ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1299dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
130066ed3db0SBarry Smith {
130166ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
130266ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1303d2840e09SBarry Smith   const PetscScalar *x,*v;
1304d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1305dfbe8321SBarry Smith   PetscErrorCode    ierr;
1306d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1307d2840e09SBarry Smith   PetscInt          n,i;
130866ed3db0SBarry Smith 
130966ed3db0SBarry Smith   PetscFunctionBegin;
1310d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
13113649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13121ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1313bfec09a0SHong Zhang 
131466ed3db0SBarry Smith   for (i=0; i<m; i++) {
1315bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1316bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
131766ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
131866ed3db0SBarry Smith     alpha1 = x[8*i];
131966ed3db0SBarry Smith     alpha2 = x[8*i+1];
132066ed3db0SBarry Smith     alpha3 = x[8*i+2];
132166ed3db0SBarry Smith     alpha4 = x[8*i+3];
132266ed3db0SBarry Smith     alpha5 = x[8*i+4];
132366ed3db0SBarry Smith     alpha6 = x[8*i+5];
132466ed3db0SBarry Smith     alpha7 = x[8*i+6];
132566ed3db0SBarry Smith     alpha8 = x[8*i+7];
132666ed3db0SBarry Smith     while (n-->0) {
132766ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
132866ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
132966ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
133066ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
133166ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
133266ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
133366ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
133466ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
133566ed3db0SBarry Smith       idx++; v++;
133666ed3db0SBarry Smith     }
133766ed3db0SBarry Smith   }
1338dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13401ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
134166ed3db0SBarry Smith   PetscFunctionReturn(0);
134266ed3db0SBarry Smith }
134366ed3db0SBarry Smith 
134466ed3db0SBarry Smith #undef __FUNCT__
134566ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1346dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
134766ed3db0SBarry Smith {
134866ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
134966ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1350d2840e09SBarry Smith   const PetscScalar *x,*v;
1351d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1352dfbe8321SBarry Smith   PetscErrorCode    ierr;
1353d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1354b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
135566ed3db0SBarry Smith 
135666ed3db0SBarry Smith   PetscFunctionBegin;
135766ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13583649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13591ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
136066ed3db0SBarry Smith   idx  = a->j;
136166ed3db0SBarry Smith   v    = a->a;
136266ed3db0SBarry Smith   ii   = a->i;
136366ed3db0SBarry Smith 
136466ed3db0SBarry Smith   for (i=0; i<m; i++) {
136566ed3db0SBarry Smith     jrow = ii[i];
136666ed3db0SBarry Smith     n    = ii[i+1] - jrow;
136766ed3db0SBarry Smith     sum1  = 0.0;
136866ed3db0SBarry Smith     sum2  = 0.0;
136966ed3db0SBarry Smith     sum3  = 0.0;
137066ed3db0SBarry Smith     sum4  = 0.0;
137166ed3db0SBarry Smith     sum5  = 0.0;
137266ed3db0SBarry Smith     sum6  = 0.0;
137366ed3db0SBarry Smith     sum7  = 0.0;
137466ed3db0SBarry Smith     sum8  = 0.0;
137566ed3db0SBarry Smith     for (j=0; j<n; j++) {
137666ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
137766ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
137866ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
137966ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
138066ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
138166ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
138266ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
138366ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
138466ed3db0SBarry Smith       jrow++;
138566ed3db0SBarry Smith      }
138666ed3db0SBarry Smith     y[8*i]   += sum1;
138766ed3db0SBarry Smith     y[8*i+1] += sum2;
138866ed3db0SBarry Smith     y[8*i+2] += sum3;
138966ed3db0SBarry Smith     y[8*i+3] += sum4;
139066ed3db0SBarry Smith     y[8*i+4] += sum5;
139166ed3db0SBarry Smith     y[8*i+5] += sum6;
139266ed3db0SBarry Smith     y[8*i+6] += sum7;
139366ed3db0SBarry Smith     y[8*i+7] += sum8;
139466ed3db0SBarry Smith   }
139566ed3db0SBarry Smith 
1396dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13973649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13981ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
139966ed3db0SBarry Smith   PetscFunctionReturn(0);
140066ed3db0SBarry Smith }
140166ed3db0SBarry Smith 
140266ed3db0SBarry Smith #undef __FUNCT__
140366ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1404dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
140566ed3db0SBarry Smith {
140666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
140766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1408d2840e09SBarry Smith   const PetscScalar *x,*v;
1409d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1410dfbe8321SBarry Smith   PetscErrorCode    ierr;
1411d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1412d2840e09SBarry Smith   PetscInt          n,i;
141366ed3db0SBarry Smith 
141466ed3db0SBarry Smith   PetscFunctionBegin;
141566ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
14163649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14171ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
141866ed3db0SBarry Smith   for (i=0; i<m; i++) {
1419bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1420bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
142166ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
142266ed3db0SBarry Smith     alpha1 = x[8*i];
142366ed3db0SBarry Smith     alpha2 = x[8*i+1];
142466ed3db0SBarry Smith     alpha3 = x[8*i+2];
142566ed3db0SBarry Smith     alpha4 = x[8*i+3];
142666ed3db0SBarry Smith     alpha5 = x[8*i+4];
142766ed3db0SBarry Smith     alpha6 = x[8*i+5];
142866ed3db0SBarry Smith     alpha7 = x[8*i+6];
142966ed3db0SBarry Smith     alpha8 = x[8*i+7];
143066ed3db0SBarry Smith     while (n-->0) {
143166ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
143266ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
143366ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
143466ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
143566ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
143666ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
143766ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
143866ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
143966ed3db0SBarry Smith       idx++; v++;
144066ed3db0SBarry Smith     }
144166ed3db0SBarry Smith   }
1442dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14433649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14441ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14452f7816d4SBarry Smith   PetscFunctionReturn(0);
14462f7816d4SBarry Smith }
14472f7816d4SBarry Smith 
14482b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
14492b692628SMatthew Knepley #undef __FUNCT__
14502b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1451dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14522b692628SMatthew Knepley {
14532b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14542b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1455d2840e09SBarry Smith   const PetscScalar *x,*v;
1456d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1457dfbe8321SBarry Smith   PetscErrorCode    ierr;
1458d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1459d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14602b692628SMatthew Knepley 
14612b692628SMatthew Knepley   PetscFunctionBegin;
14623649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14631ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14642b692628SMatthew Knepley   idx  = a->j;
14652b692628SMatthew Knepley   v    = a->a;
14662b692628SMatthew Knepley   ii   = a->i;
14672b692628SMatthew Knepley 
14682b692628SMatthew Knepley   for (i=0; i<m; i++) {
14692b692628SMatthew Knepley     jrow = ii[i];
14702b692628SMatthew Knepley     n    = ii[i+1] - jrow;
14712b692628SMatthew Knepley     sum1  = 0.0;
14722b692628SMatthew Knepley     sum2  = 0.0;
14732b692628SMatthew Knepley     sum3  = 0.0;
14742b692628SMatthew Knepley     sum4  = 0.0;
14752b692628SMatthew Knepley     sum5  = 0.0;
14762b692628SMatthew Knepley     sum6  = 0.0;
14772b692628SMatthew Knepley     sum7  = 0.0;
14782b692628SMatthew Knepley     sum8  = 0.0;
14792b692628SMatthew Knepley     sum9  = 0.0;
1480b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14812b692628SMatthew Knepley     for (j=0; j<n; j++) {
14822b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14832b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14842b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14852b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14862b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14872b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14882b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14892b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14902b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14912b692628SMatthew Knepley       jrow++;
14922b692628SMatthew Knepley      }
14932b692628SMatthew Knepley     y[9*i]   = sum1;
14942b692628SMatthew Knepley     y[9*i+1] = sum2;
14952b692628SMatthew Knepley     y[9*i+2] = sum3;
14962b692628SMatthew Knepley     y[9*i+3] = sum4;
14972b692628SMatthew Knepley     y[9*i+4] = sum5;
14982b692628SMatthew Knepley     y[9*i+5] = sum6;
14992b692628SMatthew Knepley     y[9*i+6] = sum7;
15002b692628SMatthew Knepley     y[9*i+7] = sum8;
15012b692628SMatthew Knepley     y[9*i+8] = sum9;
15022b692628SMatthew Knepley   }
15032b692628SMatthew Knepley 
1504dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
15053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15061ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15072b692628SMatthew Knepley   PetscFunctionReturn(0);
15082b692628SMatthew Knepley }
15092b692628SMatthew Knepley 
1510b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1511b9cda22cSBarry Smith 
15122b692628SMatthew Knepley #undef __FUNCT__
15132b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1514dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
15152b692628SMatthew Knepley {
15162b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15172b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1518d2840e09SBarry Smith   const PetscScalar *x,*v;
1519d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1520dfbe8321SBarry Smith   PetscErrorCode    ierr;
1521d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1522d2840e09SBarry Smith   PetscInt          n,i;
15232b692628SMatthew Knepley 
15242b692628SMatthew Knepley   PetscFunctionBegin;
1525d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
15263649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15271ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
15282b692628SMatthew Knepley 
15292b692628SMatthew Knepley   for (i=0; i<m; i++) {
15302b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
15312b692628SMatthew Knepley     v      = a->a + a->i[i] ;
15322b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15332b692628SMatthew Knepley     alpha1 = x[9*i];
15342b692628SMatthew Knepley     alpha2 = x[9*i+1];
15352b692628SMatthew Knepley     alpha3 = x[9*i+2];
15362b692628SMatthew Knepley     alpha4 = x[9*i+3];
15372b692628SMatthew Knepley     alpha5 = x[9*i+4];
15382b692628SMatthew Knepley     alpha6 = x[9*i+5];
15392b692628SMatthew Knepley     alpha7 = x[9*i+6];
15402b692628SMatthew Knepley     alpha8 = x[9*i+7];
15412f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
15422b692628SMatthew Knepley     while (n-->0) {
15432b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15442b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15452b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15462b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15472b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15482b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15492b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15502b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15512b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15522b692628SMatthew Knepley       idx++; v++;
15532b692628SMatthew Knepley     }
15542b692628SMatthew Knepley   }
1555dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15563649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15571ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15582b692628SMatthew Knepley   PetscFunctionReturn(0);
15592b692628SMatthew Knepley }
15602b692628SMatthew Knepley 
15612b692628SMatthew Knepley #undef __FUNCT__
15622b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1563dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15642b692628SMatthew Knepley {
15652b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15662b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1567d2840e09SBarry Smith   const PetscScalar *x,*v;
1568d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1569dfbe8321SBarry Smith   PetscErrorCode    ierr;
1570d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1571b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15722b692628SMatthew Knepley 
15732b692628SMatthew Knepley   PetscFunctionBegin;
15742b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15753649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15761ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15772b692628SMatthew Knepley   idx  = a->j;
15782b692628SMatthew Knepley   v    = a->a;
15792b692628SMatthew Knepley   ii   = a->i;
15802b692628SMatthew Knepley 
15812b692628SMatthew Knepley   for (i=0; i<m; i++) {
15822b692628SMatthew Knepley     jrow = ii[i];
15832b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15842b692628SMatthew Knepley     sum1  = 0.0;
15852b692628SMatthew Knepley     sum2  = 0.0;
15862b692628SMatthew Knepley     sum3  = 0.0;
15872b692628SMatthew Knepley     sum4  = 0.0;
15882b692628SMatthew Knepley     sum5  = 0.0;
15892b692628SMatthew Knepley     sum6  = 0.0;
15902b692628SMatthew Knepley     sum7  = 0.0;
15912b692628SMatthew Knepley     sum8  = 0.0;
15922b692628SMatthew Knepley     sum9  = 0.0;
15932b692628SMatthew Knepley     for (j=0; j<n; j++) {
15942b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15952b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15962b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15972b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15982b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15992b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
16002b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
16012b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
16022b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
16032b692628SMatthew Knepley       jrow++;
16042b692628SMatthew Knepley      }
16052b692628SMatthew Knepley     y[9*i]   += sum1;
16062b692628SMatthew Knepley     y[9*i+1] += sum2;
16072b692628SMatthew Knepley     y[9*i+2] += sum3;
16082b692628SMatthew Knepley     y[9*i+3] += sum4;
16092b692628SMatthew Knepley     y[9*i+4] += sum5;
16102b692628SMatthew Knepley     y[9*i+5] += sum6;
16112b692628SMatthew Knepley     y[9*i+6] += sum7;
16122b692628SMatthew Knepley     y[9*i+7] += sum8;
16132b692628SMatthew Knepley     y[9*i+8] += sum9;
16142b692628SMatthew Knepley   }
16152b692628SMatthew Knepley 
1616efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
16173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16181ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16192b692628SMatthew Knepley   PetscFunctionReturn(0);
16202b692628SMatthew Knepley }
16212b692628SMatthew Knepley 
16222b692628SMatthew Knepley #undef __FUNCT__
16232b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1624dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
16252b692628SMatthew Knepley {
16262b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
16272b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1628d2840e09SBarry Smith   const PetscScalar *x,*v;
1629d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1630dfbe8321SBarry Smith   PetscErrorCode    ierr;
1631d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1632d2840e09SBarry Smith   PetscInt          n,i;
16332b692628SMatthew Knepley 
16342b692628SMatthew Knepley   PetscFunctionBegin;
16352b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
16371ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
16382b692628SMatthew Knepley   for (i=0; i<m; i++) {
16392b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
16402b692628SMatthew Knepley     v      = a->a + a->i[i] ;
16412b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
16422b692628SMatthew Knepley     alpha1 = x[9*i];
16432b692628SMatthew Knepley     alpha2 = x[9*i+1];
16442b692628SMatthew Knepley     alpha3 = x[9*i+2];
16452b692628SMatthew Knepley     alpha4 = x[9*i+3];
16462b692628SMatthew Knepley     alpha5 = x[9*i+4];
16472b692628SMatthew Knepley     alpha6 = x[9*i+5];
16482b692628SMatthew Knepley     alpha7 = x[9*i+6];
16492b692628SMatthew Knepley     alpha8 = x[9*i+7];
16502b692628SMatthew Knepley     alpha9 = x[9*i+8];
16512b692628SMatthew Knepley     while (n-->0) {
16522b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16532b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16542b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16552b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16562b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16572b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16582b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16592b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16602b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16612b692628SMatthew Knepley       idx++; v++;
16622b692628SMatthew Knepley     }
16632b692628SMatthew Knepley   }
1664dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16653649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16661ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16672b692628SMatthew Knepley   PetscFunctionReturn(0);
16682b692628SMatthew Knepley }
1669b9cda22cSBarry Smith #undef __FUNCT__
1670b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1671b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1672b9cda22cSBarry Smith {
1673b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1674b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1675d2840e09SBarry Smith   const PetscScalar *x,*v;
1676d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1677b9cda22cSBarry Smith   PetscErrorCode    ierr;
1678d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1679d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1680b9cda22cSBarry Smith 
1681b9cda22cSBarry Smith   PetscFunctionBegin;
16823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1683b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1684b9cda22cSBarry Smith   idx  = a->j;
1685b9cda22cSBarry Smith   v    = a->a;
1686b9cda22cSBarry Smith   ii   = a->i;
1687b9cda22cSBarry Smith 
1688b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1689b9cda22cSBarry Smith     jrow = ii[i];
1690b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1691b9cda22cSBarry Smith     sum1  = 0.0;
1692b9cda22cSBarry Smith     sum2  = 0.0;
1693b9cda22cSBarry Smith     sum3  = 0.0;
1694b9cda22cSBarry Smith     sum4  = 0.0;
1695b9cda22cSBarry Smith     sum5  = 0.0;
1696b9cda22cSBarry Smith     sum6  = 0.0;
1697b9cda22cSBarry Smith     sum7  = 0.0;
1698b9cda22cSBarry Smith     sum8  = 0.0;
1699b9cda22cSBarry Smith     sum9  = 0.0;
1700b9cda22cSBarry Smith     sum10 = 0.0;
1701b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1702b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1703b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1704b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1705b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1706b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1707b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1708b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1709b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1710b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1711b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1712b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1713b9cda22cSBarry Smith       jrow++;
1714b9cda22cSBarry Smith      }
1715b9cda22cSBarry Smith     y[10*i]   = sum1;
1716b9cda22cSBarry Smith     y[10*i+1] = sum2;
1717b9cda22cSBarry Smith     y[10*i+2] = sum3;
1718b9cda22cSBarry Smith     y[10*i+3] = sum4;
1719b9cda22cSBarry Smith     y[10*i+4] = sum5;
1720b9cda22cSBarry Smith     y[10*i+5] = sum6;
1721b9cda22cSBarry Smith     y[10*i+6] = sum7;
1722b9cda22cSBarry Smith     y[10*i+7] = sum8;
1723b9cda22cSBarry Smith     y[10*i+8] = sum9;
1724b9cda22cSBarry Smith     y[10*i+9] = sum10;
1725b9cda22cSBarry Smith   }
1726b9cda22cSBarry Smith 
1727dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
17283649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1729b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1730b9cda22cSBarry Smith   PetscFunctionReturn(0);
1731b9cda22cSBarry Smith }
1732b9cda22cSBarry Smith 
1733b9cda22cSBarry Smith #undef __FUNCT__
1734dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1735b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1736b9cda22cSBarry Smith {
1737b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1738b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1739d2840e09SBarry Smith   const PetscScalar *x,*v;
1740d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1741b9cda22cSBarry Smith   PetscErrorCode    ierr;
1742d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1743b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1744b9cda22cSBarry Smith 
1745b9cda22cSBarry Smith   PetscFunctionBegin;
1746b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
17473649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1748b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1749b9cda22cSBarry Smith   idx  = a->j;
1750b9cda22cSBarry Smith   v    = a->a;
1751b9cda22cSBarry Smith   ii   = a->i;
1752b9cda22cSBarry Smith 
1753b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1754b9cda22cSBarry Smith     jrow = ii[i];
1755b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1756b9cda22cSBarry Smith     sum1  = 0.0;
1757b9cda22cSBarry Smith     sum2  = 0.0;
1758b9cda22cSBarry Smith     sum3  = 0.0;
1759b9cda22cSBarry Smith     sum4  = 0.0;
1760b9cda22cSBarry Smith     sum5  = 0.0;
1761b9cda22cSBarry Smith     sum6  = 0.0;
1762b9cda22cSBarry Smith     sum7  = 0.0;
1763b9cda22cSBarry Smith     sum8  = 0.0;
1764b9cda22cSBarry Smith     sum9  = 0.0;
1765b9cda22cSBarry Smith     sum10 = 0.0;
1766b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1767b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1768b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1769b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1770b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1771b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1772b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1773b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1774b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1775b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1776b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1777b9cda22cSBarry Smith       jrow++;
1778b9cda22cSBarry Smith      }
1779b9cda22cSBarry Smith     y[10*i]   += sum1;
1780b9cda22cSBarry Smith     y[10*i+1] += sum2;
1781b9cda22cSBarry Smith     y[10*i+2] += sum3;
1782b9cda22cSBarry Smith     y[10*i+3] += sum4;
1783b9cda22cSBarry Smith     y[10*i+4] += sum5;
1784b9cda22cSBarry Smith     y[10*i+5] += sum6;
1785b9cda22cSBarry Smith     y[10*i+6] += sum7;
1786b9cda22cSBarry Smith     y[10*i+7] += sum8;
1787b9cda22cSBarry Smith     y[10*i+8] += sum9;
1788b9cda22cSBarry Smith     y[10*i+9] += sum10;
1789b9cda22cSBarry Smith   }
1790b9cda22cSBarry Smith 
1791dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17923649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1793b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1794b9cda22cSBarry Smith   PetscFunctionReturn(0);
1795b9cda22cSBarry Smith }
1796b9cda22cSBarry Smith 
1797b9cda22cSBarry Smith #undef __FUNCT__
1798b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1799b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1800b9cda22cSBarry Smith {
1801b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1802b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1803d2840e09SBarry Smith   const PetscScalar *x,*v;
1804d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1805b9cda22cSBarry Smith   PetscErrorCode    ierr;
1806d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1807d2840e09SBarry Smith   PetscInt          n,i;
1808b9cda22cSBarry Smith 
1809b9cda22cSBarry Smith   PetscFunctionBegin;
1810d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
18113649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1812b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1813b9cda22cSBarry Smith 
1814b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1815b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1816b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1817b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1818e29fdc22SBarry Smith     alpha1 = x[10*i];
1819e29fdc22SBarry Smith     alpha2 = x[10*i+1];
1820e29fdc22SBarry Smith     alpha3 = x[10*i+2];
1821e29fdc22SBarry Smith     alpha4 = x[10*i+3];
1822e29fdc22SBarry Smith     alpha5 = x[10*i+4];
1823e29fdc22SBarry Smith     alpha6 = x[10*i+5];
1824e29fdc22SBarry Smith     alpha7 = x[10*i+6];
1825e29fdc22SBarry Smith     alpha8 = x[10*i+7];
1826e29fdc22SBarry Smith     alpha9 = x[10*i+8];
1827b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1828b9cda22cSBarry Smith     while (n-->0) {
1829e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1830e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1831e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1832e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1833e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1834e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1835e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1836e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1837e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1838b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1839b9cda22cSBarry Smith       idx++; v++;
1840b9cda22cSBarry Smith     }
1841b9cda22cSBarry Smith   }
1842dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18433649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1844b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1845b9cda22cSBarry Smith   PetscFunctionReturn(0);
1846b9cda22cSBarry Smith }
1847b9cda22cSBarry Smith 
1848b9cda22cSBarry Smith #undef __FUNCT__
1849b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1850b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1851b9cda22cSBarry Smith {
1852b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1853b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1854d2840e09SBarry Smith   const PetscScalar *x,*v;
1855d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1856b9cda22cSBarry Smith   PetscErrorCode    ierr;
1857d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1858d2840e09SBarry Smith   PetscInt          n,i;
1859b9cda22cSBarry Smith 
1860b9cda22cSBarry Smith   PetscFunctionBegin;
1861b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18623649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1863b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1864b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1865b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1866b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1867b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1868b9cda22cSBarry Smith     alpha1 = x[10*i];
1869b9cda22cSBarry Smith     alpha2 = x[10*i+1];
1870b9cda22cSBarry Smith     alpha3 = x[10*i+2];
1871b9cda22cSBarry Smith     alpha4 = x[10*i+3];
1872b9cda22cSBarry Smith     alpha5 = x[10*i+4];
1873b9cda22cSBarry Smith     alpha6 = x[10*i+5];
1874b9cda22cSBarry Smith     alpha7 = x[10*i+6];
1875b9cda22cSBarry Smith     alpha8 = x[10*i+7];
1876b9cda22cSBarry Smith     alpha9 = x[10*i+8];
1877b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1878b9cda22cSBarry Smith     while (n-->0) {
1879b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1880b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1881b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1882b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1883b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1884b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1885b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1886b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1887b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1888b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1889b9cda22cSBarry Smith       idx++; v++;
1890b9cda22cSBarry Smith     }
1891b9cda22cSBarry Smith   }
1892dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18933649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1894b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1895b9cda22cSBarry Smith   PetscFunctionReturn(0);
1896b9cda22cSBarry Smith }
1897b9cda22cSBarry Smith 
18982b692628SMatthew Knepley 
18992f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
19002f7816d4SBarry Smith #undef __FUNCT__
1901dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11"
1902dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1903dbdd7285SBarry Smith {
1904dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1905dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1906d2840e09SBarry Smith   const PetscScalar *x,*v;
1907d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1908dbdd7285SBarry Smith   PetscErrorCode    ierr;
1909d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1910d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1911dbdd7285SBarry Smith 
1912dbdd7285SBarry Smith   PetscFunctionBegin;
19133649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1914dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1915dbdd7285SBarry Smith   idx  = a->j;
1916dbdd7285SBarry Smith   v    = a->a;
1917dbdd7285SBarry Smith   ii   = a->i;
1918dbdd7285SBarry Smith 
1919dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1920dbdd7285SBarry Smith     jrow = ii[i];
1921dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
1922dbdd7285SBarry Smith     sum1  = 0.0;
1923dbdd7285SBarry Smith     sum2  = 0.0;
1924dbdd7285SBarry Smith     sum3  = 0.0;
1925dbdd7285SBarry Smith     sum4  = 0.0;
1926dbdd7285SBarry Smith     sum5  = 0.0;
1927dbdd7285SBarry Smith     sum6  = 0.0;
1928dbdd7285SBarry Smith     sum7  = 0.0;
1929dbdd7285SBarry Smith     sum8  = 0.0;
1930dbdd7285SBarry Smith     sum9  = 0.0;
1931dbdd7285SBarry Smith     sum10 = 0.0;
1932dbdd7285SBarry Smith     sum11 = 0.0;
1933dbdd7285SBarry Smith     nonzerorow += (n>0);
1934dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1935dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1936dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1937dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1938dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1939dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1940dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1941dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1942dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1943dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1944dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1945dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1946dbdd7285SBarry Smith       jrow++;
1947dbdd7285SBarry Smith      }
1948dbdd7285SBarry Smith     y[11*i]   = sum1;
1949dbdd7285SBarry Smith     y[11*i+1] = sum2;
1950dbdd7285SBarry Smith     y[11*i+2] = sum3;
1951dbdd7285SBarry Smith     y[11*i+3] = sum4;
1952dbdd7285SBarry Smith     y[11*i+4] = sum5;
1953dbdd7285SBarry Smith     y[11*i+5] = sum6;
1954dbdd7285SBarry Smith     y[11*i+6] = sum7;
1955dbdd7285SBarry Smith     y[11*i+7] = sum8;
1956dbdd7285SBarry Smith     y[11*i+8] = sum9;
1957dbdd7285SBarry Smith     y[11*i+9] = sum10;
1958dbdd7285SBarry Smith     y[11*i+10] = sum11;
1959dbdd7285SBarry Smith   }
1960dbdd7285SBarry Smith 
1961dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19623649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1963dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1964dbdd7285SBarry Smith   PetscFunctionReturn(0);
1965dbdd7285SBarry Smith }
1966dbdd7285SBarry Smith 
1967dbdd7285SBarry Smith #undef __FUNCT__
1968dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1969dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1970dbdd7285SBarry Smith {
1971dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1972dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1973d2840e09SBarry Smith   const PetscScalar *x,*v;
1974d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1975dbdd7285SBarry Smith   PetscErrorCode    ierr;
1976d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1977dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1978dbdd7285SBarry Smith 
1979dbdd7285SBarry Smith   PetscFunctionBegin;
1980dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19813649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1982dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1983dbdd7285SBarry Smith   idx  = a->j;
1984dbdd7285SBarry Smith   v    = a->a;
1985dbdd7285SBarry Smith   ii   = a->i;
1986dbdd7285SBarry Smith 
1987dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1988dbdd7285SBarry Smith     jrow = ii[i];
1989dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
1990dbdd7285SBarry Smith     sum1  = 0.0;
1991dbdd7285SBarry Smith     sum2  = 0.0;
1992dbdd7285SBarry Smith     sum3  = 0.0;
1993dbdd7285SBarry Smith     sum4  = 0.0;
1994dbdd7285SBarry Smith     sum5  = 0.0;
1995dbdd7285SBarry Smith     sum6  = 0.0;
1996dbdd7285SBarry Smith     sum7  = 0.0;
1997dbdd7285SBarry Smith     sum8  = 0.0;
1998dbdd7285SBarry Smith     sum9  = 0.0;
1999dbdd7285SBarry Smith     sum10 = 0.0;
2000dbdd7285SBarry Smith     sum11 = 0.0;
2001dbdd7285SBarry Smith     for (j=0; j<n; j++) {
2002dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
2003dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
2004dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
2005dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
2006dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
2007dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
2008dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
2009dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
2010dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
2011dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
2012dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
2013dbdd7285SBarry Smith       jrow++;
2014dbdd7285SBarry Smith      }
2015dbdd7285SBarry Smith     y[11*i]   += sum1;
2016dbdd7285SBarry Smith     y[11*i+1] += sum2;
2017dbdd7285SBarry Smith     y[11*i+2] += sum3;
2018dbdd7285SBarry Smith     y[11*i+3] += sum4;
2019dbdd7285SBarry Smith     y[11*i+4] += sum5;
2020dbdd7285SBarry Smith     y[11*i+5] += sum6;
2021dbdd7285SBarry Smith     y[11*i+6] += sum7;
2022dbdd7285SBarry Smith     y[11*i+7] += sum8;
2023dbdd7285SBarry Smith     y[11*i+8] += sum9;
2024dbdd7285SBarry Smith     y[11*i+9] += sum10;
2025dbdd7285SBarry Smith     y[11*i+10] += sum11;
2026dbdd7285SBarry Smith   }
2027dbdd7285SBarry Smith 
2028dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2030dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2031dbdd7285SBarry Smith   PetscFunctionReturn(0);
2032dbdd7285SBarry Smith }
2033dbdd7285SBarry Smith 
2034dbdd7285SBarry Smith #undef __FUNCT__
2035dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2036dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2037dbdd7285SBarry Smith {
2038dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2039dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2040d2840e09SBarry Smith   const PetscScalar *x,*v;
2041d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2042dbdd7285SBarry Smith   PetscErrorCode    ierr;
2043d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2044d2840e09SBarry Smith   PetscInt          n,i;
2045dbdd7285SBarry Smith 
2046dbdd7285SBarry Smith   PetscFunctionBegin;
2047d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
20483649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2049dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2050dbdd7285SBarry Smith 
2051dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2052dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2053dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2054dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2055dbdd7285SBarry Smith     alpha1 = x[11*i];
2056dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2057dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2058dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2059dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2060dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2061dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2062dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2063dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2064dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2065dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2066dbdd7285SBarry Smith     while (n-->0) {
2067dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2068dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2069dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2070dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2071dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2072dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2073dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2074dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2075dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2076dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2077dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2078dbdd7285SBarry Smith       idx++; v++;
2079dbdd7285SBarry Smith     }
2080dbdd7285SBarry Smith   }
2081dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20823649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2083dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2084dbdd7285SBarry Smith   PetscFunctionReturn(0);
2085dbdd7285SBarry Smith }
2086dbdd7285SBarry Smith 
2087dbdd7285SBarry Smith #undef __FUNCT__
2088dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2089dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2090dbdd7285SBarry Smith {
2091dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2092dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2093d2840e09SBarry Smith   const PetscScalar *x,*v;
2094d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2095dbdd7285SBarry Smith   PetscErrorCode    ierr;
2096d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2097d2840e09SBarry Smith   PetscInt          n,i;
2098dbdd7285SBarry Smith 
2099dbdd7285SBarry Smith   PetscFunctionBegin;
2100dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
21013649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2102dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2103dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2104dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2105dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2106dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2107dbdd7285SBarry Smith     alpha1 = x[11*i];
2108dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2109dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2110dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2111dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2112dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2113dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2114dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2115dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2116dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2117dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2118dbdd7285SBarry Smith     while (n-->0) {
2119dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2120dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2121dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2122dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2123dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2124dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2125dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2126dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2127dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2128dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2129dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2130dbdd7285SBarry Smith       idx++; v++;
2131dbdd7285SBarry Smith     }
2132dbdd7285SBarry Smith   }
2133dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
21343649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2135dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2136dbdd7285SBarry Smith   PetscFunctionReturn(0);
2137dbdd7285SBarry Smith }
2138dbdd7285SBarry Smith 
2139dbdd7285SBarry Smith 
2140dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2141dbdd7285SBarry Smith #undef __FUNCT__
21422f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
2143dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21442f7816d4SBarry Smith {
21452f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21462f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2147d2840e09SBarry Smith   const PetscScalar *x,*v;
2148d2840e09SBarry Smith   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21492f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2150dfbe8321SBarry Smith   PetscErrorCode     ierr;
2151d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2152d2840e09SBarry Smith   PetscInt           nonzerorow=0,n,i,jrow,j;
21532f7816d4SBarry Smith 
21542f7816d4SBarry Smith   PetscFunctionBegin;
21553649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21561ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
21572f7816d4SBarry Smith   idx  = a->j;
21582f7816d4SBarry Smith   v    = a->a;
21592f7816d4SBarry Smith   ii   = a->i;
21602f7816d4SBarry Smith 
21612f7816d4SBarry Smith   for (i=0; i<m; i++) {
21622f7816d4SBarry Smith     jrow = ii[i];
21632f7816d4SBarry Smith     n    = ii[i+1] - jrow;
21642f7816d4SBarry Smith     sum1  = 0.0;
21652f7816d4SBarry Smith     sum2  = 0.0;
21662f7816d4SBarry Smith     sum3  = 0.0;
21672f7816d4SBarry Smith     sum4  = 0.0;
21682f7816d4SBarry Smith     sum5  = 0.0;
21692f7816d4SBarry Smith     sum6  = 0.0;
21702f7816d4SBarry Smith     sum7  = 0.0;
21712f7816d4SBarry Smith     sum8  = 0.0;
21722f7816d4SBarry Smith     sum9  = 0.0;
21732f7816d4SBarry Smith     sum10 = 0.0;
21742f7816d4SBarry Smith     sum11 = 0.0;
21752f7816d4SBarry Smith     sum12 = 0.0;
21762f7816d4SBarry Smith     sum13 = 0.0;
21772f7816d4SBarry Smith     sum14 = 0.0;
21782f7816d4SBarry Smith     sum15 = 0.0;
21792f7816d4SBarry Smith     sum16 = 0.0;
2180b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21812f7816d4SBarry Smith     for (j=0; j<n; j++) {
21822f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21832f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21842f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21852f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21862f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
21872f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
21882f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
21892f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
21902f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
21912f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
21922f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
21932f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
21942f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
21952f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
21962f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
21972f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
21982f7816d4SBarry Smith       jrow++;
21992f7816d4SBarry Smith      }
22002f7816d4SBarry Smith     y[16*i]    = sum1;
22012f7816d4SBarry Smith     y[16*i+1]  = sum2;
22022f7816d4SBarry Smith     y[16*i+2]  = sum3;
22032f7816d4SBarry Smith     y[16*i+3]  = sum4;
22042f7816d4SBarry Smith     y[16*i+4]  = sum5;
22052f7816d4SBarry Smith     y[16*i+5]  = sum6;
22062f7816d4SBarry Smith     y[16*i+6]  = sum7;
22072f7816d4SBarry Smith     y[16*i+7]  = sum8;
22082f7816d4SBarry Smith     y[16*i+8]  = sum9;
22092f7816d4SBarry Smith     y[16*i+9]  = sum10;
22102f7816d4SBarry Smith     y[16*i+10] = sum11;
22112f7816d4SBarry Smith     y[16*i+11] = sum12;
22122f7816d4SBarry Smith     y[16*i+12] = sum13;
22132f7816d4SBarry Smith     y[16*i+13] = sum14;
22142f7816d4SBarry Smith     y[16*i+14] = sum15;
22152f7816d4SBarry Smith     y[16*i+15] = sum16;
22162f7816d4SBarry Smith   }
22172f7816d4SBarry Smith 
2218dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
22193649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22201ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22212f7816d4SBarry Smith   PetscFunctionReturn(0);
22222f7816d4SBarry Smith }
22232f7816d4SBarry Smith 
22242f7816d4SBarry Smith #undef __FUNCT__
22252f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2226dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
22272f7816d4SBarry Smith {
22282f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22292f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2230d2840e09SBarry Smith   const PetscScalar *x,*v;
2231d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
22322f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2233dfbe8321SBarry Smith   PetscErrorCode    ierr;
2234d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2235d2840e09SBarry Smith   PetscInt          n,i;
22362f7816d4SBarry Smith 
22372f7816d4SBarry Smith   PetscFunctionBegin;
2238d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
22393649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22401ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2241bfec09a0SHong Zhang 
22422f7816d4SBarry Smith   for (i=0; i<m; i++) {
2243bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2244bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
22452f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
22462f7816d4SBarry Smith     alpha1  = x[16*i];
22472f7816d4SBarry Smith     alpha2  = x[16*i+1];
22482f7816d4SBarry Smith     alpha3  = x[16*i+2];
22492f7816d4SBarry Smith     alpha4  = x[16*i+3];
22502f7816d4SBarry Smith     alpha5  = x[16*i+4];
22512f7816d4SBarry Smith     alpha6  = x[16*i+5];
22522f7816d4SBarry Smith     alpha7  = x[16*i+6];
22532f7816d4SBarry Smith     alpha8  = x[16*i+7];
22542f7816d4SBarry Smith     alpha9  = x[16*i+8];
22552f7816d4SBarry Smith     alpha10 = x[16*i+9];
22562f7816d4SBarry Smith     alpha11 = x[16*i+10];
22572f7816d4SBarry Smith     alpha12 = x[16*i+11];
22582f7816d4SBarry Smith     alpha13 = x[16*i+12];
22592f7816d4SBarry Smith     alpha14 = x[16*i+13];
22602f7816d4SBarry Smith     alpha15 = x[16*i+14];
22612f7816d4SBarry Smith     alpha16 = x[16*i+15];
22622f7816d4SBarry Smith     while (n-->0) {
22632f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22642f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22652f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22662f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22672f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22682f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22692f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22702f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22712f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22722f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22732f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22742f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22752f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22762f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22772f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22782f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22792f7816d4SBarry Smith       idx++; v++;
22802f7816d4SBarry Smith     }
22812f7816d4SBarry Smith   }
2282dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22833649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22841ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22852f7816d4SBarry Smith   PetscFunctionReturn(0);
22862f7816d4SBarry Smith }
22872f7816d4SBarry Smith 
22882f7816d4SBarry Smith #undef __FUNCT__
22892f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2290dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
22912f7816d4SBarry Smith {
22922f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22932f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2294d2840e09SBarry Smith   const PetscScalar *x,*v;
2295d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
22962f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2297dfbe8321SBarry Smith   PetscErrorCode    ierr;
2298d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2299b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
23002f7816d4SBarry Smith 
23012f7816d4SBarry Smith   PetscFunctionBegin;
23022f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23033649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23041ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23052f7816d4SBarry Smith   idx  = a->j;
23062f7816d4SBarry Smith   v    = a->a;
23072f7816d4SBarry Smith   ii   = a->i;
23082f7816d4SBarry Smith 
23092f7816d4SBarry Smith   for (i=0; i<m; i++) {
23102f7816d4SBarry Smith     jrow = ii[i];
23112f7816d4SBarry Smith     n    = ii[i+1] - jrow;
23122f7816d4SBarry Smith     sum1  = 0.0;
23132f7816d4SBarry Smith     sum2  = 0.0;
23142f7816d4SBarry Smith     sum3  = 0.0;
23152f7816d4SBarry Smith     sum4  = 0.0;
23162f7816d4SBarry Smith     sum5  = 0.0;
23172f7816d4SBarry Smith     sum6  = 0.0;
23182f7816d4SBarry Smith     sum7  = 0.0;
23192f7816d4SBarry Smith     sum8  = 0.0;
23202f7816d4SBarry Smith     sum9  = 0.0;
23212f7816d4SBarry Smith     sum10 = 0.0;
23222f7816d4SBarry Smith     sum11 = 0.0;
23232f7816d4SBarry Smith     sum12 = 0.0;
23242f7816d4SBarry Smith     sum13 = 0.0;
23252f7816d4SBarry Smith     sum14 = 0.0;
23262f7816d4SBarry Smith     sum15 = 0.0;
23272f7816d4SBarry Smith     sum16 = 0.0;
23282f7816d4SBarry Smith     for (j=0; j<n; j++) {
23292f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
23302f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
23312f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
23322f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
23332f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
23342f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
23352f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
23362f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
23372f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
23382f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
23392f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
23402f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
23412f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
23422f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
23432f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
23442f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
23452f7816d4SBarry Smith       jrow++;
23462f7816d4SBarry Smith      }
23472f7816d4SBarry Smith     y[16*i]    += sum1;
23482f7816d4SBarry Smith     y[16*i+1]  += sum2;
23492f7816d4SBarry Smith     y[16*i+2]  += sum3;
23502f7816d4SBarry Smith     y[16*i+3]  += sum4;
23512f7816d4SBarry Smith     y[16*i+4]  += sum5;
23522f7816d4SBarry Smith     y[16*i+5]  += sum6;
23532f7816d4SBarry Smith     y[16*i+6]  += sum7;
23542f7816d4SBarry Smith     y[16*i+7]  += sum8;
23552f7816d4SBarry Smith     y[16*i+8]  += sum9;
23562f7816d4SBarry Smith     y[16*i+9]  += sum10;
23572f7816d4SBarry Smith     y[16*i+10] += sum11;
23582f7816d4SBarry Smith     y[16*i+11] += sum12;
23592f7816d4SBarry Smith     y[16*i+12] += sum13;
23602f7816d4SBarry Smith     y[16*i+13] += sum14;
23612f7816d4SBarry Smith     y[16*i+14] += sum15;
23622f7816d4SBarry Smith     y[16*i+15] += sum16;
23632f7816d4SBarry Smith   }
23642f7816d4SBarry Smith 
2365dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23663649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23671ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23682f7816d4SBarry Smith   PetscFunctionReturn(0);
23692f7816d4SBarry Smith }
23702f7816d4SBarry Smith 
23712f7816d4SBarry Smith #undef __FUNCT__
23722f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2373dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23742f7816d4SBarry Smith {
23752f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23762f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2377d2840e09SBarry Smith   const PetscScalar *x,*v;
2378d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23792f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2380dfbe8321SBarry Smith   PetscErrorCode    ierr;
2381d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2382d2840e09SBarry Smith   PetscInt          n,i;
23832f7816d4SBarry Smith 
23842f7816d4SBarry Smith   PetscFunctionBegin;
23852f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23882f7816d4SBarry Smith   for (i=0; i<m; i++) {
2389bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2390bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
23912f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
23922f7816d4SBarry Smith     alpha1 = x[16*i];
23932f7816d4SBarry Smith     alpha2 = x[16*i+1];
23942f7816d4SBarry Smith     alpha3 = x[16*i+2];
23952f7816d4SBarry Smith     alpha4 = x[16*i+3];
23962f7816d4SBarry Smith     alpha5 = x[16*i+4];
23972f7816d4SBarry Smith     alpha6 = x[16*i+5];
23982f7816d4SBarry Smith     alpha7 = x[16*i+6];
23992f7816d4SBarry Smith     alpha8 = x[16*i+7];
24002f7816d4SBarry Smith     alpha9  = x[16*i+8];
24012f7816d4SBarry Smith     alpha10 = x[16*i+9];
24022f7816d4SBarry Smith     alpha11 = x[16*i+10];
24032f7816d4SBarry Smith     alpha12 = x[16*i+11];
24042f7816d4SBarry Smith     alpha13 = x[16*i+12];
24052f7816d4SBarry Smith     alpha14 = x[16*i+13];
24062f7816d4SBarry Smith     alpha15 = x[16*i+14];
24072f7816d4SBarry Smith     alpha16 = x[16*i+15];
24082f7816d4SBarry Smith     while (n-->0) {
24092f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
24102f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
24112f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
24122f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
24132f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
24142f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
24152f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
24162f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
24172f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
24182f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
24192f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
24202f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
24212f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
24222f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
24232f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
24242f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
24252f7816d4SBarry Smith       idx++; v++;
24262f7816d4SBarry Smith     }
24272f7816d4SBarry Smith   }
2428dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
24293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
24301ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
243166ed3db0SBarry Smith   PetscFunctionReturn(0);
243266ed3db0SBarry Smith }
243366ed3db0SBarry Smith 
2434ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2435ed1c418cSBarry Smith #undef __FUNCT__
2436ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18"
2437ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2438ed1c418cSBarry Smith {
2439ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2440ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2441d2840e09SBarry Smith   const PetscScalar *x,*v;
2442d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2443ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2444ed1c418cSBarry Smith   PetscErrorCode    ierr;
2445d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2446d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2447ed1c418cSBarry Smith 
2448ed1c418cSBarry Smith   PetscFunctionBegin;
24493649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2450ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2451ed1c418cSBarry Smith   idx  = a->j;
2452ed1c418cSBarry Smith   v    = a->a;
2453ed1c418cSBarry Smith   ii   = a->i;
2454ed1c418cSBarry Smith 
2455ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2456ed1c418cSBarry Smith     jrow = ii[i];
2457ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2458ed1c418cSBarry Smith     sum1  = 0.0;
2459ed1c418cSBarry Smith     sum2  = 0.0;
2460ed1c418cSBarry Smith     sum3  = 0.0;
2461ed1c418cSBarry Smith     sum4  = 0.0;
2462ed1c418cSBarry Smith     sum5  = 0.0;
2463ed1c418cSBarry Smith     sum6  = 0.0;
2464ed1c418cSBarry Smith     sum7  = 0.0;
2465ed1c418cSBarry Smith     sum8  = 0.0;
2466ed1c418cSBarry Smith     sum9  = 0.0;
2467ed1c418cSBarry Smith     sum10 = 0.0;
2468ed1c418cSBarry Smith     sum11 = 0.0;
2469ed1c418cSBarry Smith     sum12 = 0.0;
2470ed1c418cSBarry Smith     sum13 = 0.0;
2471ed1c418cSBarry Smith     sum14 = 0.0;
2472ed1c418cSBarry Smith     sum15 = 0.0;
2473ed1c418cSBarry Smith     sum16 = 0.0;
2474ed1c418cSBarry Smith     sum17 = 0.0;
2475ed1c418cSBarry Smith     sum18 = 0.0;
2476ed1c418cSBarry Smith     nonzerorow += (n>0);
2477ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2478ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2479ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2480ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2481ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2482ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2483ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2484ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2485ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2486ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2487ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2488ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2489ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2490ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2491ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2492ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2493ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2494ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2495ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2496ed1c418cSBarry Smith       jrow++;
2497ed1c418cSBarry Smith      }
2498ed1c418cSBarry Smith     y[18*i]    = sum1;
2499ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2500ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2501ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2502ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2503ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2504ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2505ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2506ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2507ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2508ed1c418cSBarry Smith     y[18*i+10] = sum11;
2509ed1c418cSBarry Smith     y[18*i+11] = sum12;
2510ed1c418cSBarry Smith     y[18*i+12] = sum13;
2511ed1c418cSBarry Smith     y[18*i+13] = sum14;
2512ed1c418cSBarry Smith     y[18*i+14] = sum15;
2513ed1c418cSBarry Smith     y[18*i+15] = sum16;
2514ed1c418cSBarry Smith     y[18*i+16] = sum17;
2515ed1c418cSBarry Smith     y[18*i+17] = sum18;
2516ed1c418cSBarry Smith   }
2517ed1c418cSBarry Smith 
2518dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
25193649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2520ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2521ed1c418cSBarry Smith   PetscFunctionReturn(0);
2522ed1c418cSBarry Smith }
2523ed1c418cSBarry Smith 
2524ed1c418cSBarry Smith #undef __FUNCT__
2525ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2526ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2527ed1c418cSBarry Smith {
2528ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2529ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2530d2840e09SBarry Smith   const PetscScalar *x,*v;
2531d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2532ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2533ed1c418cSBarry Smith   PetscErrorCode    ierr;
2534d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2535d2840e09SBarry Smith   PetscInt          n,i;
2536ed1c418cSBarry Smith 
2537ed1c418cSBarry Smith   PetscFunctionBegin;
2538d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
25393649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2540ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2541ed1c418cSBarry Smith 
2542ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2543ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2544ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2545ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2546ed1c418cSBarry Smith     alpha1  = x[18*i];
2547ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2548ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2549ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2550ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2551ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2552ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2553ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2554ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2555ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2556ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2557ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2558ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2559ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2560ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2561ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2562ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2563ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2564ed1c418cSBarry Smith     while (n-->0) {
2565ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2566ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2567ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2568ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2569ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2570ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2571ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2572ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2573ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2574ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2575ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2576ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2577ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2578ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2579ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2580ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2581ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2582ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2583ed1c418cSBarry Smith       idx++; v++;
2584ed1c418cSBarry Smith     }
2585ed1c418cSBarry Smith   }
2586dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25873649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2588ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2589ed1c418cSBarry Smith   PetscFunctionReturn(0);
2590ed1c418cSBarry Smith }
2591ed1c418cSBarry Smith 
2592ed1c418cSBarry Smith #undef __FUNCT__
2593ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2594ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2595ed1c418cSBarry Smith {
2596ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2597ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2598d2840e09SBarry Smith   const PetscScalar *x,*v;
2599d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2600ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2601ed1c418cSBarry Smith   PetscErrorCode    ierr;
2602d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2603ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2604ed1c418cSBarry Smith 
2605ed1c418cSBarry Smith   PetscFunctionBegin;
2606ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26073649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2608ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2609ed1c418cSBarry Smith   idx  = a->j;
2610ed1c418cSBarry Smith   v    = a->a;
2611ed1c418cSBarry Smith   ii   = a->i;
2612ed1c418cSBarry Smith 
2613ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2614ed1c418cSBarry Smith     jrow = ii[i];
2615ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2616ed1c418cSBarry Smith     sum1  = 0.0;
2617ed1c418cSBarry Smith     sum2  = 0.0;
2618ed1c418cSBarry Smith     sum3  = 0.0;
2619ed1c418cSBarry Smith     sum4  = 0.0;
2620ed1c418cSBarry Smith     sum5  = 0.0;
2621ed1c418cSBarry Smith     sum6  = 0.0;
2622ed1c418cSBarry Smith     sum7  = 0.0;
2623ed1c418cSBarry Smith     sum8  = 0.0;
2624ed1c418cSBarry Smith     sum9  = 0.0;
2625ed1c418cSBarry Smith     sum10 = 0.0;
2626ed1c418cSBarry Smith     sum11 = 0.0;
2627ed1c418cSBarry Smith     sum12 = 0.0;
2628ed1c418cSBarry Smith     sum13 = 0.0;
2629ed1c418cSBarry Smith     sum14 = 0.0;
2630ed1c418cSBarry Smith     sum15 = 0.0;
2631ed1c418cSBarry Smith     sum16 = 0.0;
2632ed1c418cSBarry Smith     sum17 = 0.0;
2633ed1c418cSBarry Smith     sum18 = 0.0;
2634ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2635ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2636ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2637ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2638ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2639ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2640ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2641ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2642ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2643ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2644ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2645ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2646ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2647ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2648ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2649ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2650ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2651ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2652ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2653ed1c418cSBarry Smith       jrow++;
2654ed1c418cSBarry Smith      }
2655ed1c418cSBarry Smith     y[18*i]    += sum1;
2656ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2657ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2658ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2659ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2660ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2661ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2662ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2663ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2664ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2665ed1c418cSBarry Smith     y[18*i+10] += sum11;
2666ed1c418cSBarry Smith     y[18*i+11] += sum12;
2667ed1c418cSBarry Smith     y[18*i+12] += sum13;
2668ed1c418cSBarry Smith     y[18*i+13] += sum14;
2669ed1c418cSBarry Smith     y[18*i+14] += sum15;
2670ed1c418cSBarry Smith     y[18*i+15] += sum16;
2671ed1c418cSBarry Smith     y[18*i+16] += sum17;
2672ed1c418cSBarry Smith     y[18*i+17] += sum18;
2673ed1c418cSBarry Smith   }
2674ed1c418cSBarry Smith 
2675dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26763649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2677ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2678ed1c418cSBarry Smith   PetscFunctionReturn(0);
2679ed1c418cSBarry Smith }
2680ed1c418cSBarry Smith 
2681ed1c418cSBarry Smith #undef __FUNCT__
2682ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2683ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2684ed1c418cSBarry Smith {
2685ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2686ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2687d2840e09SBarry Smith   const PetscScalar *x,*v;
2688d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2689ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2690ed1c418cSBarry Smith   PetscErrorCode    ierr;
2691d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2692d2840e09SBarry Smith   PetscInt          n,i;
2693ed1c418cSBarry Smith 
2694ed1c418cSBarry Smith   PetscFunctionBegin;
2695ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26963649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2697ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2698ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2699ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2700ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2701ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2702ed1c418cSBarry Smith     alpha1 = x[18*i];
2703ed1c418cSBarry Smith     alpha2 = x[18*i+1];
2704ed1c418cSBarry Smith     alpha3 = x[18*i+2];
2705ed1c418cSBarry Smith     alpha4 = x[18*i+3];
2706ed1c418cSBarry Smith     alpha5 = x[18*i+4];
2707ed1c418cSBarry Smith     alpha6 = x[18*i+5];
2708ed1c418cSBarry Smith     alpha7 = x[18*i+6];
2709ed1c418cSBarry Smith     alpha8 = x[18*i+7];
2710ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2711ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2712ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2713ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2714ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2715ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2716ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2717ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2718ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2719ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2720ed1c418cSBarry Smith     while (n-->0) {
2721ed1c418cSBarry Smith       y[18*(*idx)]   += alpha1*(*v);
2722ed1c418cSBarry Smith       y[18*(*idx)+1] += alpha2*(*v);
2723ed1c418cSBarry Smith       y[18*(*idx)+2] += alpha3*(*v);
2724ed1c418cSBarry Smith       y[18*(*idx)+3] += alpha4*(*v);
2725ed1c418cSBarry Smith       y[18*(*idx)+4] += alpha5*(*v);
2726ed1c418cSBarry Smith       y[18*(*idx)+5] += alpha6*(*v);
2727ed1c418cSBarry Smith       y[18*(*idx)+6] += alpha7*(*v);
2728ed1c418cSBarry Smith       y[18*(*idx)+7] += alpha8*(*v);
2729ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2730ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2731ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2732ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2733ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2734ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2735ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2736ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2737ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2738ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2739ed1c418cSBarry Smith       idx++; v++;
2740ed1c418cSBarry Smith     }
2741ed1c418cSBarry Smith   }
2742dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
27433649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2744ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2745ed1c418cSBarry Smith   PetscFunctionReturn(0);
2746ed1c418cSBarry Smith }
2747ed1c418cSBarry Smith 
27486861f193SBarry Smith #undef __FUNCT__
27496861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N"
27506861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27516861f193SBarry Smith {
27526861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27536861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27546861f193SBarry Smith   const PetscScalar *x,*v;
27556861f193SBarry Smith   PetscScalar       *y,*sums;
27566861f193SBarry Smith   PetscErrorCode    ierr;
27576861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27586861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27596861f193SBarry Smith 
27606861f193SBarry Smith   PetscFunctionBegin;
27613649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27626861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27636861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27646861f193SBarry Smith   idx  = a->j;
27656861f193SBarry Smith   v    = a->a;
27666861f193SBarry Smith   ii   = a->i;
27676861f193SBarry Smith 
27686861f193SBarry Smith   for (i=0; i<m; i++) {
27696861f193SBarry Smith     jrow = ii[i];
27706861f193SBarry Smith     n    = ii[i+1] - jrow;
27716861f193SBarry Smith     sums = y + dof*i;
27726861f193SBarry Smith     for (j=0; j<n; j++) {
27736861f193SBarry Smith       for (k=0; k<dof; k++) {
27746861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
27756861f193SBarry Smith       }
27766861f193SBarry Smith       jrow++;
27776861f193SBarry Smith     }
27786861f193SBarry Smith   }
27796861f193SBarry Smith 
27806861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27813649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27826861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27836861f193SBarry Smith   PetscFunctionReturn(0);
27846861f193SBarry Smith }
27856861f193SBarry Smith 
27866861f193SBarry Smith #undef __FUNCT__
27876861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
27886861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27896861f193SBarry Smith {
27906861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27916861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27926861f193SBarry Smith   const PetscScalar *x,*v;
27936861f193SBarry Smith   PetscScalar       *y,*sums;
27946861f193SBarry Smith   PetscErrorCode    ierr;
27956861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27966861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27976861f193SBarry Smith 
27986861f193SBarry Smith   PetscFunctionBegin;
27996861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28003649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28016861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28026861f193SBarry Smith   idx  = a->j;
28036861f193SBarry Smith   v    = a->a;
28046861f193SBarry Smith   ii   = a->i;
28056861f193SBarry Smith 
28066861f193SBarry Smith   for (i=0; i<m; i++) {
28076861f193SBarry Smith     jrow = ii[i];
28086861f193SBarry Smith     n    = ii[i+1] - jrow;
28096861f193SBarry Smith     sums = y + dof*i;
28106861f193SBarry Smith     for (j=0; j<n; j++) {
28116861f193SBarry Smith       for (k=0; k<dof; k++) {
28126861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
28136861f193SBarry Smith       }
28146861f193SBarry Smith       jrow++;
28156861f193SBarry Smith     }
28166861f193SBarry Smith   }
28176861f193SBarry Smith 
28186861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28193649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28206861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28216861f193SBarry Smith   PetscFunctionReturn(0);
28226861f193SBarry Smith }
28236861f193SBarry Smith 
28246861f193SBarry Smith #undef __FUNCT__
28256861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
28266861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
28276861f193SBarry Smith {
28286861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28296861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28306861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28316861f193SBarry Smith   PetscScalar       *y;
28326861f193SBarry Smith   PetscErrorCode    ierr;
28336861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28346861f193SBarry Smith   PetscInt          n,i,k;
28356861f193SBarry Smith 
28366861f193SBarry Smith   PetscFunctionBegin;
28373649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28386861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
28396861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
28406861f193SBarry Smith   for (i=0; i<m; i++) {
28416861f193SBarry Smith     idx    = a->j + a->i[i] ;
28426861f193SBarry Smith     v      = a->a + a->i[i] ;
28436861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28446861f193SBarry Smith     alpha  = x + dof*i;
28456861f193SBarry Smith     while (n-->0) {
28466861f193SBarry Smith       for (k=0; k<dof; k++) {
28476861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28486861f193SBarry Smith       }
284983ce7366SBarry Smith       idx++; v++;
28506861f193SBarry Smith     }
28516861f193SBarry Smith   }
28526861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28546861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28556861f193SBarry Smith   PetscFunctionReturn(0);
28566861f193SBarry Smith }
28576861f193SBarry Smith 
28586861f193SBarry Smith #undef __FUNCT__
28596861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
28606861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28616861f193SBarry Smith {
28626861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28636861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28646861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28656861f193SBarry Smith   PetscScalar       *y;
28666861f193SBarry Smith   PetscErrorCode    ierr;
28676861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28686861f193SBarry Smith   PetscInt          n,i,k;
28696861f193SBarry Smith 
28706861f193SBarry Smith   PetscFunctionBegin;
28716861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28736861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28746861f193SBarry Smith   for (i=0; i<m; i++) {
28756861f193SBarry Smith     idx    = a->j + a->i[i] ;
28766861f193SBarry Smith     v      = a->a + a->i[i] ;
28776861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28786861f193SBarry Smith     alpha  = x + dof*i;
28796861f193SBarry Smith     while (n-->0) {
28806861f193SBarry Smith       for (k=0; k<dof; k++) {
28816861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28826861f193SBarry Smith       }
288383ce7366SBarry Smith       idx++; v++;
28846861f193SBarry Smith     }
28856861f193SBarry Smith   }
28866861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28873649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28886861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28896861f193SBarry Smith   PetscFunctionReturn(0);
28906861f193SBarry Smith }
28916861f193SBarry Smith 
2892f4a53059SBarry Smith /*===================================================================================*/
28934a2ae208SSatish Balay #undef __FUNCT__
28944a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2895dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2896f4a53059SBarry Smith {
28974cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2898dfbe8321SBarry Smith   PetscErrorCode ierr;
2899f4a53059SBarry Smith 
2900b24ad042SBarry Smith   PetscFunctionBegin;
2901f4a53059SBarry Smith   /* start the scatter */
2902ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29034cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2904ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29054cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2906f4a53059SBarry Smith   PetscFunctionReturn(0);
2907f4a53059SBarry Smith }
2908f4a53059SBarry Smith 
29094a2ae208SSatish Balay #undef __FUNCT__
29104a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2911dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
29124cb9d9b8SBarry Smith {
29134cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2914dfbe8321SBarry Smith   PetscErrorCode ierr;
2915b24ad042SBarry Smith 
29164cb9d9b8SBarry Smith   PetscFunctionBegin;
29174cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
29184cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2919ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2920ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29214cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29224cb9d9b8SBarry Smith }
29234cb9d9b8SBarry Smith 
29244a2ae208SSatish Balay #undef __FUNCT__
29254a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2926dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29274cb9d9b8SBarry Smith {
29284cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2929dfbe8321SBarry Smith   PetscErrorCode ierr;
29304cb9d9b8SBarry Smith 
2931b24ad042SBarry Smith   PetscFunctionBegin;
29324cb9d9b8SBarry Smith   /* start the scatter */
2933ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2934d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2935ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2936717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
29374cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29384cb9d9b8SBarry Smith }
29394cb9d9b8SBarry Smith 
29404a2ae208SSatish Balay #undef __FUNCT__
29414a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2942dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29434cb9d9b8SBarry Smith {
29444cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2945dfbe8321SBarry Smith   PetscErrorCode ierr;
2946b24ad042SBarry Smith 
29474cb9d9b8SBarry Smith   PetscFunctionBegin;
29484cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2949ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2950d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2951ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29524cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29534cb9d9b8SBarry Smith }
29544cb9d9b8SBarry Smith 
295595ddefa2SHong Zhang /* ----------------------------------------------------------------*/
29569c4fc4c7SBarry Smith #undef __FUNCT__
29577ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
29587ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
29597ba1a0bfSKris Buschelman {
29607ba1a0bfSKris Buschelman   /* This routine requires testing -- but it's getting better. */
29617ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
2962a1a86e44SBarry Smith   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
29637ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
29647ba1a0bfSKris Buschelman   Mat                P=pp->AIJ;
29657ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2966d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
29677ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2968d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2969d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
29707ba1a0bfSKris Buschelman   MatScalar          *ca;
2971d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
29727ba1a0bfSKris Buschelman 
29737ba1a0bfSKris Buschelman   PetscFunctionBegin;
29747ba1a0bfSKris Buschelman   /* Start timer */
29757ba1a0bfSKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
29767ba1a0bfSKris Buschelman 
29777ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
29787ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29797ba1a0bfSKris Buschelman 
29807ba1a0bfSKris Buschelman   cn = pn*ppdof;
29817ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
29827ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
29837ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
29847ba1a0bfSKris Buschelman   ci[0] = 0;
29857ba1a0bfSKris Buschelman 
29867ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
298774ed9c26SBarry Smith   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
2988c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2989c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
29907ba1a0bfSKris Buschelman 
29917ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
29927ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
29937ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2994a1a86e44SBarry Smith   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
29957ba1a0bfSKris Buschelman   current_space = free_space;
29967ba1a0bfSKris Buschelman 
29977ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
29987ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
29997ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
30007ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
30017ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
30027ba1a0bfSKris Buschelman       ptanzi = 0;
30037ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
30047ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
30057ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
30067ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
30077ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
30087ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
30097ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
30107ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
30117ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
30127ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
30137ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
30147ba1a0bfSKris Buschelman           }
30157ba1a0bfSKris Buschelman         }
30167ba1a0bfSKris Buschelman       }
30177ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
30187ba1a0bfSKris Buschelman       ptaj = ptasparserow;
30197ba1a0bfSKris Buschelman       cnzi   = 0;
30207ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30217ba1a0bfSKris Buschelman         /* Get offset within block of P */
30227ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
30237ba1a0bfSKris Buschelman         /* Get block row of P */
30247ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
30257ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
30267ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
30277ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
30287ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
30297ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
30307ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
30317ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
30327ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
30337ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
30347ba1a0bfSKris Buschelman           }
30357ba1a0bfSKris Buschelman         }
30367ba1a0bfSKris Buschelman       }
30377ba1a0bfSKris Buschelman 
30387ba1a0bfSKris Buschelman       /* sort sparserow */
30397ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
30407ba1a0bfSKris Buschelman 
30417ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
30427ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
30437ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
30444238b7adSHong Zhang         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
30457ba1a0bfSKris Buschelman       }
30467ba1a0bfSKris Buschelman 
30477ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
30487ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
30497ba1a0bfSKris Buschelman       current_space->array           += cnzi;
30507ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
30517ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
30527ba1a0bfSKris Buschelman 
30537ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30547ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
30557ba1a0bfSKris Buschelman       }
30567ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
30577ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
30587ba1a0bfSKris Buschelman       }
30597ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
30607ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
30617ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
30627ba1a0bfSKris Buschelman     }
30637ba1a0bfSKris Buschelman   }
30647ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
30657ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
30667ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
30677ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3068a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
306974ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
30707ba1a0bfSKris Buschelman 
30717ba1a0bfSKris Buschelman   /* Allocate space for ca */
30727ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
30737ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
30747ba1a0bfSKris Buschelman 
30757ba1a0bfSKris Buschelman   /* put together the new matrix */
30767adad957SLisandro Dalcin   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
30777ba1a0bfSKris Buschelman 
30787ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
30797ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
30807ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ *)((*C)->data);
3081e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
3082e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
30837ba1a0bfSKris Buschelman   c->nonew   = 0;
30847ba1a0bfSKris Buschelman 
30857ba1a0bfSKris Buschelman   /* Clean up. */
30867ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
30877ba1a0bfSKris Buschelman 
30887ba1a0bfSKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
30897ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
30907ba1a0bfSKris Buschelman }
30917ba1a0bfSKris Buschelman 
30927ba1a0bfSKris Buschelman #undef __FUNCT__
30937ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
30947ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
30957ba1a0bfSKris Buschelman {
30967ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
30977ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
30987ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
30997ba1a0bfSKris Buschelman   Mat             P=pp->AIJ;
31007ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
31017ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
31027ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3103d2840e09SBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3104d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3105d2840e09SBarry Smith   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3106d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3107d2840e09SBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3108d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
31097ba1a0bfSKris Buschelman 
31107ba1a0bfSKris Buschelman   PetscFunctionBegin;
31117ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
311274ed9c26SBarry Smith   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
311374ed9c26SBarry Smith   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
311474ed9c26SBarry Smith   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
311574ed9c26SBarry Smith   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
31167ba1a0bfSKris Buschelman 
31177ba1a0bfSKris Buschelman   /* Clear old values in C */
31187ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
31197ba1a0bfSKris Buschelman 
31207ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
31217ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
31227ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
31237ba1a0bfSKris Buschelman     apnzj = 0;
31247ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
31257ba1a0bfSKris Buschelman       /* Get offset within block of P */
31267ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
31277ba1a0bfSKris Buschelman       /* Get block row of P */
31287ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
31297ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
31307ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
31317ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
31327ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
31337ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
31347ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
31357ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
31367ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
31377ba1a0bfSKris Buschelman         }
31387ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
31397ba1a0bfSKris Buschelman       }
3140dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
31417ba1a0bfSKris Buschelman       aa++;
31427ba1a0bfSKris Buschelman     }
31437ba1a0bfSKris Buschelman 
31447ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
31457ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
31467ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
31477ba1a0bfSKris Buschelman 
31487ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
31497ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
31507ba1a0bfSKris Buschelman     pshift  = i%ppdof;
31517ba1a0bfSKris Buschelman     poffset = pi[prow];
31527ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
31537ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
31547ba1a0bfSKris Buschelman     pJ   = pj+poffset;
31557ba1a0bfSKris Buschelman     pA   = pa+poffset;
31567ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
31577ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
31587ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
31597ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
31607ba1a0bfSKris Buschelman       pJ++;
31617ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
31627ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
31637ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
31647ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
31657ba1a0bfSKris Buschelman         }
31667ba1a0bfSKris Buschelman       }
3167dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
31687ba1a0bfSKris Buschelman       pA++;
31697ba1a0bfSKris Buschelman     }
31707ba1a0bfSKris Buschelman 
31717ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
31727ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
31737ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
31747ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
31757ba1a0bfSKris Buschelman     }
31767ba1a0bfSKris Buschelman   }
31777ba1a0bfSKris Buschelman 
31787ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
31797ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31807ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
318174ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
318295ddefa2SHong Zhang   PetscFunctionReturn(0);
318395ddefa2SHong Zhang }
31847ba1a0bfSKris Buschelman 
318595ddefa2SHong Zhang #undef __FUNCT__
318695ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
318795ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
318895ddefa2SHong Zhang {
318995ddefa2SHong Zhang   PetscErrorCode    ierr;
319095ddefa2SHong Zhang 
319195ddefa2SHong Zhang   PetscFunctionBegin;
319295ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
319395ddefa2SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
319495ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
319595ddefa2SHong Zhang   PetscFunctionReturn(0);
319695ddefa2SHong Zhang }
319795ddefa2SHong Zhang 
319895ddefa2SHong Zhang #undef __FUNCT__
319995ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
320095ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
320195ddefa2SHong Zhang {
320295ddefa2SHong Zhang   PetscFunctionBegin;
3203e32f2f54SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
32047ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
32057ba1a0bfSKris Buschelman }
32067ba1a0bfSKris Buschelman 
3207be1d678aSKris Buschelman EXTERN_C_BEGIN
32087ba1a0bfSKris Buschelman #undef __FUNCT__
32090fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
32107087cfbeSBarry Smith PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32119c4fc4c7SBarry Smith {
32129c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3213ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
32149c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
32159c4fc4c7SBarry Smith   PetscErrorCode    ierr;
32160fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3217ba8c8a56SBarry Smith   PetscInt          *cols;
3218ba8c8a56SBarry Smith   PetscScalar       *vals;
32199c4fc4c7SBarry Smith 
32209c4fc4c7SBarry Smith   PetscFunctionBegin;
32219c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
32227c307921SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
32239c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32249c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
32250fd73130SBarry Smith     for (j=0; j<dof; j++) {
32260fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
32279c4fc4c7SBarry Smith     }
32289c4fc4c7SBarry Smith   }
3229ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
32309c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
32319c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
32329c4fc4c7SBarry Smith   ii   = 0;
32339c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3234ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32350fd73130SBarry Smith     for (j=0; j<dof; j++) {
32369c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
32370fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
32389c4fc4c7SBarry Smith       }
3239ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
32409c4fc4c7SBarry Smith       ii++;
32419c4fc4c7SBarry Smith     }
3242ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32439c4fc4c7SBarry Smith   }
32449c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3245ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3246ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3247ceb03754SKris Buschelman 
3248ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
32498ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3250c3d102feSKris Buschelman   } else {
3251ceb03754SKris Buschelman     *newmat = B;
3252c3d102feSKris Buschelman   }
32539c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32549c4fc4c7SBarry Smith }
3255be1d678aSKris Buschelman EXTERN_C_END
32569c4fc4c7SBarry Smith 
3257c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3258be1d678aSKris Buschelman 
3259be1d678aSKris Buschelman EXTERN_C_BEGIN
32600fd73130SBarry Smith #undef __FUNCT__
32610fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
32627087cfbeSBarry Smith PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32630fd73130SBarry Smith {
32640fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3265ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
32660fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
32670fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
32680fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
32690fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3270910ba992SMatthew Knepley   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3271910ba992SMatthew Knepley   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
32720fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
32730fd73130SBarry Smith   PetscErrorCode    ierr;
32740fd73130SBarry Smith   PetscScalar       *vals,*ovals;
32750fd73130SBarry Smith 
32760fd73130SBarry Smith   PetscFunctionBegin;
3277d0f46423SBarry Smith   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3278d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32790fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
32800fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
32810fd73130SBarry Smith     for (j=0; j<dof; j++) {
32820fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
32830fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
32840fd73130SBarry Smith     }
32850fd73130SBarry Smith   }
328669b1f4b7SBarry Smith   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
32870fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
32880fd73130SBarry Smith 
32897a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3290d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3291d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
32920fd73130SBarry Smith   garray = mpiaij->garray;
32930fd73130SBarry Smith 
32940fd73130SBarry Smith   ii = rstart;
3295d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32960fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32970fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
32980fd73130SBarry Smith     for (j=0; j<dof; j++) {
32990fd73130SBarry Smith       for (k=0; k<ncols; k++) {
33000fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
33010fd73130SBarry Smith       }
33020fd73130SBarry Smith       for (k=0; k<oncols; k++) {
33030fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
33040fd73130SBarry Smith       }
3305ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3306ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
33070fd73130SBarry Smith       ii++;
33080fd73130SBarry Smith     }
33090fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33100fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33110fd73130SBarry Smith   }
33120fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
33130fd73130SBarry Smith 
3314ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3315ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3316ceb03754SKris Buschelman 
3317ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
33187adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
33197adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
33208ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
33217adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3322c3d102feSKris Buschelman   } else {
3323ceb03754SKris Buschelman     *newmat = B;
3324c3d102feSKris Buschelman   }
33250fd73130SBarry Smith   PetscFunctionReturn(0);
33260fd73130SBarry Smith }
3327be1d678aSKris Buschelman EXTERN_C_END
33280fd73130SBarry Smith 
33299e516c8fSBarry Smith #undef __FUNCT__
33309e516c8fSBarry Smith #define __FUNCT__ "MatGetSubMatrix_MAIJ"
33319e516c8fSBarry Smith PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
33329e516c8fSBarry Smith {
33339e516c8fSBarry Smith   PetscErrorCode ierr;
33349e516c8fSBarry Smith   Mat            A;
33359e516c8fSBarry Smith 
33369e516c8fSBarry Smith   PetscFunctionBegin;
33379e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
33389e516c8fSBarry Smith   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
33399e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
33409e516c8fSBarry Smith   PetscFunctionReturn(0);
33419e516c8fSBarry Smith }
33420fd73130SBarry Smith 
3343bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3344ff585edeSJed Brown #undef __FUNCT__
3345ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ"
3346ff585edeSJed Brown /*@C
33470bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33480bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33490bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33500bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33510bad9183SKris Buschelman 
3352ff585edeSJed Brown   Collective
3353ff585edeSJed Brown 
3354ff585edeSJed Brown   Input Parameters:
3355ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3356ff585edeSJed Brown - dof - the block size (number of components per node)
3357ff585edeSJed Brown 
3358ff585edeSJed Brown   Output Parameter:
3359ff585edeSJed Brown . maij - the new MAIJ matrix
3360ff585edeSJed Brown 
33610bad9183SKris Buschelman   Operations provided:
33620fd73130SBarry Smith + MatMult
33630bad9183SKris Buschelman . MatMultTranspose
33640bad9183SKris Buschelman . MatMultAdd
33650bad9183SKris Buschelman . MatMultTransposeAdd
33660fd73130SBarry Smith - MatView
33670bad9183SKris Buschelman 
33680bad9183SKris Buschelman   Level: advanced
33690bad9183SKris Buschelman 
3370b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3371ff585edeSJed Brown @*/
33727087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
337382b1193eSBarry Smith {
3374dfbe8321SBarry Smith   PetscErrorCode ierr;
3375b24ad042SBarry Smith   PetscMPIInt    size;
3376b24ad042SBarry Smith   PetscInt       n;
337782b1193eSBarry Smith   Mat            B;
337882b1193eSBarry Smith 
337982b1193eSBarry Smith   PetscFunctionBegin;
3380d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3381d72c5c08SBarry Smith 
3382d72c5c08SBarry Smith   if (dof == 1) {
3383d72c5c08SBarry Smith     *maij = A;
3384d72c5c08SBarry Smith   } else {
33857adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3386d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3387bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3388bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3389bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3390bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3391cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
3392d72c5c08SBarry Smith 
33937adad957SLisandro Dalcin     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3394f4a53059SBarry Smith     if (size == 1) {
3395feb9fe23SJed Brown       Mat_SeqMAIJ    *b;
3396feb9fe23SJed Brown 
3397b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
33984cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
33990fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3400feb9fe23SJed Brown       b      = (Mat_SeqMAIJ*)B->data;
3401b9b97703SBarry Smith       b->dof = dof;
34024cb9d9b8SBarry Smith       b->AIJ = A;
3403d72c5c08SBarry Smith       if (dof == 2) {
3404cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3405cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3406cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3407cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3408bcc973b7SBarry Smith       } else if (dof == 3) {
3409bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3410bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3411bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3412bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3413bcc973b7SBarry Smith       } else if (dof == 4) {
3414bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3415bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3416bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3417bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3418f9fae5adSBarry Smith       } else if (dof == 5) {
3419f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3420f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3421f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3422f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34236cd98798SBarry Smith       } else if (dof == 6) {
34246cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34256cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34266cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34276cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3428ed8eea03SBarry Smith       } else if (dof == 7) {
3429ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3430ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3431ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3432ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
343366ed3db0SBarry Smith       } else if (dof == 8) {
343466ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
343566ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
343666ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
343766ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34382b692628SMatthew Knepley       } else if (dof == 9) {
34392b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34402b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34412b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34422b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3443b9cda22cSBarry Smith       } else if (dof == 10) {
3444b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3445b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3446b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3447b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3448dbdd7285SBarry Smith       } else if (dof == 11) {
3449dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3450dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3451dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3452dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
34532f7816d4SBarry Smith       } else if (dof == 16) {
34542f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
34552f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
34562f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
34572f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3458ed1c418cSBarry Smith       } else if (dof == 18) {
3459ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3460ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3461ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3462ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
346382b1193eSBarry Smith       } else {
34646861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
34656861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
34666861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
34676861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
346882b1193eSBarry Smith       }
34697ba1a0bfSKris Buschelman       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
34707ba1a0bfSKris Buschelman       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
34719c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3472f4a53059SBarry Smith     } else {
3473f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3474feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3475f4a53059SBarry Smith       IS          from,to;
3476f4a53059SBarry Smith       Vec         gvec;
3477f4a53059SBarry Smith 
3478b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3479d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
34800fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
3481b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3482b9b97703SBarry Smith       b->dof = dof;
3483b9b97703SBarry Smith       b->A   = A;
34844cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
34854cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
34864cb9d9b8SBarry Smith 
3487f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3488f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
348913288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3490f4a53059SBarry Smith 
3491f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3492deff0451SBarry Smith       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3493f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3494f4a53059SBarry Smith 
3495f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3496d0f46423SBarry Smith       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
349713288a74SBarry Smith       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
3498f4a53059SBarry Smith 
3499f4a53059SBarry Smith       /* generate the scatter context */
3500f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3501f4a53059SBarry Smith 
35026bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
35036bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
35046bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3505f4a53059SBarry Smith 
3506f4a53059SBarry Smith       B->ops->mult                = MatMult_MPIMAIJ_dof;
35074cb9d9b8SBarry Smith       B->ops->multtranspose       = MatMultTranspose_MPIMAIJ_dof;
35084cb9d9b8SBarry Smith       B->ops->multadd             = MatMultAdd_MPIMAIJ_dof;
35094cb9d9b8SBarry Smith       B->ops->multtransposeadd    = MatMultTransposeAdd_MPIMAIJ_dof;
351095ddefa2SHong Zhang       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
351195ddefa2SHong Zhang       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
35120fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3513f4a53059SBarry Smith     }
35149e516c8fSBarry Smith     B->ops->getsubmatrix        = MatGetSubMatrix_MAIJ;
35154994cf47SJed Brown     ierr = MatSetUp(B);CHKERRQ(ierr);
3516cd3bbe55SBarry Smith     *maij = B;
35170fd73130SBarry Smith     ierr = MatView_Private(B);CHKERRQ(ierr);
3518d72c5c08SBarry Smith   }
351982b1193eSBarry Smith   PetscFunctionReturn(0);
352082b1193eSBarry Smith }
3521