xref: /petsc/src/mat/impls/maij/maij.c (revision 95452b02e12c0ee11232c7ff2b24b568a8e07e43)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
2182b1193eSBarry Smith 
22b350b9acSSatish Balay /*@
23ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24ff585edeSJed Brown 
25ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26ff585edeSJed Brown 
27ff585edeSJed Brown    Input Parameter:
28ff585edeSJed Brown .  A - the MAIJ matrix
29ff585edeSJed Brown 
30ff585edeSJed Brown    Output Parameter:
31ff585edeSJed Brown .  B - the AIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Level: advanced
34ff585edeSJed Brown 
35*95452b02SPatrick Sanan    Notes:
36*95452b02SPatrick Sanan     The reference count on the AIJ matrix is not increased so you should not destroy it.
37ff585edeSJed Brown 
38ff585edeSJed Brown .seealso: MatCreateMAIJ()
39ff585edeSJed Brown @*/
407087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41b9b97703SBarry Smith {
42dfbe8321SBarry Smith   PetscErrorCode ierr;
43ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
44b9b97703SBarry Smith 
45b9b97703SBarry Smith   PetscFunctionBegin;
46251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
47251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
48b9b97703SBarry Smith   if (ismpimaij) {
49b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50b9b97703SBarry Smith 
51b9b97703SBarry Smith     *B = b->A;
52b9b97703SBarry Smith   } else if (isseqmaij) {
53b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54b9b97703SBarry Smith 
55b9b97703SBarry Smith     *B = b->AIJ;
56b9b97703SBarry Smith   } else {
57b9b97703SBarry Smith     *B = A;
58b9b97703SBarry Smith   }
59b9b97703SBarry Smith   PetscFunctionReturn(0);
60b9b97703SBarry Smith }
61b9b97703SBarry Smith 
62480dffcfSJed Brown /*@
63ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64ff585edeSJed Brown 
653f9fe445SBarry Smith    Logically Collective
66ff585edeSJed Brown 
67ff585edeSJed Brown    Input Parameter:
68ff585edeSJed Brown +  A - the MAIJ matrix
69ff585edeSJed Brown -  dof - the block size for the new matrix
70ff585edeSJed Brown 
71ff585edeSJed Brown    Output Parameter:
72ff585edeSJed Brown .  B - the new MAIJ matrix
73ff585edeSJed Brown 
74ff585edeSJed Brown    Level: advanced
75ff585edeSJed Brown 
76ff585edeSJed Brown .seealso: MatCreateMAIJ()
77ff585edeSJed Brown @*/
787087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79b9b97703SBarry Smith {
80dfbe8321SBarry Smith   PetscErrorCode ierr;
810298fd71SBarry Smith   Mat            Aij = NULL;
82b9b97703SBarry Smith 
83b9b97703SBarry Smith   PetscFunctionBegin;
84c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
85b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
86b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
87b9b97703SBarry Smith   PetscFunctionReturn(0);
88b9b97703SBarry Smith }
89b9b97703SBarry Smith 
90dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9182b1193eSBarry Smith {
92dfbe8321SBarry Smith   PetscErrorCode ierr;
934cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9482b1193eSBarry Smith 
9582b1193eSBarry Smith   PetscFunctionBegin;
966bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
97bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
98bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
99bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
10059e29146SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);CHKERRQ(ierr);
101279e4bd4SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr);
1024cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1034cb9d9b8SBarry Smith }
1044cb9d9b8SBarry Smith 
105356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
106356c569eSBarry Smith {
107356c569eSBarry Smith   PetscFunctionBegin;
108ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
109356c569eSBarry Smith   PetscFunctionReturn(0);
110356c569eSBarry Smith }
111356c569eSBarry Smith 
1120fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1130fd73130SBarry Smith {
1140fd73130SBarry Smith   PetscErrorCode ierr;
1150fd73130SBarry Smith   Mat            B;
1160fd73130SBarry Smith 
1170fd73130SBarry Smith   PetscFunctionBegin;
118a2ea699eSBarry Smith   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1190fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1206bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1210fd73130SBarry Smith   PetscFunctionReturn(0);
1220fd73130SBarry Smith }
1230fd73130SBarry Smith 
1240fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1250fd73130SBarry Smith {
1260fd73130SBarry Smith   PetscErrorCode ierr;
1270fd73130SBarry Smith   Mat            B;
1280fd73130SBarry Smith 
1290fd73130SBarry Smith   PetscFunctionBegin;
130a2ea699eSBarry Smith   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1310fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1326bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1330fd73130SBarry Smith   PetscFunctionReturn(0);
1340fd73130SBarry Smith }
1350fd73130SBarry Smith 
136dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1374cb9d9b8SBarry Smith {
138dfbe8321SBarry Smith   PetscErrorCode ierr;
1394cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1404cb9d9b8SBarry Smith 
1414cb9d9b8SBarry Smith   PetscFunctionBegin;
1426bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1436bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1446bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1456bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1466bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
147bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
148bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
149bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
150dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
151b9b97703SBarry Smith   PetscFunctionReturn(0);
152b9b97703SBarry Smith }
153b9b97703SBarry Smith 
1540bad9183SKris Buschelman /*MC
155fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1560bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1570bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1580bad9183SKris Buschelman 
1590bad9183SKris Buschelman   Operations provided:
1600bad9183SKris Buschelman . MatMult
1610bad9183SKris Buschelman . MatMultTranspose
1620bad9183SKris Buschelman . MatMultAdd
1630bad9183SKris Buschelman . MatMultTransposeAdd
1640bad9183SKris Buschelman 
1650bad9183SKris Buschelman   Level: advanced
1660bad9183SKris Buschelman 
167b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1680bad9183SKris Buschelman M*/
1690bad9183SKris Buschelman 
1708cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
17182b1193eSBarry Smith {
172dfbe8321SBarry Smith   PetscErrorCode ierr;
1734cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
17464b52464SHong Zhang   PetscMPIInt    size;
17582b1193eSBarry Smith 
17682b1193eSBarry Smith   PetscFunctionBegin;
177b00a9115SJed Brown   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
178b0a32e0cSBarry Smith   A->data  = (void*)b;
17926fbe8dcSKarl Rupp 
180cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
18126fbe8dcSKarl Rupp 
182356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
183f4a53059SBarry Smith 
184cd3bbe55SBarry Smith   b->AIJ  = 0;
185cd3bbe55SBarry Smith   b->dof  = 0;
186f4a53059SBarry Smith   b->OAIJ = 0;
187f4a53059SBarry Smith   b->ctx  = 0;
188f4a53059SBarry Smith   b->w    = 0;
189ce94432eSBarry Smith   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
19064b52464SHong Zhang   if (size == 1) {
19164b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
19264b52464SHong Zhang   } else {
19364b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
19464b52464SHong Zhang   }
19532e7c8b0SBarry Smith   A->preallocated  = PETSC_TRUE;
19632e7c8b0SBarry Smith   A->assembled     = PETSC_TRUE;
19782b1193eSBarry Smith   PetscFunctionReturn(0);
19882b1193eSBarry Smith }
19982b1193eSBarry Smith 
200cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
201dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
20282b1193eSBarry Smith {
2034cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
204bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
205d2840e09SBarry Smith   const PetscScalar *x,*v;
206d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
207dfbe8321SBarry Smith   PetscErrorCode    ierr;
208d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
209d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
21082b1193eSBarry Smith 
211bcc973b7SBarry Smith   PetscFunctionBegin;
2123649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2131ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
214bcc973b7SBarry Smith   idx  = a->j;
215bcc973b7SBarry Smith   v    = a->a;
216bcc973b7SBarry Smith   ii   = a->i;
217bcc973b7SBarry Smith 
218bcc973b7SBarry Smith   for (i=0; i<m; i++) {
219bcc973b7SBarry Smith     jrow  = ii[i];
220bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
221bcc973b7SBarry Smith     sum1  = 0.0;
222bcc973b7SBarry Smith     sum2  = 0.0;
22326fbe8dcSKarl Rupp 
224b3c51c66SMatthew Knepley     nonzerorow += (n>0);
225bcc973b7SBarry Smith     for (j=0; j<n; j++) {
226bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
227bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
228bcc973b7SBarry Smith       jrow++;
229bcc973b7SBarry Smith     }
230bcc973b7SBarry Smith     y[2*i]   = sum1;
231bcc973b7SBarry Smith     y[2*i+1] = sum2;
232bcc973b7SBarry Smith   }
233bcc973b7SBarry Smith 
234dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2353649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2361ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23782b1193eSBarry Smith   PetscFunctionReturn(0);
23882b1193eSBarry Smith }
239bcc973b7SBarry Smith 
240dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
24182b1193eSBarry Smith {
2424cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
243bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
244d2840e09SBarry Smith   const PetscScalar *x,*v;
245d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
246dfbe8321SBarry Smith   PetscErrorCode    ierr;
247d2840e09SBarry Smith   PetscInt          n,i;
248d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
24982b1193eSBarry Smith 
250bcc973b7SBarry Smith   PetscFunctionBegin;
251d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2523649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2531ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
254bfec09a0SHong Zhang 
255bcc973b7SBarry Smith   for (i=0; i<m; i++) {
256bfec09a0SHong Zhang     idx    = a->j + a->i[i];
257bfec09a0SHong Zhang     v      = a->a + a->i[i];
258bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
259bcc973b7SBarry Smith     alpha1 = x[2*i];
260bcc973b7SBarry Smith     alpha2 = x[2*i+1];
26126fbe8dcSKarl Rupp     while (n-->0) {
26226fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
26326fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
26426fbe8dcSKarl Rupp       idx++; v++;
26526fbe8dcSKarl Rupp     }
266bcc973b7SBarry Smith   }
267dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2683649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2691ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27082b1193eSBarry Smith   PetscFunctionReturn(0);
27182b1193eSBarry Smith }
272bcc973b7SBarry Smith 
273dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
27482b1193eSBarry Smith {
2754cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
276bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
277d2840e09SBarry Smith   const PetscScalar *x,*v;
278d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
279dfbe8321SBarry Smith   PetscErrorCode    ierr;
280b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
281d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28282b1193eSBarry Smith 
283bcc973b7SBarry Smith   PetscFunctionBegin;
284f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2853649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2861ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
287bcc973b7SBarry Smith   idx  = a->j;
288bcc973b7SBarry Smith   v    = a->a;
289bcc973b7SBarry Smith   ii   = a->i;
290bcc973b7SBarry Smith 
291bcc973b7SBarry Smith   for (i=0; i<m; i++) {
292bcc973b7SBarry Smith     jrow = ii[i];
293bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
294bcc973b7SBarry Smith     sum1 = 0.0;
295bcc973b7SBarry Smith     sum2 = 0.0;
296bcc973b7SBarry Smith     for (j=0; j<n; j++) {
297bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
298bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
299bcc973b7SBarry Smith       jrow++;
300bcc973b7SBarry Smith     }
301bcc973b7SBarry Smith     y[2*i]   += sum1;
302bcc973b7SBarry Smith     y[2*i+1] += sum2;
303bcc973b7SBarry Smith   }
304bcc973b7SBarry Smith 
305dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3071ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
308bcc973b7SBarry Smith   PetscFunctionReturn(0);
30982b1193eSBarry Smith }
310dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
31182b1193eSBarry Smith {
3124cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
313bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
314d2840e09SBarry Smith   const PetscScalar *x,*v;
315d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
316dfbe8321SBarry Smith   PetscErrorCode    ierr;
317d2840e09SBarry Smith   PetscInt          n,i;
318d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
31982b1193eSBarry Smith 
320bcc973b7SBarry Smith   PetscFunctionBegin;
321f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3231ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
324bfec09a0SHong Zhang 
325bcc973b7SBarry Smith   for (i=0; i<m; i++) {
326bfec09a0SHong Zhang     idx    = a->j + a->i[i];
327bfec09a0SHong Zhang     v      = a->a + a->i[i];
328bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
329bcc973b7SBarry Smith     alpha1 = x[2*i];
330bcc973b7SBarry Smith     alpha2 = x[2*i+1];
33126fbe8dcSKarl Rupp     while (n-->0) {
33226fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
33326fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
33426fbe8dcSKarl Rupp       idx++; v++;
33526fbe8dcSKarl Rupp     }
336bcc973b7SBarry Smith   }
337dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3383649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3391ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
340bcc973b7SBarry Smith   PetscFunctionReturn(0);
34182b1193eSBarry Smith }
342cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
343dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
344bcc973b7SBarry Smith {
3454cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
346bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
347d2840e09SBarry Smith   const PetscScalar *x,*v;
348d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
349dfbe8321SBarry Smith   PetscErrorCode    ierr;
350d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
351d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
35282b1193eSBarry Smith 
353bcc973b7SBarry Smith   PetscFunctionBegin;
3543649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3551ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
356bcc973b7SBarry Smith   idx  = a->j;
357bcc973b7SBarry Smith   v    = a->a;
358bcc973b7SBarry Smith   ii   = a->i;
359bcc973b7SBarry Smith 
360bcc973b7SBarry Smith   for (i=0; i<m; i++) {
361bcc973b7SBarry Smith     jrow  = ii[i];
362bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
363bcc973b7SBarry Smith     sum1  = 0.0;
364bcc973b7SBarry Smith     sum2  = 0.0;
365bcc973b7SBarry Smith     sum3  = 0.0;
36626fbe8dcSKarl Rupp 
367b3c51c66SMatthew Knepley     nonzerorow += (n>0);
368bcc973b7SBarry Smith     for (j=0; j<n; j++) {
369bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
370bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
371bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
372bcc973b7SBarry Smith       jrow++;
373bcc973b7SBarry Smith      }
374bcc973b7SBarry Smith     y[3*i]   = sum1;
375bcc973b7SBarry Smith     y[3*i+1] = sum2;
376bcc973b7SBarry Smith     y[3*i+2] = sum3;
377bcc973b7SBarry Smith   }
378bcc973b7SBarry Smith 
379dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3803649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3811ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
382bcc973b7SBarry Smith   PetscFunctionReturn(0);
383bcc973b7SBarry Smith }
384bcc973b7SBarry Smith 
385dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
386bcc973b7SBarry Smith {
3874cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
388bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
389d2840e09SBarry Smith   const PetscScalar *x,*v;
390d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
391dfbe8321SBarry Smith   PetscErrorCode    ierr;
392d2840e09SBarry Smith   PetscInt          n,i;
393d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
394bcc973b7SBarry Smith 
395bcc973b7SBarry Smith   PetscFunctionBegin;
396d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
3973649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3981ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
399bfec09a0SHong Zhang 
400bcc973b7SBarry Smith   for (i=0; i<m; i++) {
401bfec09a0SHong Zhang     idx    = a->j + a->i[i];
402bfec09a0SHong Zhang     v      = a->a + a->i[i];
403bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
404bcc973b7SBarry Smith     alpha1 = x[3*i];
405bcc973b7SBarry Smith     alpha2 = x[3*i+1];
406bcc973b7SBarry Smith     alpha3 = x[3*i+2];
407bcc973b7SBarry Smith     while (n-->0) {
408bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
409bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
410bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
411bcc973b7SBarry Smith       idx++; v++;
412bcc973b7SBarry Smith     }
413bcc973b7SBarry Smith   }
414dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4161ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
417bcc973b7SBarry Smith   PetscFunctionReturn(0);
418bcc973b7SBarry Smith }
419bcc973b7SBarry Smith 
420dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
421bcc973b7SBarry Smith {
4224cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
423bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
424d2840e09SBarry Smith   const PetscScalar *x,*v;
425d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
426dfbe8321SBarry Smith   PetscErrorCode    ierr;
427b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
428d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
429bcc973b7SBarry Smith 
430bcc973b7SBarry Smith   PetscFunctionBegin;
431f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4323649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
434bcc973b7SBarry Smith   idx  = a->j;
435bcc973b7SBarry Smith   v    = a->a;
436bcc973b7SBarry Smith   ii   = a->i;
437bcc973b7SBarry Smith 
438bcc973b7SBarry Smith   for (i=0; i<m; i++) {
439bcc973b7SBarry Smith     jrow = ii[i];
440bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
441bcc973b7SBarry Smith     sum1 = 0.0;
442bcc973b7SBarry Smith     sum2 = 0.0;
443bcc973b7SBarry Smith     sum3 = 0.0;
444bcc973b7SBarry Smith     for (j=0; j<n; j++) {
445bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
446bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
447bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
448bcc973b7SBarry Smith       jrow++;
449bcc973b7SBarry Smith     }
450bcc973b7SBarry Smith     y[3*i]   += sum1;
451bcc973b7SBarry Smith     y[3*i+1] += sum2;
452bcc973b7SBarry Smith     y[3*i+2] += sum3;
453bcc973b7SBarry Smith   }
454bcc973b7SBarry Smith 
455dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4563649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4571ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
458bcc973b7SBarry Smith   PetscFunctionReturn(0);
459bcc973b7SBarry Smith }
460dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
461bcc973b7SBarry Smith {
4624cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
463bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
464d2840e09SBarry Smith   const PetscScalar *x,*v;
465d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
466dfbe8321SBarry Smith   PetscErrorCode    ierr;
467d2840e09SBarry Smith   PetscInt          n,i;
468d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
469bcc973b7SBarry Smith 
470bcc973b7SBarry Smith   PetscFunctionBegin;
471f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4731ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
474bcc973b7SBarry Smith   for (i=0; i<m; i++) {
475bfec09a0SHong Zhang     idx    = a->j + a->i[i];
476bfec09a0SHong Zhang     v      = a->a + a->i[i];
477bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
478bcc973b7SBarry Smith     alpha1 = x[3*i];
479bcc973b7SBarry Smith     alpha2 = x[3*i+1];
480bcc973b7SBarry Smith     alpha3 = x[3*i+2];
481bcc973b7SBarry Smith     while (n-->0) {
482bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
483bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
484bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
485bcc973b7SBarry Smith       idx++; v++;
486bcc973b7SBarry Smith     }
487bcc973b7SBarry Smith   }
488dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4893649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4901ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
491bcc973b7SBarry Smith   PetscFunctionReturn(0);
492bcc973b7SBarry Smith }
493bcc973b7SBarry Smith 
494bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
495dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
496bcc973b7SBarry Smith {
4974cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
498bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
499d2840e09SBarry Smith   const PetscScalar *x,*v;
500d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
501dfbe8321SBarry Smith   PetscErrorCode    ierr;
502d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
503d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
504bcc973b7SBarry Smith 
505bcc973b7SBarry Smith   PetscFunctionBegin;
5063649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5071ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
508bcc973b7SBarry Smith   idx  = a->j;
509bcc973b7SBarry Smith   v    = a->a;
510bcc973b7SBarry Smith   ii   = a->i;
511bcc973b7SBarry Smith 
512bcc973b7SBarry Smith   for (i=0; i<m; i++) {
513bcc973b7SBarry Smith     jrow        = ii[i];
514bcc973b7SBarry Smith     n           = ii[i+1] - jrow;
515bcc973b7SBarry Smith     sum1        = 0.0;
516bcc973b7SBarry Smith     sum2        = 0.0;
517bcc973b7SBarry Smith     sum3        = 0.0;
518bcc973b7SBarry Smith     sum4        = 0.0;
519b3c51c66SMatthew Knepley     nonzerorow += (n>0);
520bcc973b7SBarry Smith     for (j=0; j<n; j++) {
521bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
522bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
523bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
524bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
525bcc973b7SBarry Smith       jrow++;
526bcc973b7SBarry Smith     }
527bcc973b7SBarry Smith     y[4*i]   = sum1;
528bcc973b7SBarry Smith     y[4*i+1] = sum2;
529bcc973b7SBarry Smith     y[4*i+2] = sum3;
530bcc973b7SBarry Smith     y[4*i+3] = sum4;
531bcc973b7SBarry Smith   }
532bcc973b7SBarry Smith 
533dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5343649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5351ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
536bcc973b7SBarry Smith   PetscFunctionReturn(0);
537bcc973b7SBarry Smith }
538bcc973b7SBarry Smith 
539dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
540bcc973b7SBarry Smith {
5414cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
542bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
543d2840e09SBarry Smith   const PetscScalar *x,*v;
544d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
545dfbe8321SBarry Smith   PetscErrorCode    ierr;
546d2840e09SBarry Smith   PetscInt          n,i;
547d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
548bcc973b7SBarry Smith 
549bcc973b7SBarry Smith   PetscFunctionBegin;
550d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5513649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5521ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
553bcc973b7SBarry Smith   for (i=0; i<m; i++) {
554bfec09a0SHong Zhang     idx    = a->j + a->i[i];
555bfec09a0SHong Zhang     v      = a->a + a->i[i];
556bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
557bcc973b7SBarry Smith     alpha1 = x[4*i];
558bcc973b7SBarry Smith     alpha2 = x[4*i+1];
559bcc973b7SBarry Smith     alpha3 = x[4*i+2];
560bcc973b7SBarry Smith     alpha4 = x[4*i+3];
561bcc973b7SBarry Smith     while (n-->0) {
562bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
563bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
564bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
565bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
566bcc973b7SBarry Smith       idx++; v++;
567bcc973b7SBarry Smith     }
568bcc973b7SBarry Smith   }
569dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5703649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5711ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
572bcc973b7SBarry Smith   PetscFunctionReturn(0);
573bcc973b7SBarry Smith }
574bcc973b7SBarry Smith 
575dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
576bcc973b7SBarry Smith {
5774cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
578f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
579d2840e09SBarry Smith   const PetscScalar *x,*v;
580d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
581dfbe8321SBarry Smith   PetscErrorCode    ierr;
582b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
583d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
584f9fae5adSBarry Smith 
585f9fae5adSBarry Smith   PetscFunctionBegin;
586f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5873649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5881ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
589f9fae5adSBarry Smith   idx  = a->j;
590f9fae5adSBarry Smith   v    = a->a;
591f9fae5adSBarry Smith   ii   = a->i;
592f9fae5adSBarry Smith 
593f9fae5adSBarry Smith   for (i=0; i<m; i++) {
594f9fae5adSBarry Smith     jrow = ii[i];
595f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
596f9fae5adSBarry Smith     sum1 = 0.0;
597f9fae5adSBarry Smith     sum2 = 0.0;
598f9fae5adSBarry Smith     sum3 = 0.0;
599f9fae5adSBarry Smith     sum4 = 0.0;
600f9fae5adSBarry Smith     for (j=0; j<n; j++) {
601f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
602f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
603f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
604f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
605f9fae5adSBarry Smith       jrow++;
606f9fae5adSBarry Smith     }
607f9fae5adSBarry Smith     y[4*i]   += sum1;
608f9fae5adSBarry Smith     y[4*i+1] += sum2;
609f9fae5adSBarry Smith     y[4*i+2] += sum3;
610f9fae5adSBarry Smith     y[4*i+3] += sum4;
611f9fae5adSBarry Smith   }
612f9fae5adSBarry Smith 
613dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6143649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6151ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
616f9fae5adSBarry Smith   PetscFunctionReturn(0);
617bcc973b7SBarry Smith }
618dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
619bcc973b7SBarry Smith {
6204cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
621f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
622d2840e09SBarry Smith   const PetscScalar *x,*v;
623d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
624dfbe8321SBarry Smith   PetscErrorCode    ierr;
625d2840e09SBarry Smith   PetscInt          n,i;
626d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
627f9fae5adSBarry Smith 
628f9fae5adSBarry Smith   PetscFunctionBegin;
629f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6303649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6311ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
632bfec09a0SHong Zhang 
633f9fae5adSBarry Smith   for (i=0; i<m; i++) {
634bfec09a0SHong Zhang     idx    = a->j + a->i[i];
635bfec09a0SHong Zhang     v      = a->a + a->i[i];
636f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
637f9fae5adSBarry Smith     alpha1 = x[4*i];
638f9fae5adSBarry Smith     alpha2 = x[4*i+1];
639f9fae5adSBarry Smith     alpha3 = x[4*i+2];
640f9fae5adSBarry Smith     alpha4 = x[4*i+3];
641f9fae5adSBarry Smith     while (n-->0) {
642f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
643f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
644f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
645f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
646f9fae5adSBarry Smith       idx++; v++;
647f9fae5adSBarry Smith     }
648f9fae5adSBarry Smith   }
649dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6503649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6511ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
652f9fae5adSBarry Smith   PetscFunctionReturn(0);
653f9fae5adSBarry Smith }
654f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6556cd98798SBarry Smith 
656dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
657f9fae5adSBarry Smith {
6584cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
659f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
660d2840e09SBarry Smith   const PetscScalar *x,*v;
661d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
662dfbe8321SBarry Smith   PetscErrorCode    ierr;
663d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
664d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
665f9fae5adSBarry Smith 
666f9fae5adSBarry Smith   PetscFunctionBegin;
6673649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6681ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
669f9fae5adSBarry Smith   idx  = a->j;
670f9fae5adSBarry Smith   v    = a->a;
671f9fae5adSBarry Smith   ii   = a->i;
672f9fae5adSBarry Smith 
673f9fae5adSBarry Smith   for (i=0; i<m; i++) {
674f9fae5adSBarry Smith     jrow  = ii[i];
675f9fae5adSBarry Smith     n     = ii[i+1] - jrow;
676f9fae5adSBarry Smith     sum1  = 0.0;
677f9fae5adSBarry Smith     sum2  = 0.0;
678f9fae5adSBarry Smith     sum3  = 0.0;
679f9fae5adSBarry Smith     sum4  = 0.0;
680f9fae5adSBarry Smith     sum5  = 0.0;
68126fbe8dcSKarl Rupp 
682b3c51c66SMatthew Knepley     nonzerorow += (n>0);
683f9fae5adSBarry Smith     for (j=0; j<n; j++) {
684f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
685f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
686f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
687f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
688f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
689f9fae5adSBarry Smith       jrow++;
690f9fae5adSBarry Smith     }
691f9fae5adSBarry Smith     y[5*i]   = sum1;
692f9fae5adSBarry Smith     y[5*i+1] = sum2;
693f9fae5adSBarry Smith     y[5*i+2] = sum3;
694f9fae5adSBarry Smith     y[5*i+3] = sum4;
695f9fae5adSBarry Smith     y[5*i+4] = sum5;
696f9fae5adSBarry Smith   }
697f9fae5adSBarry Smith 
698dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
6993649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7001ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
701f9fae5adSBarry Smith   PetscFunctionReturn(0);
702f9fae5adSBarry Smith }
703f9fae5adSBarry Smith 
704dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
705f9fae5adSBarry Smith {
7064cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
707f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
708d2840e09SBarry Smith   const PetscScalar *x,*v;
709d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
710dfbe8321SBarry Smith   PetscErrorCode    ierr;
711d2840e09SBarry Smith   PetscInt          n,i;
712d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
713f9fae5adSBarry Smith 
714f9fae5adSBarry Smith   PetscFunctionBegin;
715d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7163649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7171ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
718bfec09a0SHong Zhang 
719f9fae5adSBarry Smith   for (i=0; i<m; i++) {
720bfec09a0SHong Zhang     idx    = a->j + a->i[i];
721bfec09a0SHong Zhang     v      = a->a + a->i[i];
722f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
723f9fae5adSBarry Smith     alpha1 = x[5*i];
724f9fae5adSBarry Smith     alpha2 = x[5*i+1];
725f9fae5adSBarry Smith     alpha3 = x[5*i+2];
726f9fae5adSBarry Smith     alpha4 = x[5*i+3];
727f9fae5adSBarry Smith     alpha5 = x[5*i+4];
728f9fae5adSBarry Smith     while (n-->0) {
729f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
730f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
731f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
732f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
733f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
734f9fae5adSBarry Smith       idx++; v++;
735f9fae5adSBarry Smith     }
736f9fae5adSBarry Smith   }
737dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7383649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7391ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
740f9fae5adSBarry Smith   PetscFunctionReturn(0);
741f9fae5adSBarry Smith }
742f9fae5adSBarry Smith 
743dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
744f9fae5adSBarry Smith {
7454cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
746f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
747d2840e09SBarry Smith   const PetscScalar *x,*v;
748d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
749dfbe8321SBarry Smith   PetscErrorCode    ierr;
750b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
751d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
752f9fae5adSBarry Smith 
753f9fae5adSBarry Smith   PetscFunctionBegin;
754f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7553649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7561ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
757f9fae5adSBarry Smith   idx  = a->j;
758f9fae5adSBarry Smith   v    = a->a;
759f9fae5adSBarry Smith   ii   = a->i;
760f9fae5adSBarry Smith 
761f9fae5adSBarry Smith   for (i=0; i<m; i++) {
762f9fae5adSBarry Smith     jrow = ii[i];
763f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
764f9fae5adSBarry Smith     sum1 = 0.0;
765f9fae5adSBarry Smith     sum2 = 0.0;
766f9fae5adSBarry Smith     sum3 = 0.0;
767f9fae5adSBarry Smith     sum4 = 0.0;
768f9fae5adSBarry Smith     sum5 = 0.0;
769f9fae5adSBarry Smith     for (j=0; j<n; j++) {
770f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
771f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
772f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
773f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
774f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
775f9fae5adSBarry Smith       jrow++;
776f9fae5adSBarry Smith     }
777f9fae5adSBarry Smith     y[5*i]   += sum1;
778f9fae5adSBarry Smith     y[5*i+1] += sum2;
779f9fae5adSBarry Smith     y[5*i+2] += sum3;
780f9fae5adSBarry Smith     y[5*i+3] += sum4;
781f9fae5adSBarry Smith     y[5*i+4] += sum5;
782f9fae5adSBarry Smith   }
783f9fae5adSBarry Smith 
784dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7853649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7861ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
787f9fae5adSBarry Smith   PetscFunctionReturn(0);
788f9fae5adSBarry Smith }
789f9fae5adSBarry Smith 
790dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
791f9fae5adSBarry Smith {
7924cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
793f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
794d2840e09SBarry Smith   const PetscScalar *x,*v;
795d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
796dfbe8321SBarry Smith   PetscErrorCode    ierr;
797d2840e09SBarry Smith   PetscInt          n,i;
798d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
799f9fae5adSBarry Smith 
800f9fae5adSBarry Smith   PetscFunctionBegin;
801f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8023649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8031ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
804bfec09a0SHong Zhang 
805f9fae5adSBarry Smith   for (i=0; i<m; i++) {
806bfec09a0SHong Zhang     idx    = a->j + a->i[i];
807bfec09a0SHong Zhang     v      = a->a + a->i[i];
808f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
809f9fae5adSBarry Smith     alpha1 = x[5*i];
810f9fae5adSBarry Smith     alpha2 = x[5*i+1];
811f9fae5adSBarry Smith     alpha3 = x[5*i+2];
812f9fae5adSBarry Smith     alpha4 = x[5*i+3];
813f9fae5adSBarry Smith     alpha5 = x[5*i+4];
814f9fae5adSBarry Smith     while (n-->0) {
815f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
816f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
817f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
818f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
819f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
820f9fae5adSBarry Smith       idx++; v++;
821f9fae5adSBarry Smith     }
822f9fae5adSBarry Smith   }
823dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8243649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8251ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
826f9fae5adSBarry Smith   PetscFunctionReturn(0);
827bcc973b7SBarry Smith }
828bcc973b7SBarry Smith 
8296cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
830dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8316cd98798SBarry Smith {
8326cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8336cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
834d2840e09SBarry Smith   const PetscScalar *x,*v;
835d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
836dfbe8321SBarry Smith   PetscErrorCode    ierr;
837d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
838d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
8396cd98798SBarry Smith 
8406cd98798SBarry Smith   PetscFunctionBegin;
8413649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8421ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8436cd98798SBarry Smith   idx  = a->j;
8446cd98798SBarry Smith   v    = a->a;
8456cd98798SBarry Smith   ii   = a->i;
8466cd98798SBarry Smith 
8476cd98798SBarry Smith   for (i=0; i<m; i++) {
8486cd98798SBarry Smith     jrow  = ii[i];
8496cd98798SBarry Smith     n     = ii[i+1] - jrow;
8506cd98798SBarry Smith     sum1  = 0.0;
8516cd98798SBarry Smith     sum2  = 0.0;
8526cd98798SBarry Smith     sum3  = 0.0;
8536cd98798SBarry Smith     sum4  = 0.0;
8546cd98798SBarry Smith     sum5  = 0.0;
8556cd98798SBarry Smith     sum6  = 0.0;
85626fbe8dcSKarl Rupp 
857b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8586cd98798SBarry Smith     for (j=0; j<n; j++) {
8596cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8606cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8616cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8626cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8636cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8646cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8656cd98798SBarry Smith       jrow++;
8666cd98798SBarry Smith     }
8676cd98798SBarry Smith     y[6*i]   = sum1;
8686cd98798SBarry Smith     y[6*i+1] = sum2;
8696cd98798SBarry Smith     y[6*i+2] = sum3;
8706cd98798SBarry Smith     y[6*i+3] = sum4;
8716cd98798SBarry Smith     y[6*i+4] = sum5;
8726cd98798SBarry Smith     y[6*i+5] = sum6;
8736cd98798SBarry Smith   }
8746cd98798SBarry Smith 
875dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
8763649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8771ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8786cd98798SBarry Smith   PetscFunctionReturn(0);
8796cd98798SBarry Smith }
8806cd98798SBarry Smith 
881dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8826cd98798SBarry Smith {
8836cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8846cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
885d2840e09SBarry Smith   const PetscScalar *x,*v;
886d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
887dfbe8321SBarry Smith   PetscErrorCode    ierr;
888d2840e09SBarry Smith   PetscInt          n,i;
889d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
8906cd98798SBarry Smith 
8916cd98798SBarry Smith   PetscFunctionBegin;
892d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
8933649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8941ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
895bfec09a0SHong Zhang 
8966cd98798SBarry Smith   for (i=0; i<m; i++) {
897bfec09a0SHong Zhang     idx    = a->j + a->i[i];
898bfec09a0SHong Zhang     v      = a->a + a->i[i];
8996cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9006cd98798SBarry Smith     alpha1 = x[6*i];
9016cd98798SBarry Smith     alpha2 = x[6*i+1];
9026cd98798SBarry Smith     alpha3 = x[6*i+2];
9036cd98798SBarry Smith     alpha4 = x[6*i+3];
9046cd98798SBarry Smith     alpha5 = x[6*i+4];
9056cd98798SBarry Smith     alpha6 = x[6*i+5];
9066cd98798SBarry Smith     while (n-->0) {
9076cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9086cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9096cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9106cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9116cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9126cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9136cd98798SBarry Smith       idx++; v++;
9146cd98798SBarry Smith     }
9156cd98798SBarry Smith   }
916dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9181ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9196cd98798SBarry Smith   PetscFunctionReturn(0);
9206cd98798SBarry Smith }
9216cd98798SBarry Smith 
922dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9236cd98798SBarry Smith {
9246cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9256cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
926d2840e09SBarry Smith   const PetscScalar *x,*v;
927d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
928dfbe8321SBarry Smith   PetscErrorCode    ierr;
929b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
930d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9316cd98798SBarry Smith 
9326cd98798SBarry Smith   PetscFunctionBegin;
9336cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9343649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9351ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9366cd98798SBarry Smith   idx  = a->j;
9376cd98798SBarry Smith   v    = a->a;
9386cd98798SBarry Smith   ii   = a->i;
9396cd98798SBarry Smith 
9406cd98798SBarry Smith   for (i=0; i<m; i++) {
9416cd98798SBarry Smith     jrow = ii[i];
9426cd98798SBarry Smith     n    = ii[i+1] - jrow;
9436cd98798SBarry Smith     sum1 = 0.0;
9446cd98798SBarry Smith     sum2 = 0.0;
9456cd98798SBarry Smith     sum3 = 0.0;
9466cd98798SBarry Smith     sum4 = 0.0;
9476cd98798SBarry Smith     sum5 = 0.0;
9486cd98798SBarry Smith     sum6 = 0.0;
9496cd98798SBarry Smith     for (j=0; j<n; j++) {
9506cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9516cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9526cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9536cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9546cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9556cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9566cd98798SBarry Smith       jrow++;
9576cd98798SBarry Smith     }
9586cd98798SBarry Smith     y[6*i]   += sum1;
9596cd98798SBarry Smith     y[6*i+1] += sum2;
9606cd98798SBarry Smith     y[6*i+2] += sum3;
9616cd98798SBarry Smith     y[6*i+3] += sum4;
9626cd98798SBarry Smith     y[6*i+4] += sum5;
9636cd98798SBarry Smith     y[6*i+5] += sum6;
9646cd98798SBarry Smith   }
9656cd98798SBarry Smith 
966dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9673649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9681ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9696cd98798SBarry Smith   PetscFunctionReturn(0);
9706cd98798SBarry Smith }
9716cd98798SBarry Smith 
972dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9736cd98798SBarry Smith {
9746cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9756cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
976d2840e09SBarry Smith   const PetscScalar *x,*v;
977d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
978dfbe8321SBarry Smith   PetscErrorCode    ierr;
979d2840e09SBarry Smith   PetscInt          n,i;
980d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9816cd98798SBarry Smith 
9826cd98798SBarry Smith   PetscFunctionBegin;
9836cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9843649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9851ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
986bfec09a0SHong Zhang 
9876cd98798SBarry Smith   for (i=0; i<m; i++) {
988bfec09a0SHong Zhang     idx    = a->j + a->i[i];
989bfec09a0SHong Zhang     v      = a->a + a->i[i];
9906cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9916cd98798SBarry Smith     alpha1 = x[6*i];
9926cd98798SBarry Smith     alpha2 = x[6*i+1];
9936cd98798SBarry Smith     alpha3 = x[6*i+2];
9946cd98798SBarry Smith     alpha4 = x[6*i+3];
9956cd98798SBarry Smith     alpha5 = x[6*i+4];
9966cd98798SBarry Smith     alpha6 = x[6*i+5];
9976cd98798SBarry Smith     while (n-->0) {
9986cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9996cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
10006cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10016cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10026cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10036cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10046cd98798SBarry Smith       idx++; v++;
10056cd98798SBarry Smith     }
10066cd98798SBarry Smith   }
1007dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10083649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10091ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10106cd98798SBarry Smith   PetscFunctionReturn(0);
10116cd98798SBarry Smith }
10126cd98798SBarry Smith 
101366ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
1014ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1015ed8eea03SBarry Smith {
1016ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1017ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1018d2840e09SBarry Smith   const PetscScalar *x,*v;
1019d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1020ed8eea03SBarry Smith   PetscErrorCode    ierr;
1021d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1022d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1023ed8eea03SBarry Smith 
1024ed8eea03SBarry Smith   PetscFunctionBegin;
10253649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1026ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1027ed8eea03SBarry Smith   idx  = a->j;
1028ed8eea03SBarry Smith   v    = a->a;
1029ed8eea03SBarry Smith   ii   = a->i;
1030ed8eea03SBarry Smith 
1031ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1032ed8eea03SBarry Smith     jrow  = ii[i];
1033ed8eea03SBarry Smith     n     = ii[i+1] - jrow;
1034ed8eea03SBarry Smith     sum1  = 0.0;
1035ed8eea03SBarry Smith     sum2  = 0.0;
1036ed8eea03SBarry Smith     sum3  = 0.0;
1037ed8eea03SBarry Smith     sum4  = 0.0;
1038ed8eea03SBarry Smith     sum5  = 0.0;
1039ed8eea03SBarry Smith     sum6  = 0.0;
1040ed8eea03SBarry Smith     sum7  = 0.0;
104126fbe8dcSKarl Rupp 
1042b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1043ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1044ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1045ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1046ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1047ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1048ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1049ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1050ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1051ed8eea03SBarry Smith       jrow++;
1052ed8eea03SBarry Smith     }
1053ed8eea03SBarry Smith     y[7*i]   = sum1;
1054ed8eea03SBarry Smith     y[7*i+1] = sum2;
1055ed8eea03SBarry Smith     y[7*i+2] = sum3;
1056ed8eea03SBarry Smith     y[7*i+3] = sum4;
1057ed8eea03SBarry Smith     y[7*i+4] = sum5;
1058ed8eea03SBarry Smith     y[7*i+5] = sum6;
1059ed8eea03SBarry Smith     y[7*i+6] = sum7;
1060ed8eea03SBarry Smith   }
1061ed8eea03SBarry Smith 
1062dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
10633649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1064ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1065ed8eea03SBarry Smith   PetscFunctionReturn(0);
1066ed8eea03SBarry Smith }
1067ed8eea03SBarry Smith 
1068ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1069ed8eea03SBarry Smith {
1070ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1071ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1072d2840e09SBarry Smith   const PetscScalar *x,*v;
1073d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1074ed8eea03SBarry Smith   PetscErrorCode    ierr;
1075d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1076d2840e09SBarry Smith   PetscInt          n,i;
1077ed8eea03SBarry Smith 
1078ed8eea03SBarry Smith   PetscFunctionBegin;
1079d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
10803649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1081ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1082ed8eea03SBarry Smith 
1083ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1084ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1085ed8eea03SBarry Smith     v      = a->a + a->i[i];
1086ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1087ed8eea03SBarry Smith     alpha1 = x[7*i];
1088ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1089ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1090ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1091ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1092ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1093ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1094ed8eea03SBarry Smith     while (n-->0) {
1095ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1096ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1097ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1098ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1099ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1100ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1101ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1102ed8eea03SBarry Smith       idx++; v++;
1103ed8eea03SBarry Smith     }
1104ed8eea03SBarry Smith   }
1105dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1107ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1108ed8eea03SBarry Smith   PetscFunctionReturn(0);
1109ed8eea03SBarry Smith }
1110ed8eea03SBarry Smith 
1111ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1112ed8eea03SBarry Smith {
1113ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1114ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1115d2840e09SBarry Smith   const PetscScalar *x,*v;
1116d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1117ed8eea03SBarry Smith   PetscErrorCode    ierr;
1118d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1119ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1120ed8eea03SBarry Smith 
1121ed8eea03SBarry Smith   PetscFunctionBegin;
1122ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1124ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1125ed8eea03SBarry Smith   idx  = a->j;
1126ed8eea03SBarry Smith   v    = a->a;
1127ed8eea03SBarry Smith   ii   = a->i;
1128ed8eea03SBarry Smith 
1129ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1130ed8eea03SBarry Smith     jrow = ii[i];
1131ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1132ed8eea03SBarry Smith     sum1 = 0.0;
1133ed8eea03SBarry Smith     sum2 = 0.0;
1134ed8eea03SBarry Smith     sum3 = 0.0;
1135ed8eea03SBarry Smith     sum4 = 0.0;
1136ed8eea03SBarry Smith     sum5 = 0.0;
1137ed8eea03SBarry Smith     sum6 = 0.0;
1138ed8eea03SBarry Smith     sum7 = 0.0;
1139ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1140ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1141ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1142ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1143ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1144ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1145ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1146ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1147ed8eea03SBarry Smith       jrow++;
1148ed8eea03SBarry Smith     }
1149ed8eea03SBarry Smith     y[7*i]   += sum1;
1150ed8eea03SBarry Smith     y[7*i+1] += sum2;
1151ed8eea03SBarry Smith     y[7*i+2] += sum3;
1152ed8eea03SBarry Smith     y[7*i+3] += sum4;
1153ed8eea03SBarry Smith     y[7*i+4] += sum5;
1154ed8eea03SBarry Smith     y[7*i+5] += sum6;
1155ed8eea03SBarry Smith     y[7*i+6] += sum7;
1156ed8eea03SBarry Smith   }
1157ed8eea03SBarry Smith 
1158dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11593649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1160ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1161ed8eea03SBarry Smith   PetscFunctionReturn(0);
1162ed8eea03SBarry Smith }
1163ed8eea03SBarry Smith 
1164ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1165ed8eea03SBarry Smith {
1166ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1167ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1168d2840e09SBarry Smith   const PetscScalar *x,*v;
1169d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1170ed8eea03SBarry Smith   PetscErrorCode    ierr;
1171d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1172d2840e09SBarry Smith   PetscInt          n,i;
1173ed8eea03SBarry Smith 
1174ed8eea03SBarry Smith   PetscFunctionBegin;
1175ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11763649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1177ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1178ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1179ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1180ed8eea03SBarry Smith     v      = a->a + a->i[i];
1181ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1182ed8eea03SBarry Smith     alpha1 = x[7*i];
1183ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1184ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1185ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1186ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1187ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1188ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1189ed8eea03SBarry Smith     while (n-->0) {
1190ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1191ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1192ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1193ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1194ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1195ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1196ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1197ed8eea03SBarry Smith       idx++; v++;
1198ed8eea03SBarry Smith     }
1199ed8eea03SBarry Smith   }
1200dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12013649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1202ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1203ed8eea03SBarry Smith   PetscFunctionReturn(0);
1204ed8eea03SBarry Smith }
1205ed8eea03SBarry Smith 
1206dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
120766ed3db0SBarry Smith {
120866ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
120966ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1210d2840e09SBarry Smith   const PetscScalar *x,*v;
1211d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1212dfbe8321SBarry Smith   PetscErrorCode    ierr;
1213d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1214d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
121566ed3db0SBarry Smith 
121666ed3db0SBarry Smith   PetscFunctionBegin;
12173649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12181ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
121966ed3db0SBarry Smith   idx  = a->j;
122066ed3db0SBarry Smith   v    = a->a;
122166ed3db0SBarry Smith   ii   = a->i;
122266ed3db0SBarry Smith 
122366ed3db0SBarry Smith   for (i=0; i<m; i++) {
122466ed3db0SBarry Smith     jrow  = ii[i];
122566ed3db0SBarry Smith     n     = ii[i+1] - jrow;
122666ed3db0SBarry Smith     sum1  = 0.0;
122766ed3db0SBarry Smith     sum2  = 0.0;
122866ed3db0SBarry Smith     sum3  = 0.0;
122966ed3db0SBarry Smith     sum4  = 0.0;
123066ed3db0SBarry Smith     sum5  = 0.0;
123166ed3db0SBarry Smith     sum6  = 0.0;
123266ed3db0SBarry Smith     sum7  = 0.0;
123366ed3db0SBarry Smith     sum8  = 0.0;
123426fbe8dcSKarl Rupp 
1235b3c51c66SMatthew Knepley     nonzerorow += (n>0);
123666ed3db0SBarry Smith     for (j=0; j<n; j++) {
123766ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
123866ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
123966ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
124066ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
124166ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
124266ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
124366ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
124466ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
124566ed3db0SBarry Smith       jrow++;
124666ed3db0SBarry Smith     }
124766ed3db0SBarry Smith     y[8*i]   = sum1;
124866ed3db0SBarry Smith     y[8*i+1] = sum2;
124966ed3db0SBarry Smith     y[8*i+2] = sum3;
125066ed3db0SBarry Smith     y[8*i+3] = sum4;
125166ed3db0SBarry Smith     y[8*i+4] = sum5;
125266ed3db0SBarry Smith     y[8*i+5] = sum6;
125366ed3db0SBarry Smith     y[8*i+6] = sum7;
125466ed3db0SBarry Smith     y[8*i+7] = sum8;
125566ed3db0SBarry Smith   }
125666ed3db0SBarry Smith 
1257dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
12583649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
12591ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
126066ed3db0SBarry Smith   PetscFunctionReturn(0);
126166ed3db0SBarry Smith }
126266ed3db0SBarry Smith 
1263dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
126466ed3db0SBarry Smith {
126566ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
126666ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1267d2840e09SBarry Smith   const PetscScalar *x,*v;
1268d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1269dfbe8321SBarry Smith   PetscErrorCode    ierr;
1270d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1271d2840e09SBarry Smith   PetscInt          n,i;
127266ed3db0SBarry Smith 
127366ed3db0SBarry Smith   PetscFunctionBegin;
1274d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
12753649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12761ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1277bfec09a0SHong Zhang 
127866ed3db0SBarry Smith   for (i=0; i<m; i++) {
1279bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1280bfec09a0SHong Zhang     v      = a->a + a->i[i];
128166ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
128266ed3db0SBarry Smith     alpha1 = x[8*i];
128366ed3db0SBarry Smith     alpha2 = x[8*i+1];
128466ed3db0SBarry Smith     alpha3 = x[8*i+2];
128566ed3db0SBarry Smith     alpha4 = x[8*i+3];
128666ed3db0SBarry Smith     alpha5 = x[8*i+4];
128766ed3db0SBarry Smith     alpha6 = x[8*i+5];
128866ed3db0SBarry Smith     alpha7 = x[8*i+6];
128966ed3db0SBarry Smith     alpha8 = x[8*i+7];
129066ed3db0SBarry Smith     while (n-->0) {
129166ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
129266ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
129366ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
129466ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
129566ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
129666ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
129766ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
129866ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
129966ed3db0SBarry Smith       idx++; v++;
130066ed3db0SBarry Smith     }
130166ed3db0SBarry Smith   }
1302dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13041ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130566ed3db0SBarry Smith   PetscFunctionReturn(0);
130666ed3db0SBarry Smith }
130766ed3db0SBarry Smith 
1308dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
130966ed3db0SBarry Smith {
131066ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
131166ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1312d2840e09SBarry Smith   const PetscScalar *x,*v;
1313d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1314dfbe8321SBarry Smith   PetscErrorCode    ierr;
1315d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1316b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
131766ed3db0SBarry Smith 
131866ed3db0SBarry Smith   PetscFunctionBegin;
131966ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13211ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
132266ed3db0SBarry Smith   idx  = a->j;
132366ed3db0SBarry Smith   v    = a->a;
132466ed3db0SBarry Smith   ii   = a->i;
132566ed3db0SBarry Smith 
132666ed3db0SBarry Smith   for (i=0; i<m; i++) {
132766ed3db0SBarry Smith     jrow = ii[i];
132866ed3db0SBarry Smith     n    = ii[i+1] - jrow;
132966ed3db0SBarry Smith     sum1 = 0.0;
133066ed3db0SBarry Smith     sum2 = 0.0;
133166ed3db0SBarry Smith     sum3 = 0.0;
133266ed3db0SBarry Smith     sum4 = 0.0;
133366ed3db0SBarry Smith     sum5 = 0.0;
133466ed3db0SBarry Smith     sum6 = 0.0;
133566ed3db0SBarry Smith     sum7 = 0.0;
133666ed3db0SBarry Smith     sum8 = 0.0;
133766ed3db0SBarry Smith     for (j=0; j<n; j++) {
133866ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
133966ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
134066ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
134166ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
134266ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
134366ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
134466ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
134566ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
134666ed3db0SBarry Smith       jrow++;
134766ed3db0SBarry Smith     }
134866ed3db0SBarry Smith     y[8*i]   += sum1;
134966ed3db0SBarry Smith     y[8*i+1] += sum2;
135066ed3db0SBarry Smith     y[8*i+2] += sum3;
135166ed3db0SBarry Smith     y[8*i+3] += sum4;
135266ed3db0SBarry Smith     y[8*i+4] += sum5;
135366ed3db0SBarry Smith     y[8*i+5] += sum6;
135466ed3db0SBarry Smith     y[8*i+6] += sum7;
135566ed3db0SBarry Smith     y[8*i+7] += sum8;
135666ed3db0SBarry Smith   }
135766ed3db0SBarry Smith 
1358dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13593649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13601ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
136166ed3db0SBarry Smith   PetscFunctionReturn(0);
136266ed3db0SBarry Smith }
136366ed3db0SBarry Smith 
1364dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
136566ed3db0SBarry Smith {
136666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
136766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1368d2840e09SBarry Smith   const PetscScalar *x,*v;
1369d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1370dfbe8321SBarry Smith   PetscErrorCode    ierr;
1371d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1372d2840e09SBarry Smith   PetscInt          n,i;
137366ed3db0SBarry Smith 
137466ed3db0SBarry Smith   PetscFunctionBegin;
137566ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13763649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13771ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
137866ed3db0SBarry Smith   for (i=0; i<m; i++) {
1379bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1380bfec09a0SHong Zhang     v      = a->a + a->i[i];
138166ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
138266ed3db0SBarry Smith     alpha1 = x[8*i];
138366ed3db0SBarry Smith     alpha2 = x[8*i+1];
138466ed3db0SBarry Smith     alpha3 = x[8*i+2];
138566ed3db0SBarry Smith     alpha4 = x[8*i+3];
138666ed3db0SBarry Smith     alpha5 = x[8*i+4];
138766ed3db0SBarry Smith     alpha6 = x[8*i+5];
138866ed3db0SBarry Smith     alpha7 = x[8*i+6];
138966ed3db0SBarry Smith     alpha8 = x[8*i+7];
139066ed3db0SBarry Smith     while (n-->0) {
139166ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
139266ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
139366ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
139466ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
139566ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
139666ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
139766ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
139866ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
139966ed3db0SBarry Smith       idx++; v++;
140066ed3db0SBarry Smith     }
140166ed3db0SBarry Smith   }
1402dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14041ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14052f7816d4SBarry Smith   PetscFunctionReturn(0);
14062f7816d4SBarry Smith }
14072f7816d4SBarry Smith 
14082b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
1409dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14102b692628SMatthew Knepley {
14112b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14122b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1413d2840e09SBarry Smith   const PetscScalar *x,*v;
1414d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1415dfbe8321SBarry Smith   PetscErrorCode    ierr;
1416d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1417d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14182b692628SMatthew Knepley 
14192b692628SMatthew Knepley   PetscFunctionBegin;
14203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14211ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14222b692628SMatthew Knepley   idx  = a->j;
14232b692628SMatthew Knepley   v    = a->a;
14242b692628SMatthew Knepley   ii   = a->i;
14252b692628SMatthew Knepley 
14262b692628SMatthew Knepley   for (i=0; i<m; i++) {
14272b692628SMatthew Knepley     jrow  = ii[i];
14282b692628SMatthew Knepley     n     = ii[i+1] - jrow;
14292b692628SMatthew Knepley     sum1  = 0.0;
14302b692628SMatthew Knepley     sum2  = 0.0;
14312b692628SMatthew Knepley     sum3  = 0.0;
14322b692628SMatthew Knepley     sum4  = 0.0;
14332b692628SMatthew Knepley     sum5  = 0.0;
14342b692628SMatthew Knepley     sum6  = 0.0;
14352b692628SMatthew Knepley     sum7  = 0.0;
14362b692628SMatthew Knepley     sum8  = 0.0;
14372b692628SMatthew Knepley     sum9  = 0.0;
143826fbe8dcSKarl Rupp 
1439b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14402b692628SMatthew Knepley     for (j=0; j<n; j++) {
14412b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14422b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14432b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14442b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14452b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14462b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14472b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14482b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14492b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14502b692628SMatthew Knepley       jrow++;
14512b692628SMatthew Knepley     }
14522b692628SMatthew Knepley     y[9*i]   = sum1;
14532b692628SMatthew Knepley     y[9*i+1] = sum2;
14542b692628SMatthew Knepley     y[9*i+2] = sum3;
14552b692628SMatthew Knepley     y[9*i+3] = sum4;
14562b692628SMatthew Knepley     y[9*i+4] = sum5;
14572b692628SMatthew Knepley     y[9*i+5] = sum6;
14582b692628SMatthew Knepley     y[9*i+6] = sum7;
14592b692628SMatthew Knepley     y[9*i+7] = sum8;
14602b692628SMatthew Knepley     y[9*i+8] = sum9;
14612b692628SMatthew Knepley   }
14622b692628SMatthew Knepley 
1463dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
14643649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14651ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14662b692628SMatthew Knepley   PetscFunctionReturn(0);
14672b692628SMatthew Knepley }
14682b692628SMatthew Knepley 
1469b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1470b9cda22cSBarry Smith 
1471dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14722b692628SMatthew Knepley {
14732b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14742b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1475d2840e09SBarry Smith   const PetscScalar *x,*v;
1476d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1477dfbe8321SBarry Smith   PetscErrorCode    ierr;
1478d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1479d2840e09SBarry Smith   PetscInt          n,i;
14802b692628SMatthew Knepley 
14812b692628SMatthew Knepley   PetscFunctionBegin;
1482d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
14833649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14841ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14852b692628SMatthew Knepley 
14862b692628SMatthew Knepley   for (i=0; i<m; i++) {
14872b692628SMatthew Knepley     idx    = a->j + a->i[i];
14882b692628SMatthew Knepley     v      = a->a + a->i[i];
14892b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14902b692628SMatthew Knepley     alpha1 = x[9*i];
14912b692628SMatthew Knepley     alpha2 = x[9*i+1];
14922b692628SMatthew Knepley     alpha3 = x[9*i+2];
14932b692628SMatthew Knepley     alpha4 = x[9*i+3];
14942b692628SMatthew Knepley     alpha5 = x[9*i+4];
14952b692628SMatthew Knepley     alpha6 = x[9*i+5];
14962b692628SMatthew Knepley     alpha7 = x[9*i+6];
14972b692628SMatthew Knepley     alpha8 = x[9*i+7];
14982f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
14992b692628SMatthew Knepley     while (n-->0) {
15002b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15012b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15022b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15032b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15042b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15052b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15062b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15072b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15082b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15092b692628SMatthew Knepley       idx++; v++;
15102b692628SMatthew Knepley     }
15112b692628SMatthew Knepley   }
1512dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15133649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15141ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15152b692628SMatthew Knepley   PetscFunctionReturn(0);
15162b692628SMatthew Knepley }
15172b692628SMatthew Knepley 
1518dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15192b692628SMatthew Knepley {
15202b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15212b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1522d2840e09SBarry Smith   const PetscScalar *x,*v;
1523d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1524dfbe8321SBarry Smith   PetscErrorCode    ierr;
1525d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1526b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15272b692628SMatthew Knepley 
15282b692628SMatthew Knepley   PetscFunctionBegin;
15292b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15303649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15311ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15322b692628SMatthew Knepley   idx  = a->j;
15332b692628SMatthew Knepley   v    = a->a;
15342b692628SMatthew Knepley   ii   = a->i;
15352b692628SMatthew Knepley 
15362b692628SMatthew Knepley   for (i=0; i<m; i++) {
15372b692628SMatthew Knepley     jrow = ii[i];
15382b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15392b692628SMatthew Knepley     sum1 = 0.0;
15402b692628SMatthew Knepley     sum2 = 0.0;
15412b692628SMatthew Knepley     sum3 = 0.0;
15422b692628SMatthew Knepley     sum4 = 0.0;
15432b692628SMatthew Knepley     sum5 = 0.0;
15442b692628SMatthew Knepley     sum6 = 0.0;
15452b692628SMatthew Knepley     sum7 = 0.0;
15462b692628SMatthew Knepley     sum8 = 0.0;
15472b692628SMatthew Knepley     sum9 = 0.0;
15482b692628SMatthew Knepley     for (j=0; j<n; j++) {
15492b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15502b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15512b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15522b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15532b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15542b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15552b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15562b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15572b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15582b692628SMatthew Knepley       jrow++;
15592b692628SMatthew Knepley     }
15602b692628SMatthew Knepley     y[9*i]   += sum1;
15612b692628SMatthew Knepley     y[9*i+1] += sum2;
15622b692628SMatthew Knepley     y[9*i+2] += sum3;
15632b692628SMatthew Knepley     y[9*i+3] += sum4;
15642b692628SMatthew Knepley     y[9*i+4] += sum5;
15652b692628SMatthew Knepley     y[9*i+5] += sum6;
15662b692628SMatthew Knepley     y[9*i+6] += sum7;
15672b692628SMatthew Knepley     y[9*i+7] += sum8;
15682b692628SMatthew Knepley     y[9*i+8] += sum9;
15692b692628SMatthew Knepley   }
15702b692628SMatthew Knepley 
1571efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
15723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15731ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15742b692628SMatthew Knepley   PetscFunctionReturn(0);
15752b692628SMatthew Knepley }
15762b692628SMatthew Knepley 
1577dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15782b692628SMatthew Knepley {
15792b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15802b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1581d2840e09SBarry Smith   const PetscScalar *x,*v;
1582d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1583dfbe8321SBarry Smith   PetscErrorCode    ierr;
1584d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1585d2840e09SBarry Smith   PetscInt          n,i;
15862b692628SMatthew Knepley 
15872b692628SMatthew Knepley   PetscFunctionBegin;
15882b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15893649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15901ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15912b692628SMatthew Knepley   for (i=0; i<m; i++) {
15922b692628SMatthew Knepley     idx    = a->j + a->i[i];
15932b692628SMatthew Knepley     v      = a->a + a->i[i];
15942b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15952b692628SMatthew Knepley     alpha1 = x[9*i];
15962b692628SMatthew Knepley     alpha2 = x[9*i+1];
15972b692628SMatthew Knepley     alpha3 = x[9*i+2];
15982b692628SMatthew Knepley     alpha4 = x[9*i+3];
15992b692628SMatthew Knepley     alpha5 = x[9*i+4];
16002b692628SMatthew Knepley     alpha6 = x[9*i+5];
16012b692628SMatthew Knepley     alpha7 = x[9*i+6];
16022b692628SMatthew Knepley     alpha8 = x[9*i+7];
16032b692628SMatthew Knepley     alpha9 = x[9*i+8];
16042b692628SMatthew Knepley     while (n-->0) {
16052b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16062b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16072b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16082b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16092b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16102b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16112b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16122b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16132b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16142b692628SMatthew Knepley       idx++; v++;
16152b692628SMatthew Knepley     }
16162b692628SMatthew Knepley   }
1617dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16183649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16191ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16202b692628SMatthew Knepley   PetscFunctionReturn(0);
16212b692628SMatthew Knepley }
1622b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1623b9cda22cSBarry Smith {
1624b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1625b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1626d2840e09SBarry Smith   const PetscScalar *x,*v;
1627d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1628b9cda22cSBarry Smith   PetscErrorCode    ierr;
1629d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1630d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1631b9cda22cSBarry Smith 
1632b9cda22cSBarry Smith   PetscFunctionBegin;
16333649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1634b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1635b9cda22cSBarry Smith   idx  = a->j;
1636b9cda22cSBarry Smith   v    = a->a;
1637b9cda22cSBarry Smith   ii   = a->i;
1638b9cda22cSBarry Smith 
1639b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1640b9cda22cSBarry Smith     jrow  = ii[i];
1641b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1642b9cda22cSBarry Smith     sum1  = 0.0;
1643b9cda22cSBarry Smith     sum2  = 0.0;
1644b9cda22cSBarry Smith     sum3  = 0.0;
1645b9cda22cSBarry Smith     sum4  = 0.0;
1646b9cda22cSBarry Smith     sum5  = 0.0;
1647b9cda22cSBarry Smith     sum6  = 0.0;
1648b9cda22cSBarry Smith     sum7  = 0.0;
1649b9cda22cSBarry Smith     sum8  = 0.0;
1650b9cda22cSBarry Smith     sum9  = 0.0;
1651b9cda22cSBarry Smith     sum10 = 0.0;
165226fbe8dcSKarl Rupp 
1653b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1654b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1655b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1656b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1657b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1658b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1659b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1660b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1661b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1662b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1663b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1664b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1665b9cda22cSBarry Smith       jrow++;
1666b9cda22cSBarry Smith     }
1667b9cda22cSBarry Smith     y[10*i]   = sum1;
1668b9cda22cSBarry Smith     y[10*i+1] = sum2;
1669b9cda22cSBarry Smith     y[10*i+2] = sum3;
1670b9cda22cSBarry Smith     y[10*i+3] = sum4;
1671b9cda22cSBarry Smith     y[10*i+4] = sum5;
1672b9cda22cSBarry Smith     y[10*i+5] = sum6;
1673b9cda22cSBarry Smith     y[10*i+6] = sum7;
1674b9cda22cSBarry Smith     y[10*i+7] = sum8;
1675b9cda22cSBarry Smith     y[10*i+8] = sum9;
1676b9cda22cSBarry Smith     y[10*i+9] = sum10;
1677b9cda22cSBarry Smith   }
1678b9cda22cSBarry Smith 
1679dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
16803649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1681b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1682b9cda22cSBarry Smith   PetscFunctionReturn(0);
1683b9cda22cSBarry Smith }
1684b9cda22cSBarry Smith 
1685b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1686b9cda22cSBarry Smith {
1687b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1688b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1689d2840e09SBarry Smith   const PetscScalar *x,*v;
1690d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1691b9cda22cSBarry Smith   PetscErrorCode    ierr;
1692d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1693b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1694b9cda22cSBarry Smith 
1695b9cda22cSBarry Smith   PetscFunctionBegin;
1696b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16973649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1698b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1699b9cda22cSBarry Smith   idx  = a->j;
1700b9cda22cSBarry Smith   v    = a->a;
1701b9cda22cSBarry Smith   ii   = a->i;
1702b9cda22cSBarry Smith 
1703b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1704b9cda22cSBarry Smith     jrow  = ii[i];
1705b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1706b9cda22cSBarry Smith     sum1  = 0.0;
1707b9cda22cSBarry Smith     sum2  = 0.0;
1708b9cda22cSBarry Smith     sum3  = 0.0;
1709b9cda22cSBarry Smith     sum4  = 0.0;
1710b9cda22cSBarry Smith     sum5  = 0.0;
1711b9cda22cSBarry Smith     sum6  = 0.0;
1712b9cda22cSBarry Smith     sum7  = 0.0;
1713b9cda22cSBarry Smith     sum8  = 0.0;
1714b9cda22cSBarry Smith     sum9  = 0.0;
1715b9cda22cSBarry Smith     sum10 = 0.0;
1716b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1717b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1718b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1719b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1720b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1721b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1722b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1723b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1724b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1725b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1726b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1727b9cda22cSBarry Smith       jrow++;
1728b9cda22cSBarry Smith     }
1729b9cda22cSBarry Smith     y[10*i]   += sum1;
1730b9cda22cSBarry Smith     y[10*i+1] += sum2;
1731b9cda22cSBarry Smith     y[10*i+2] += sum3;
1732b9cda22cSBarry Smith     y[10*i+3] += sum4;
1733b9cda22cSBarry Smith     y[10*i+4] += sum5;
1734b9cda22cSBarry Smith     y[10*i+5] += sum6;
1735b9cda22cSBarry Smith     y[10*i+6] += sum7;
1736b9cda22cSBarry Smith     y[10*i+7] += sum8;
1737b9cda22cSBarry Smith     y[10*i+8] += sum9;
1738b9cda22cSBarry Smith     y[10*i+9] += sum10;
1739b9cda22cSBarry Smith   }
1740b9cda22cSBarry Smith 
1741dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17423649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1743b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1744b9cda22cSBarry Smith   PetscFunctionReturn(0);
1745b9cda22cSBarry Smith }
1746b9cda22cSBarry Smith 
1747b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1748b9cda22cSBarry Smith {
1749b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1750b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1751d2840e09SBarry Smith   const PetscScalar *x,*v;
1752d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1753b9cda22cSBarry Smith   PetscErrorCode    ierr;
1754d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1755d2840e09SBarry Smith   PetscInt          n,i;
1756b9cda22cSBarry Smith 
1757b9cda22cSBarry Smith   PetscFunctionBegin;
1758d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
17593649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1760b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1761b9cda22cSBarry Smith 
1762b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1763b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1764b9cda22cSBarry Smith     v       = a->a + a->i[i];
1765b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1766e29fdc22SBarry Smith     alpha1  = x[10*i];
1767e29fdc22SBarry Smith     alpha2  = x[10*i+1];
1768e29fdc22SBarry Smith     alpha3  = x[10*i+2];
1769e29fdc22SBarry Smith     alpha4  = x[10*i+3];
1770e29fdc22SBarry Smith     alpha5  = x[10*i+4];
1771e29fdc22SBarry Smith     alpha6  = x[10*i+5];
1772e29fdc22SBarry Smith     alpha7  = x[10*i+6];
1773e29fdc22SBarry Smith     alpha8  = x[10*i+7];
1774e29fdc22SBarry Smith     alpha9  = x[10*i+8];
1775b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1776b9cda22cSBarry Smith     while (n-->0) {
1777e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1778e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1779e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1780e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1781e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1782e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1783e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1784e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1785e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1786b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1787b9cda22cSBarry Smith       idx++; v++;
1788b9cda22cSBarry Smith     }
1789b9cda22cSBarry Smith   }
1790dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17913649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1792b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1793b9cda22cSBarry Smith   PetscFunctionReturn(0);
1794b9cda22cSBarry Smith }
1795b9cda22cSBarry Smith 
1796b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1797b9cda22cSBarry Smith {
1798b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1799b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1800d2840e09SBarry Smith   const PetscScalar *x,*v;
1801d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1802b9cda22cSBarry Smith   PetscErrorCode    ierr;
1803d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1804d2840e09SBarry Smith   PetscInt          n,i;
1805b9cda22cSBarry Smith 
1806b9cda22cSBarry Smith   PetscFunctionBegin;
1807b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18083649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1809b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1810b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1811b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1812b9cda22cSBarry Smith     v       = a->a + a->i[i];
1813b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1814b9cda22cSBarry Smith     alpha1  = x[10*i];
1815b9cda22cSBarry Smith     alpha2  = x[10*i+1];
1816b9cda22cSBarry Smith     alpha3  = x[10*i+2];
1817b9cda22cSBarry Smith     alpha4  = x[10*i+3];
1818b9cda22cSBarry Smith     alpha5  = x[10*i+4];
1819b9cda22cSBarry Smith     alpha6  = x[10*i+5];
1820b9cda22cSBarry Smith     alpha7  = x[10*i+6];
1821b9cda22cSBarry Smith     alpha8  = x[10*i+7];
1822b9cda22cSBarry Smith     alpha9  = x[10*i+8];
1823b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1824b9cda22cSBarry Smith     while (n-->0) {
1825b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1826b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1827b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1828b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1829b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1830b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1831b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1832b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1833b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1834b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1835b9cda22cSBarry Smith       idx++; v++;
1836b9cda22cSBarry Smith     }
1837b9cda22cSBarry Smith   }
1838dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1840b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1841b9cda22cSBarry Smith   PetscFunctionReturn(0);
1842b9cda22cSBarry Smith }
1843b9cda22cSBarry Smith 
18442b692628SMatthew Knepley 
18452f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
1846dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1847dbdd7285SBarry Smith {
1848dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1849dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1850d2840e09SBarry Smith   const PetscScalar *x,*v;
1851d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1852dbdd7285SBarry Smith   PetscErrorCode    ierr;
1853d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1854d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1855dbdd7285SBarry Smith 
1856dbdd7285SBarry Smith   PetscFunctionBegin;
18573649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1858dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1859dbdd7285SBarry Smith   idx  = a->j;
1860dbdd7285SBarry Smith   v    = a->a;
1861dbdd7285SBarry Smith   ii   = a->i;
1862dbdd7285SBarry Smith 
1863dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1864dbdd7285SBarry Smith     jrow  = ii[i];
1865dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1866dbdd7285SBarry Smith     sum1  = 0.0;
1867dbdd7285SBarry Smith     sum2  = 0.0;
1868dbdd7285SBarry Smith     sum3  = 0.0;
1869dbdd7285SBarry Smith     sum4  = 0.0;
1870dbdd7285SBarry Smith     sum5  = 0.0;
1871dbdd7285SBarry Smith     sum6  = 0.0;
1872dbdd7285SBarry Smith     sum7  = 0.0;
1873dbdd7285SBarry Smith     sum8  = 0.0;
1874dbdd7285SBarry Smith     sum9  = 0.0;
1875dbdd7285SBarry Smith     sum10 = 0.0;
1876dbdd7285SBarry Smith     sum11 = 0.0;
187726fbe8dcSKarl Rupp 
1878dbdd7285SBarry Smith     nonzerorow += (n>0);
1879dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1880dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1881dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1882dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1883dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1884dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1885dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1886dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1887dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1888dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1889dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1890dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1891dbdd7285SBarry Smith       jrow++;
1892dbdd7285SBarry Smith     }
1893dbdd7285SBarry Smith     y[11*i]    = sum1;
1894dbdd7285SBarry Smith     y[11*i+1]  = sum2;
1895dbdd7285SBarry Smith     y[11*i+2]  = sum3;
1896dbdd7285SBarry Smith     y[11*i+3]  = sum4;
1897dbdd7285SBarry Smith     y[11*i+4]  = sum5;
1898dbdd7285SBarry Smith     y[11*i+5]  = sum6;
1899dbdd7285SBarry Smith     y[11*i+6]  = sum7;
1900dbdd7285SBarry Smith     y[11*i+7]  = sum8;
1901dbdd7285SBarry Smith     y[11*i+8]  = sum9;
1902dbdd7285SBarry Smith     y[11*i+9]  = sum10;
1903dbdd7285SBarry Smith     y[11*i+10] = sum11;
1904dbdd7285SBarry Smith   }
1905dbdd7285SBarry Smith 
1906dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19073649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1908dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1909dbdd7285SBarry Smith   PetscFunctionReturn(0);
1910dbdd7285SBarry Smith }
1911dbdd7285SBarry Smith 
1912dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1913dbdd7285SBarry Smith {
1914dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1915dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1916d2840e09SBarry Smith   const PetscScalar *x,*v;
1917d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1918dbdd7285SBarry Smith   PetscErrorCode    ierr;
1919d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1920dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1921dbdd7285SBarry Smith 
1922dbdd7285SBarry Smith   PetscFunctionBegin;
1923dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19243649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1925dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1926dbdd7285SBarry Smith   idx  = a->j;
1927dbdd7285SBarry Smith   v    = a->a;
1928dbdd7285SBarry Smith   ii   = a->i;
1929dbdd7285SBarry Smith 
1930dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1931dbdd7285SBarry Smith     jrow  = ii[i];
1932dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1933dbdd7285SBarry Smith     sum1  = 0.0;
1934dbdd7285SBarry Smith     sum2  = 0.0;
1935dbdd7285SBarry Smith     sum3  = 0.0;
1936dbdd7285SBarry Smith     sum4  = 0.0;
1937dbdd7285SBarry Smith     sum5  = 0.0;
1938dbdd7285SBarry Smith     sum6  = 0.0;
1939dbdd7285SBarry Smith     sum7  = 0.0;
1940dbdd7285SBarry Smith     sum8  = 0.0;
1941dbdd7285SBarry Smith     sum9  = 0.0;
1942dbdd7285SBarry Smith     sum10 = 0.0;
1943dbdd7285SBarry Smith     sum11 = 0.0;
1944dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1945dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1946dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1947dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1948dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1949dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1950dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1951dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1952dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1953dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1954dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1955dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1956dbdd7285SBarry Smith       jrow++;
1957dbdd7285SBarry Smith     }
1958dbdd7285SBarry Smith     y[11*i]    += sum1;
1959dbdd7285SBarry Smith     y[11*i+1]  += sum2;
1960dbdd7285SBarry Smith     y[11*i+2]  += sum3;
1961dbdd7285SBarry Smith     y[11*i+3]  += sum4;
1962dbdd7285SBarry Smith     y[11*i+4]  += sum5;
1963dbdd7285SBarry Smith     y[11*i+5]  += sum6;
1964dbdd7285SBarry Smith     y[11*i+6]  += sum7;
1965dbdd7285SBarry Smith     y[11*i+7]  += sum8;
1966dbdd7285SBarry Smith     y[11*i+8]  += sum9;
1967dbdd7285SBarry Smith     y[11*i+9]  += sum10;
1968dbdd7285SBarry Smith     y[11*i+10] += sum11;
1969dbdd7285SBarry Smith   }
1970dbdd7285SBarry Smith 
1971dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
19723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1973dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1974dbdd7285SBarry Smith   PetscFunctionReturn(0);
1975dbdd7285SBarry Smith }
1976dbdd7285SBarry Smith 
1977dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1978dbdd7285SBarry Smith {
1979dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1980dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1981d2840e09SBarry Smith   const PetscScalar *x,*v;
1982d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1983dbdd7285SBarry Smith   PetscErrorCode    ierr;
1984d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1985d2840e09SBarry Smith   PetscInt          n,i;
1986dbdd7285SBarry Smith 
1987dbdd7285SBarry Smith   PetscFunctionBegin;
1988d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
19893649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1990dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1991dbdd7285SBarry Smith 
1992dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1993dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1994dbdd7285SBarry Smith     v       = a->a + a->i[i];
1995dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
1996dbdd7285SBarry Smith     alpha1  = x[11*i];
1997dbdd7285SBarry Smith     alpha2  = x[11*i+1];
1998dbdd7285SBarry Smith     alpha3  = x[11*i+2];
1999dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2000dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2001dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2002dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2003dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2004dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2005dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2006dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2007dbdd7285SBarry Smith     while (n-->0) {
2008dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2009dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2010dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2011dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2012dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2013dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2014dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2015dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2016dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2017dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2018dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2019dbdd7285SBarry Smith       idx++; v++;
2020dbdd7285SBarry Smith     }
2021dbdd7285SBarry Smith   }
2022dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20233649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2024dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2025dbdd7285SBarry Smith   PetscFunctionReturn(0);
2026dbdd7285SBarry Smith }
2027dbdd7285SBarry Smith 
2028dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2029dbdd7285SBarry Smith {
2030dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2031dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2032d2840e09SBarry Smith   const PetscScalar *x,*v;
2033d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2034dbdd7285SBarry Smith   PetscErrorCode    ierr;
2035d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2036d2840e09SBarry Smith   PetscInt          n,i;
2037dbdd7285SBarry Smith 
2038dbdd7285SBarry Smith   PetscFunctionBegin;
2039dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
20403649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2041dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2042dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2043dbdd7285SBarry Smith     idx     = a->j + a->i[i];
2044dbdd7285SBarry Smith     v       = a->a + a->i[i];
2045dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
2046dbdd7285SBarry Smith     alpha1  = x[11*i];
2047dbdd7285SBarry Smith     alpha2  = x[11*i+1];
2048dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2049dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2050dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2051dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2052dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2053dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2054dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2055dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2056dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2057dbdd7285SBarry Smith     while (n-->0) {
2058dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2059dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2060dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2061dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2062dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2063dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2064dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2065dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2066dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2067dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2068dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2069dbdd7285SBarry Smith       idx++; v++;
2070dbdd7285SBarry Smith     }
2071dbdd7285SBarry Smith   }
2072dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20733649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2074dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2075dbdd7285SBarry Smith   PetscFunctionReturn(0);
2076dbdd7285SBarry Smith }
2077dbdd7285SBarry Smith 
2078dbdd7285SBarry Smith 
2079dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2080dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
20812f7816d4SBarry Smith {
20822f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
20832f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2084d2840e09SBarry Smith   const PetscScalar *x,*v;
2085d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
20862f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2087dfbe8321SBarry Smith   PetscErrorCode    ierr;
2088d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2089d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
20902f7816d4SBarry Smith 
20912f7816d4SBarry Smith   PetscFunctionBegin;
20923649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
20931ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
20942f7816d4SBarry Smith   idx  = a->j;
20952f7816d4SBarry Smith   v    = a->a;
20962f7816d4SBarry Smith   ii   = a->i;
20972f7816d4SBarry Smith 
20982f7816d4SBarry Smith   for (i=0; i<m; i++) {
20992f7816d4SBarry Smith     jrow  = ii[i];
21002f7816d4SBarry Smith     n     = ii[i+1] - jrow;
21012f7816d4SBarry Smith     sum1  = 0.0;
21022f7816d4SBarry Smith     sum2  = 0.0;
21032f7816d4SBarry Smith     sum3  = 0.0;
21042f7816d4SBarry Smith     sum4  = 0.0;
21052f7816d4SBarry Smith     sum5  = 0.0;
21062f7816d4SBarry Smith     sum6  = 0.0;
21072f7816d4SBarry Smith     sum7  = 0.0;
21082f7816d4SBarry Smith     sum8  = 0.0;
21092f7816d4SBarry Smith     sum9  = 0.0;
21102f7816d4SBarry Smith     sum10 = 0.0;
21112f7816d4SBarry Smith     sum11 = 0.0;
21122f7816d4SBarry Smith     sum12 = 0.0;
21132f7816d4SBarry Smith     sum13 = 0.0;
21142f7816d4SBarry Smith     sum14 = 0.0;
21152f7816d4SBarry Smith     sum15 = 0.0;
21162f7816d4SBarry Smith     sum16 = 0.0;
211726fbe8dcSKarl Rupp 
2118b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21192f7816d4SBarry Smith     for (j=0; j<n; j++) {
21202f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21212f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21222f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21232f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21242f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
21252f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
21262f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
21272f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
21282f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
21292f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
21302f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
21312f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
21322f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
21332f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
21342f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
21352f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
21362f7816d4SBarry Smith       jrow++;
21372f7816d4SBarry Smith     }
21382f7816d4SBarry Smith     y[16*i]    = sum1;
21392f7816d4SBarry Smith     y[16*i+1]  = sum2;
21402f7816d4SBarry Smith     y[16*i+2]  = sum3;
21412f7816d4SBarry Smith     y[16*i+3]  = sum4;
21422f7816d4SBarry Smith     y[16*i+4]  = sum5;
21432f7816d4SBarry Smith     y[16*i+5]  = sum6;
21442f7816d4SBarry Smith     y[16*i+6]  = sum7;
21452f7816d4SBarry Smith     y[16*i+7]  = sum8;
21462f7816d4SBarry Smith     y[16*i+8]  = sum9;
21472f7816d4SBarry Smith     y[16*i+9]  = sum10;
21482f7816d4SBarry Smith     y[16*i+10] = sum11;
21492f7816d4SBarry Smith     y[16*i+11] = sum12;
21502f7816d4SBarry Smith     y[16*i+12] = sum13;
21512f7816d4SBarry Smith     y[16*i+13] = sum14;
21522f7816d4SBarry Smith     y[16*i+14] = sum15;
21532f7816d4SBarry Smith     y[16*i+15] = sum16;
21542f7816d4SBarry Smith   }
21552f7816d4SBarry Smith 
2156dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
21573649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
21581ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
21592f7816d4SBarry Smith   PetscFunctionReturn(0);
21602f7816d4SBarry Smith }
21612f7816d4SBarry Smith 
2162dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21632f7816d4SBarry Smith {
21642f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21652f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2166d2840e09SBarry Smith   const PetscScalar *x,*v;
2167d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
21682f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2169dfbe8321SBarry Smith   PetscErrorCode    ierr;
2170d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2171d2840e09SBarry Smith   PetscInt          n,i;
21722f7816d4SBarry Smith 
21732f7816d4SBarry Smith   PetscFunctionBegin;
2174d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
21753649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21761ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2177bfec09a0SHong Zhang 
21782f7816d4SBarry Smith   for (i=0; i<m; i++) {
2179bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2180bfec09a0SHong Zhang     v       = a->a + a->i[i];
21812f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
21822f7816d4SBarry Smith     alpha1  = x[16*i];
21832f7816d4SBarry Smith     alpha2  = x[16*i+1];
21842f7816d4SBarry Smith     alpha3  = x[16*i+2];
21852f7816d4SBarry Smith     alpha4  = x[16*i+3];
21862f7816d4SBarry Smith     alpha5  = x[16*i+4];
21872f7816d4SBarry Smith     alpha6  = x[16*i+5];
21882f7816d4SBarry Smith     alpha7  = x[16*i+6];
21892f7816d4SBarry Smith     alpha8  = x[16*i+7];
21902f7816d4SBarry Smith     alpha9  = x[16*i+8];
21912f7816d4SBarry Smith     alpha10 = x[16*i+9];
21922f7816d4SBarry Smith     alpha11 = x[16*i+10];
21932f7816d4SBarry Smith     alpha12 = x[16*i+11];
21942f7816d4SBarry Smith     alpha13 = x[16*i+12];
21952f7816d4SBarry Smith     alpha14 = x[16*i+13];
21962f7816d4SBarry Smith     alpha15 = x[16*i+14];
21972f7816d4SBarry Smith     alpha16 = x[16*i+15];
21982f7816d4SBarry Smith     while (n-->0) {
21992f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22002f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22012f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22022f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22032f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22042f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22052f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22062f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22072f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22082f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22092f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22102f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22112f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22122f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22132f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22142f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22152f7816d4SBarry Smith       idx++; v++;
22162f7816d4SBarry Smith     }
22172f7816d4SBarry Smith   }
2218dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22193649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22201ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22212f7816d4SBarry Smith   PetscFunctionReturn(0);
22222f7816d4SBarry Smith }
22232f7816d4SBarry Smith 
2224dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
22252f7816d4SBarry Smith {
22262f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22272f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2228d2840e09SBarry Smith   const PetscScalar *x,*v;
2229d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
22302f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2231dfbe8321SBarry Smith   PetscErrorCode    ierr;
2232d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2233b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
22342f7816d4SBarry Smith 
22352f7816d4SBarry Smith   PetscFunctionBegin;
22362f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
22373649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22381ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
22392f7816d4SBarry Smith   idx  = a->j;
22402f7816d4SBarry Smith   v    = a->a;
22412f7816d4SBarry Smith   ii   = a->i;
22422f7816d4SBarry Smith 
22432f7816d4SBarry Smith   for (i=0; i<m; i++) {
22442f7816d4SBarry Smith     jrow  = ii[i];
22452f7816d4SBarry Smith     n     = ii[i+1] - jrow;
22462f7816d4SBarry Smith     sum1  = 0.0;
22472f7816d4SBarry Smith     sum2  = 0.0;
22482f7816d4SBarry Smith     sum3  = 0.0;
22492f7816d4SBarry Smith     sum4  = 0.0;
22502f7816d4SBarry Smith     sum5  = 0.0;
22512f7816d4SBarry Smith     sum6  = 0.0;
22522f7816d4SBarry Smith     sum7  = 0.0;
22532f7816d4SBarry Smith     sum8  = 0.0;
22542f7816d4SBarry Smith     sum9  = 0.0;
22552f7816d4SBarry Smith     sum10 = 0.0;
22562f7816d4SBarry Smith     sum11 = 0.0;
22572f7816d4SBarry Smith     sum12 = 0.0;
22582f7816d4SBarry Smith     sum13 = 0.0;
22592f7816d4SBarry Smith     sum14 = 0.0;
22602f7816d4SBarry Smith     sum15 = 0.0;
22612f7816d4SBarry Smith     sum16 = 0.0;
22622f7816d4SBarry Smith     for (j=0; j<n; j++) {
22632f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
22642f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
22652f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
22662f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
22672f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22682f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22692f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22702f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22712f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22722f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22732f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22742f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22752f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22762f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22772f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22782f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22792f7816d4SBarry Smith       jrow++;
22802f7816d4SBarry Smith     }
22812f7816d4SBarry Smith     y[16*i]    += sum1;
22822f7816d4SBarry Smith     y[16*i+1]  += sum2;
22832f7816d4SBarry Smith     y[16*i+2]  += sum3;
22842f7816d4SBarry Smith     y[16*i+3]  += sum4;
22852f7816d4SBarry Smith     y[16*i+4]  += sum5;
22862f7816d4SBarry Smith     y[16*i+5]  += sum6;
22872f7816d4SBarry Smith     y[16*i+6]  += sum7;
22882f7816d4SBarry Smith     y[16*i+7]  += sum8;
22892f7816d4SBarry Smith     y[16*i+8]  += sum9;
22902f7816d4SBarry Smith     y[16*i+9]  += sum10;
22912f7816d4SBarry Smith     y[16*i+10] += sum11;
22922f7816d4SBarry Smith     y[16*i+11] += sum12;
22932f7816d4SBarry Smith     y[16*i+12] += sum13;
22942f7816d4SBarry Smith     y[16*i+13] += sum14;
22952f7816d4SBarry Smith     y[16*i+14] += sum15;
22962f7816d4SBarry Smith     y[16*i+15] += sum16;
22972f7816d4SBarry Smith   }
22982f7816d4SBarry Smith 
2299dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23003649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23011ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23022f7816d4SBarry Smith   PetscFunctionReturn(0);
23032f7816d4SBarry Smith }
23042f7816d4SBarry Smith 
2305dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23062f7816d4SBarry Smith {
23072f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23082f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2309d2840e09SBarry Smith   const PetscScalar *x,*v;
2310d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23112f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2312dfbe8321SBarry Smith   PetscErrorCode    ierr;
2313d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2314d2840e09SBarry Smith   PetscInt          n,i;
23152f7816d4SBarry Smith 
23162f7816d4SBarry Smith   PetscFunctionBegin;
23172f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23183649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23191ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23202f7816d4SBarry Smith   for (i=0; i<m; i++) {
2321bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2322bfec09a0SHong Zhang     v       = a->a + a->i[i];
23232f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
23242f7816d4SBarry Smith     alpha1  = x[16*i];
23252f7816d4SBarry Smith     alpha2  = x[16*i+1];
23262f7816d4SBarry Smith     alpha3  = x[16*i+2];
23272f7816d4SBarry Smith     alpha4  = x[16*i+3];
23282f7816d4SBarry Smith     alpha5  = x[16*i+4];
23292f7816d4SBarry Smith     alpha6  = x[16*i+5];
23302f7816d4SBarry Smith     alpha7  = x[16*i+6];
23312f7816d4SBarry Smith     alpha8  = x[16*i+7];
23322f7816d4SBarry Smith     alpha9  = x[16*i+8];
23332f7816d4SBarry Smith     alpha10 = x[16*i+9];
23342f7816d4SBarry Smith     alpha11 = x[16*i+10];
23352f7816d4SBarry Smith     alpha12 = x[16*i+11];
23362f7816d4SBarry Smith     alpha13 = x[16*i+12];
23372f7816d4SBarry Smith     alpha14 = x[16*i+13];
23382f7816d4SBarry Smith     alpha15 = x[16*i+14];
23392f7816d4SBarry Smith     alpha16 = x[16*i+15];
23402f7816d4SBarry Smith     while (n-->0) {
23412f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
23422f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
23432f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
23442f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
23452f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
23462f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
23472f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
23482f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
23492f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
23502f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
23512f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
23522f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
23532f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
23542f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
23552f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
23562f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
23572f7816d4SBarry Smith       idx++; v++;
23582f7816d4SBarry Smith     }
23592f7816d4SBarry Smith   }
2360dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23613649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23621ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
236366ed3db0SBarry Smith   PetscFunctionReturn(0);
236466ed3db0SBarry Smith }
236566ed3db0SBarry Smith 
2366ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2367ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2368ed1c418cSBarry Smith {
2369ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2370ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2371d2840e09SBarry Smith   const PetscScalar *x,*v;
2372d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2373ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2374ed1c418cSBarry Smith   PetscErrorCode    ierr;
2375d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2376d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2377ed1c418cSBarry Smith 
2378ed1c418cSBarry Smith   PetscFunctionBegin;
23793649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2380ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2381ed1c418cSBarry Smith   idx  = a->j;
2382ed1c418cSBarry Smith   v    = a->a;
2383ed1c418cSBarry Smith   ii   = a->i;
2384ed1c418cSBarry Smith 
2385ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2386ed1c418cSBarry Smith     jrow  = ii[i];
2387ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2388ed1c418cSBarry Smith     sum1  = 0.0;
2389ed1c418cSBarry Smith     sum2  = 0.0;
2390ed1c418cSBarry Smith     sum3  = 0.0;
2391ed1c418cSBarry Smith     sum4  = 0.0;
2392ed1c418cSBarry Smith     sum5  = 0.0;
2393ed1c418cSBarry Smith     sum6  = 0.0;
2394ed1c418cSBarry Smith     sum7  = 0.0;
2395ed1c418cSBarry Smith     sum8  = 0.0;
2396ed1c418cSBarry Smith     sum9  = 0.0;
2397ed1c418cSBarry Smith     sum10 = 0.0;
2398ed1c418cSBarry Smith     sum11 = 0.0;
2399ed1c418cSBarry Smith     sum12 = 0.0;
2400ed1c418cSBarry Smith     sum13 = 0.0;
2401ed1c418cSBarry Smith     sum14 = 0.0;
2402ed1c418cSBarry Smith     sum15 = 0.0;
2403ed1c418cSBarry Smith     sum16 = 0.0;
2404ed1c418cSBarry Smith     sum17 = 0.0;
2405ed1c418cSBarry Smith     sum18 = 0.0;
240626fbe8dcSKarl Rupp 
2407ed1c418cSBarry Smith     nonzerorow += (n>0);
2408ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2409ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2410ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2411ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2412ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2413ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2414ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2415ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2416ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2417ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2418ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2419ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2420ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2421ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2422ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2423ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2424ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2425ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2426ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2427ed1c418cSBarry Smith       jrow++;
2428ed1c418cSBarry Smith     }
2429ed1c418cSBarry Smith     y[18*i]    = sum1;
2430ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2431ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2432ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2433ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2434ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2435ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2436ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2437ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2438ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2439ed1c418cSBarry Smith     y[18*i+10] = sum11;
2440ed1c418cSBarry Smith     y[18*i+11] = sum12;
2441ed1c418cSBarry Smith     y[18*i+12] = sum13;
2442ed1c418cSBarry Smith     y[18*i+13] = sum14;
2443ed1c418cSBarry Smith     y[18*i+14] = sum15;
2444ed1c418cSBarry Smith     y[18*i+15] = sum16;
2445ed1c418cSBarry Smith     y[18*i+16] = sum17;
2446ed1c418cSBarry Smith     y[18*i+17] = sum18;
2447ed1c418cSBarry Smith   }
2448ed1c418cSBarry Smith 
2449dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
24503649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2451ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2452ed1c418cSBarry Smith   PetscFunctionReturn(0);
2453ed1c418cSBarry Smith }
2454ed1c418cSBarry Smith 
2455ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2456ed1c418cSBarry Smith {
2457ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2458ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2459d2840e09SBarry Smith   const PetscScalar *x,*v;
2460d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2461ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2462ed1c418cSBarry Smith   PetscErrorCode    ierr;
2463d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2464d2840e09SBarry Smith   PetscInt          n,i;
2465ed1c418cSBarry Smith 
2466ed1c418cSBarry Smith   PetscFunctionBegin;
2467d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
24683649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2469ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2470ed1c418cSBarry Smith 
2471ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2472ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2473ed1c418cSBarry Smith     v       = a->a + a->i[i];
2474ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2475ed1c418cSBarry Smith     alpha1  = x[18*i];
2476ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2477ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2478ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2479ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2480ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2481ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2482ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2483ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2484ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2485ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2486ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2487ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2488ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2489ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2490ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2491ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2492ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2493ed1c418cSBarry Smith     while (n-->0) {
2494ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2495ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2496ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2497ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2498ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2499ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2500ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2501ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2502ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2503ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2504ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2505ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2506ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2507ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2508ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2509ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2510ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2511ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2512ed1c418cSBarry Smith       idx++; v++;
2513ed1c418cSBarry Smith     }
2514ed1c418cSBarry Smith   }
2515dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25163649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2517ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2518ed1c418cSBarry Smith   PetscFunctionReturn(0);
2519ed1c418cSBarry Smith }
2520ed1c418cSBarry Smith 
2521ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2522ed1c418cSBarry Smith {
2523ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2524ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2525d2840e09SBarry Smith   const PetscScalar *x,*v;
2526d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2527ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2528ed1c418cSBarry Smith   PetscErrorCode    ierr;
2529d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2530ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2531ed1c418cSBarry Smith 
2532ed1c418cSBarry Smith   PetscFunctionBegin;
2533ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
25343649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2535ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2536ed1c418cSBarry Smith   idx  = a->j;
2537ed1c418cSBarry Smith   v    = a->a;
2538ed1c418cSBarry Smith   ii   = a->i;
2539ed1c418cSBarry Smith 
2540ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2541ed1c418cSBarry Smith     jrow  = ii[i];
2542ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2543ed1c418cSBarry Smith     sum1  = 0.0;
2544ed1c418cSBarry Smith     sum2  = 0.0;
2545ed1c418cSBarry Smith     sum3  = 0.0;
2546ed1c418cSBarry Smith     sum4  = 0.0;
2547ed1c418cSBarry Smith     sum5  = 0.0;
2548ed1c418cSBarry Smith     sum6  = 0.0;
2549ed1c418cSBarry Smith     sum7  = 0.0;
2550ed1c418cSBarry Smith     sum8  = 0.0;
2551ed1c418cSBarry Smith     sum9  = 0.0;
2552ed1c418cSBarry Smith     sum10 = 0.0;
2553ed1c418cSBarry Smith     sum11 = 0.0;
2554ed1c418cSBarry Smith     sum12 = 0.0;
2555ed1c418cSBarry Smith     sum13 = 0.0;
2556ed1c418cSBarry Smith     sum14 = 0.0;
2557ed1c418cSBarry Smith     sum15 = 0.0;
2558ed1c418cSBarry Smith     sum16 = 0.0;
2559ed1c418cSBarry Smith     sum17 = 0.0;
2560ed1c418cSBarry Smith     sum18 = 0.0;
2561ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2562ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2563ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2564ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2565ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2566ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2567ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2568ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2569ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2570ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2571ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2572ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2573ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2574ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2575ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2576ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2577ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2578ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2579ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2580ed1c418cSBarry Smith       jrow++;
2581ed1c418cSBarry Smith     }
2582ed1c418cSBarry Smith     y[18*i]    += sum1;
2583ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2584ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2585ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2586ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2587ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2588ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2589ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2590ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2591ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2592ed1c418cSBarry Smith     y[18*i+10] += sum11;
2593ed1c418cSBarry Smith     y[18*i+11] += sum12;
2594ed1c418cSBarry Smith     y[18*i+12] += sum13;
2595ed1c418cSBarry Smith     y[18*i+13] += sum14;
2596ed1c418cSBarry Smith     y[18*i+14] += sum15;
2597ed1c418cSBarry Smith     y[18*i+15] += sum16;
2598ed1c418cSBarry Smith     y[18*i+16] += sum17;
2599ed1c418cSBarry Smith     y[18*i+17] += sum18;
2600ed1c418cSBarry Smith   }
2601ed1c418cSBarry Smith 
2602dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2604ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2605ed1c418cSBarry Smith   PetscFunctionReturn(0);
2606ed1c418cSBarry Smith }
2607ed1c418cSBarry Smith 
2608ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2609ed1c418cSBarry Smith {
2610ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2611ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2612d2840e09SBarry Smith   const PetscScalar *x,*v;
2613d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2614ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2615ed1c418cSBarry Smith   PetscErrorCode    ierr;
2616d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2617d2840e09SBarry Smith   PetscInt          n,i;
2618ed1c418cSBarry Smith 
2619ed1c418cSBarry Smith   PetscFunctionBegin;
2620ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2622ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2623ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2624ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2625ed1c418cSBarry Smith     v       = a->a + a->i[i];
2626ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2627ed1c418cSBarry Smith     alpha1  = x[18*i];
2628ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2629ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2630ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2631ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2632ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2633ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2634ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2635ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2636ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2637ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2638ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2639ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2640ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2641ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2642ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2643ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2644ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2645ed1c418cSBarry Smith     while (n-->0) {
2646ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2647ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2648ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2649ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2650ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2651ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2652ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2653ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2654ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2655ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2656ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2657ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2658ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2659ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2660ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2661ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2662ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2663ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2664ed1c418cSBarry Smith       idx++; v++;
2665ed1c418cSBarry Smith     }
2666ed1c418cSBarry Smith   }
2667dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26683649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2669ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2670ed1c418cSBarry Smith   PetscFunctionReturn(0);
2671ed1c418cSBarry Smith }
2672ed1c418cSBarry Smith 
26736861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
26746861f193SBarry Smith {
26756861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26766861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26776861f193SBarry Smith   const PetscScalar *x,*v;
26786861f193SBarry Smith   PetscScalar       *y,*sums;
26796861f193SBarry Smith   PetscErrorCode    ierr;
26806861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
26816861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
26826861f193SBarry Smith 
26836861f193SBarry Smith   PetscFunctionBegin;
26843649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
26856861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
26866861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
26876861f193SBarry Smith   idx  = a->j;
26886861f193SBarry Smith   v    = a->a;
26896861f193SBarry Smith   ii   = a->i;
26906861f193SBarry Smith 
26916861f193SBarry Smith   for (i=0; i<m; i++) {
26926861f193SBarry Smith     jrow = ii[i];
26936861f193SBarry Smith     n    = ii[i+1] - jrow;
26946861f193SBarry Smith     sums = y + dof*i;
26956861f193SBarry Smith     for (j=0; j<n; j++) {
26966861f193SBarry Smith       for (k=0; k<dof; k++) {
26976861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
26986861f193SBarry Smith       }
26996861f193SBarry Smith       jrow++;
27006861f193SBarry Smith     }
27016861f193SBarry Smith   }
27026861f193SBarry Smith 
27036861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27043649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27056861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27066861f193SBarry Smith   PetscFunctionReturn(0);
27076861f193SBarry Smith }
27086861f193SBarry Smith 
27096861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27106861f193SBarry Smith {
27116861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27126861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27136861f193SBarry Smith   const PetscScalar *x,*v;
27146861f193SBarry Smith   PetscScalar       *y,*sums;
27156861f193SBarry Smith   PetscErrorCode    ierr;
27166861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27176861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27186861f193SBarry Smith 
27196861f193SBarry Smith   PetscFunctionBegin;
27206861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27226861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27236861f193SBarry Smith   idx  = a->j;
27246861f193SBarry Smith   v    = a->a;
27256861f193SBarry Smith   ii   = a->i;
27266861f193SBarry Smith 
27276861f193SBarry Smith   for (i=0; i<m; i++) {
27286861f193SBarry Smith     jrow = ii[i];
27296861f193SBarry Smith     n    = ii[i+1] - jrow;
27306861f193SBarry Smith     sums = y + dof*i;
27316861f193SBarry Smith     for (j=0; j<n; j++) {
27326861f193SBarry Smith       for (k=0; k<dof; k++) {
27336861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
27346861f193SBarry Smith       }
27356861f193SBarry Smith       jrow++;
27366861f193SBarry Smith     }
27376861f193SBarry Smith   }
27386861f193SBarry Smith 
27396861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27403649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27416861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
27426861f193SBarry Smith   PetscFunctionReturn(0);
27436861f193SBarry Smith }
27446861f193SBarry Smith 
27456861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27466861f193SBarry Smith {
27476861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27486861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27496861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27506861f193SBarry Smith   PetscScalar       *y;
27516861f193SBarry Smith   PetscErrorCode    ierr;
27526861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27536861f193SBarry Smith   PetscInt          n,i,k;
27546861f193SBarry Smith 
27556861f193SBarry Smith   PetscFunctionBegin;
27563649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27576861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27586861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27596861f193SBarry Smith   for (i=0; i<m; i++) {
27606861f193SBarry Smith     idx   = a->j + a->i[i];
27616861f193SBarry Smith     v     = a->a + a->i[i];
27626861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27636861f193SBarry Smith     alpha = x + dof*i;
27646861f193SBarry Smith     while (n-->0) {
27656861f193SBarry Smith       for (k=0; k<dof; k++) {
27666861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27676861f193SBarry Smith       }
276883ce7366SBarry Smith       idx++; v++;
27696861f193SBarry Smith     }
27706861f193SBarry Smith   }
27716861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27736861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27746861f193SBarry Smith   PetscFunctionReturn(0);
27756861f193SBarry Smith }
27766861f193SBarry Smith 
27776861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27786861f193SBarry Smith {
27796861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27806861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27816861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27826861f193SBarry Smith   PetscScalar       *y;
27836861f193SBarry Smith   PetscErrorCode    ierr;
27846861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27856861f193SBarry Smith   PetscInt          n,i,k;
27866861f193SBarry Smith 
27876861f193SBarry Smith   PetscFunctionBegin;
27886861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27893649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27906861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27916861f193SBarry Smith   for (i=0; i<m; i++) {
27926861f193SBarry Smith     idx   = a->j + a->i[i];
27936861f193SBarry Smith     v     = a->a + a->i[i];
27946861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27956861f193SBarry Smith     alpha = x + dof*i;
27966861f193SBarry Smith     while (n-->0) {
27976861f193SBarry Smith       for (k=0; k<dof; k++) {
27986861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27996861f193SBarry Smith       }
280083ce7366SBarry Smith       idx++; v++;
28016861f193SBarry Smith     }
28026861f193SBarry Smith   }
28036861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28043649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28056861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28066861f193SBarry Smith   PetscFunctionReturn(0);
28076861f193SBarry Smith }
28086861f193SBarry Smith 
2809f4a53059SBarry Smith /*===================================================================================*/
2810dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2811f4a53059SBarry Smith {
28124cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2813dfbe8321SBarry Smith   PetscErrorCode ierr;
2814f4a53059SBarry Smith 
2815b24ad042SBarry Smith   PetscFunctionBegin;
2816f4a53059SBarry Smith   /* start the scatter */
2817ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28184cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2819ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28204cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2821f4a53059SBarry Smith   PetscFunctionReturn(0);
2822f4a53059SBarry Smith }
2823f4a53059SBarry Smith 
2824dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
28254cb9d9b8SBarry Smith {
28264cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2827dfbe8321SBarry Smith   PetscErrorCode ierr;
2828b24ad042SBarry Smith 
28294cb9d9b8SBarry Smith   PetscFunctionBegin;
28304cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
28314cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2832ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2833ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28344cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28354cb9d9b8SBarry Smith }
28364cb9d9b8SBarry Smith 
2837dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28384cb9d9b8SBarry Smith {
28394cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2840dfbe8321SBarry Smith   PetscErrorCode ierr;
28414cb9d9b8SBarry Smith 
2842b24ad042SBarry Smith   PetscFunctionBegin;
28434cb9d9b8SBarry Smith   /* start the scatter */
2844ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2845d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2846ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2847717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
28484cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28494cb9d9b8SBarry Smith }
28504cb9d9b8SBarry Smith 
2851dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28524cb9d9b8SBarry Smith {
28534cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2854dfbe8321SBarry Smith   PetscErrorCode ierr;
2855b24ad042SBarry Smith 
28564cb9d9b8SBarry Smith   PetscFunctionBegin;
28574cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2858ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2859d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2860ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28614cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28624cb9d9b8SBarry Smith }
28634cb9d9b8SBarry Smith 
286495ddefa2SHong Zhang /* ----------------------------------------------------------------*/
28657ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
28667ba1a0bfSKris Buschelman {
28677ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
28680298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
28697ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
28707ba1a0bfSKris Buschelman   Mat                P         =pp->AIJ;
28717ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2872d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
28737ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2874d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2875d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
28767ba1a0bfSKris Buschelman   MatScalar          *ca;
2877d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
28787ba1a0bfSKris Buschelman 
28797ba1a0bfSKris Buschelman   PetscFunctionBegin;
28807ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
28817ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
28827ba1a0bfSKris Buschelman 
28837ba1a0bfSKris Buschelman   cn = pn*ppdof;
28847ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
28857ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
2886854ce69bSBarry Smith   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
28877ba1a0bfSKris Buschelman   ci[0] = 0;
28887ba1a0bfSKris Buschelman 
28897ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
2890dcca6d9dSJed Brown   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2891c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2892c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
28937ba1a0bfSKris Buschelman 
28947ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
28957ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
28967ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2897f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
28987ba1a0bfSKris Buschelman   current_space = free_space;
28997ba1a0bfSKris Buschelman 
29007ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
29017ba1a0bfSKris Buschelman   for (i=0; i<pn; i++) {
29027ba1a0bfSKris Buschelman     ptnzi = pti[i+1] - pti[i];
29037ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
29047ba1a0bfSKris Buschelman     for (dof=0; dof<ppdof; dof++) {
29057ba1a0bfSKris Buschelman       ptanzi = 0;
29067ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
29077ba1a0bfSKris Buschelman       for (j=0; j<ptnzi; j++) {
29087ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
29097ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
29107ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
29117ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
29127ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
29137ba1a0bfSKris Buschelman         for (k=0; k<anzj; k++) {
29147ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
29157ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
29167ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
29177ba1a0bfSKris Buschelman           }
29187ba1a0bfSKris Buschelman         }
29197ba1a0bfSKris Buschelman       }
29207ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
29217ba1a0bfSKris Buschelman       ptaj = ptasparserow;
29227ba1a0bfSKris Buschelman       cnzi = 0;
29237ba1a0bfSKris Buschelman       for (j=0; j<ptanzi; j++) {
29247ba1a0bfSKris Buschelman         /* Get offset within block of P */
29257ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
29267ba1a0bfSKris Buschelman         /* Get block row of P */
29277ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
29287ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
29297ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
29307ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
29317ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
29327ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
29337ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
29347ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
29357ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
29367ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
29377ba1a0bfSKris Buschelman           }
29387ba1a0bfSKris Buschelman         }
29397ba1a0bfSKris Buschelman       }
29407ba1a0bfSKris Buschelman 
29417ba1a0bfSKris Buschelman       /* sort sparserow */
29427ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
29437ba1a0bfSKris Buschelman 
29447ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
29457ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
29467ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
2947f91af8c7SBarry Smith         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
29487ba1a0bfSKris Buschelman       }
29497ba1a0bfSKris Buschelman 
29507ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
29517ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
295226fbe8dcSKarl Rupp 
29537ba1a0bfSKris Buschelman       current_space->array           += cnzi;
29547ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
29557ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
29567ba1a0bfSKris Buschelman 
295726fbe8dcSKarl Rupp       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
295826fbe8dcSKarl Rupp       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
295926fbe8dcSKarl Rupp 
29607ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
29617ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
29627ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
29637ba1a0bfSKris Buschelman     }
29647ba1a0bfSKris Buschelman   }
29657ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
29667ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
29677ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
2968854ce69bSBarry Smith   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
2969a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
297074ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
29717ba1a0bfSKris Buschelman 
29727ba1a0bfSKris Buschelman   /* Allocate space for ca */
2973854ce69bSBarry Smith   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
29747ba1a0bfSKris Buschelman 
29757ba1a0bfSKris Buschelman   /* put together the new matrix */
2976ce94432eSBarry Smith   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2977f27682edSJed Brown   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
29787ba1a0bfSKris Buschelman 
29797ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29807ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
29817ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ*)((*C)->data);
2982e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2983e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
29847ba1a0bfSKris Buschelman   c->nonew   = 0;
298526fbe8dcSKarl Rupp 
2986d76021d8SHong Zhang   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29877ba1a0bfSKris Buschelman 
29887ba1a0bfSKris Buschelman   /* Clean up. */
29897ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29907ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
29917ba1a0bfSKris Buschelman }
29927ba1a0bfSKris Buschelman 
29937ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
29947ba1a0bfSKris Buschelman {
29957ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
29967ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
29977ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
29987ba1a0bfSKris Buschelman   Mat             P  =pp->AIJ;
29997ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
30007ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
30017ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3002a2ea699eSBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3003d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3004d2840e09SBarry Smith   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3005d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3006a2ea699eSBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3007d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
30087ba1a0bfSKris Buschelman 
30097ba1a0bfSKris Buschelman   PetscFunctionBegin;
30107ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
30111795a4d1SJed Brown   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
30127ba1a0bfSKris Buschelman 
30137ba1a0bfSKris Buschelman   /* Clear old values in C */
30147ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
30157ba1a0bfSKris Buschelman 
30167ba1a0bfSKris Buschelman   for (i=0; i<am; i++) {
30177ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
30187ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
30197ba1a0bfSKris Buschelman     apnzj = 0;
30207ba1a0bfSKris Buschelman     for (j=0; j<anzi; j++) {
30217ba1a0bfSKris Buschelman       /* Get offset within block of P */
30227ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
30237ba1a0bfSKris Buschelman       /* Get block row of P */
30247ba1a0bfSKris Buschelman       prow = *aj++/ppdof;   /* integer division */
30257ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
30267ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
30277ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
30287ba1a0bfSKris Buschelman       for (k=0; k<pnzj; k++) {
30297ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
30307ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
30317ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
30327ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
30337ba1a0bfSKris Buschelman         }
30347ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
30357ba1a0bfSKris Buschelman       }
3036dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
30377ba1a0bfSKris Buschelman       aa++;
30387ba1a0bfSKris Buschelman     }
30397ba1a0bfSKris Buschelman 
30407ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
30417ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
30427ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
30437ba1a0bfSKris Buschelman 
30447ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
30457ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
30467ba1a0bfSKris Buschelman     pshift  = i%ppdof;
30477ba1a0bfSKris Buschelman     poffset = pi[prow];
30487ba1a0bfSKris Buschelman     pnzi    = pi[prow+1] - poffset;
30497ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
30507ba1a0bfSKris Buschelman     pJ = pj+poffset;
30517ba1a0bfSKris Buschelman     pA = pa+poffset;
30527ba1a0bfSKris Buschelman     for (j=0; j<pnzi; j++) {
30537ba1a0bfSKris Buschelman       crow = (*pJ)*ppdof+pshift;
30547ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
30557ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
30567ba1a0bfSKris Buschelman       pJ++;
30577ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
30587ba1a0bfSKris Buschelman       for (k=0,nextap=0; nextap<apnzj; k++) {
305926fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
30607ba1a0bfSKris Buschelman       }
3061dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
30627ba1a0bfSKris Buschelman       pA++;
30637ba1a0bfSKris Buschelman     }
30647ba1a0bfSKris Buschelman 
30657ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
30667ba1a0bfSKris Buschelman     for (j=0; j<apnzj; j++) {
30677ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
30687ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
30697ba1a0bfSKris Buschelman     }
30707ba1a0bfSKris Buschelman   }
30717ba1a0bfSKris Buschelman 
30727ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
30737ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
30747ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
307574ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
307695ddefa2SHong Zhang   PetscFunctionReturn(0);
307795ddefa2SHong Zhang }
30787ba1a0bfSKris Buschelman 
3079150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
30802121bac1SHong Zhang {
30812121bac1SHong Zhang   PetscErrorCode ierr;
30822121bac1SHong Zhang 
30832121bac1SHong Zhang   PetscFunctionBegin;
30842121bac1SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
30853ff4c91cSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
30862121bac1SHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
30873ff4c91cSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
30882121bac1SHong Zhang   }
30893ff4c91cSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
30902121bac1SHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
30913ff4c91cSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
30922121bac1SHong Zhang   PetscFunctionReturn(0);
30932121bac1SHong Zhang }
30942121bac1SHong Zhang 
3095f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
309695ddefa2SHong Zhang {
309795ddefa2SHong Zhang   PetscErrorCode ierr;
309895ddefa2SHong Zhang 
309995ddefa2SHong Zhang   PetscFunctionBegin;
310095ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3101511c6705SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr);
310295ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
310395ddefa2SHong Zhang   PetscFunctionReturn(0);
310495ddefa2SHong Zhang }
310595ddefa2SHong Zhang 
3106f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
310795ddefa2SHong Zhang {
310895ddefa2SHong Zhang   PetscFunctionBegin;
3109e32f2f54SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
31107ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
31117ba1a0bfSKris Buschelman }
31127ba1a0bfSKris Buschelman 
3113150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
31149e03dfbbSHong Zhang {
31159e03dfbbSHong Zhang   PetscErrorCode ierr;
31169e03dfbbSHong Zhang 
31179e03dfbbSHong Zhang   PetscFunctionBegin;
31189e03dfbbSHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
31193ff4c91cSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
31209e03dfbbSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
31213ff4c91cSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
31229e03dfbbSHong Zhang   }
31233ff4c91cSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
31249e03dfbbSHong Zhang   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
31253ff4c91cSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
31269e03dfbbSHong Zhang   PetscFunctionReturn(0);
31279e03dfbbSHong Zhang }
31289e03dfbbSHong Zhang 
3129cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
31309c4fc4c7SBarry Smith {
31319c4fc4c7SBarry Smith   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3132ceb03754SKris Buschelman   Mat            a    = b->AIJ,B;
31339c4fc4c7SBarry Smith   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
31349c4fc4c7SBarry Smith   PetscErrorCode ierr;
31350fd73130SBarry Smith   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3136ba8c8a56SBarry Smith   PetscInt       *cols;
3137ba8c8a56SBarry Smith   PetscScalar    *vals;
31389c4fc4c7SBarry Smith 
31399c4fc4c7SBarry Smith   PetscFunctionBegin;
31409c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3141785e854fSJed Brown   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
31429c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
31439c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
314426fbe8dcSKarl Rupp     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
31459c4fc4c7SBarry Smith   }
3146ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
31479c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
3148785e854fSJed Brown   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
31499c4fc4c7SBarry Smith   ii   = 0;
31509c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3151ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
31520fd73130SBarry Smith     for (j=0; j<dof; j++) {
315326fbe8dcSKarl Rupp       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3154ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
31559c4fc4c7SBarry Smith       ii++;
31569c4fc4c7SBarry Smith     }
3157ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
31589c4fc4c7SBarry Smith   }
31599c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3160ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162ceb03754SKris Buschelman 
3163511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
316428be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3165c3d102feSKris Buschelman   } else {
3166ceb03754SKris Buschelman     *newmat = B;
3167c3d102feSKris Buschelman   }
31689c4fc4c7SBarry Smith   PetscFunctionReturn(0);
31699c4fc4c7SBarry Smith }
31709c4fc4c7SBarry Smith 
3171c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3172be1d678aSKris Buschelman 
3173cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
31740fd73130SBarry Smith {
31750fd73130SBarry Smith   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3176ceb03754SKris Buschelman   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
31770fd73130SBarry Smith   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
31780fd73130SBarry Smith   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
31790fd73130SBarry Smith   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
31800fd73130SBarry Smith   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
31810298fd71SBarry Smith   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
31820298fd71SBarry Smith   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
31830fd73130SBarry Smith   PetscInt       rstart,cstart,*garray,ii,k;
31840fd73130SBarry Smith   PetscErrorCode ierr;
31850fd73130SBarry Smith   PetscScalar    *vals,*ovals;
31860fd73130SBarry Smith 
31870fd73130SBarry Smith   PetscFunctionBegin;
3188dcca6d9dSJed Brown   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3189d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
31900fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
31910fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
31920fd73130SBarry Smith     for (j=0; j<dof; j++) {
31930fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
31940fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
31950fd73130SBarry Smith     }
31960fd73130SBarry Smith   }
3197ce94432eSBarry Smith   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3198f27682edSJed Brown   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
31990fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
32000fd73130SBarry Smith 
3201dcca6d9dSJed Brown   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3202d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3203d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
32040fd73130SBarry Smith   garray = mpiaij->garray;
32050fd73130SBarry Smith 
32060fd73130SBarry Smith   ii = rstart;
3207d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32080fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32090fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
32100fd73130SBarry Smith     for (j=0; j<dof; j++) {
32110fd73130SBarry Smith       for (k=0; k<ncols; k++) {
32120fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
32130fd73130SBarry Smith       }
32140fd73130SBarry Smith       for (k=0; k<oncols; k++) {
32150fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
32160fd73130SBarry Smith       }
3217ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3218ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
32190fd73130SBarry Smith       ii++;
32200fd73130SBarry Smith     }
32210fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32220fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
32230fd73130SBarry Smith   }
32240fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
32250fd73130SBarry Smith 
3226ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3227ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3228ceb03754SKris Buschelman 
3229511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32307adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
32317adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
323226fbe8dcSKarl Rupp 
323328be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
323426fbe8dcSKarl Rupp 
32357adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3236c3d102feSKris Buschelman   } else {
3237ceb03754SKris Buschelman     *newmat = B;
3238c3d102feSKris Buschelman   }
32390fd73130SBarry Smith   PetscFunctionReturn(0);
32400fd73130SBarry Smith }
32410fd73130SBarry Smith 
32427dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
32439e516c8fSBarry Smith {
32449e516c8fSBarry Smith   PetscErrorCode ierr;
32459e516c8fSBarry Smith   Mat            A;
32469e516c8fSBarry Smith 
32479e516c8fSBarry Smith   PetscFunctionBegin;
32489e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
32497dae84e0SHong Zhang   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
32509e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
32519e516c8fSBarry Smith   PetscFunctionReturn(0);
32529e516c8fSBarry Smith }
32530fd73130SBarry Smith 
3254bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3255480dffcfSJed Brown /*@
32560bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
32570bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
32580bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
32590bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
32600bad9183SKris Buschelman 
3261ff585edeSJed Brown   Collective
3262ff585edeSJed Brown 
3263ff585edeSJed Brown   Input Parameters:
3264ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3265ff585edeSJed Brown - dof - the block size (number of components per node)
3266ff585edeSJed Brown 
3267ff585edeSJed Brown   Output Parameter:
3268ff585edeSJed Brown . maij - the new MAIJ matrix
3269ff585edeSJed Brown 
32700bad9183SKris Buschelman   Operations provided:
32710fd73130SBarry Smith + MatMult
32720bad9183SKris Buschelman . MatMultTranspose
32730bad9183SKris Buschelman . MatMultAdd
32740bad9183SKris Buschelman . MatMultTransposeAdd
32750fd73130SBarry Smith - MatView
32760bad9183SKris Buschelman 
32770bad9183SKris Buschelman   Level: advanced
32780bad9183SKris Buschelman 
3279b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3280ff585edeSJed Brown @*/
32817087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
328282b1193eSBarry Smith {
3283dfbe8321SBarry Smith   PetscErrorCode ierr;
3284b24ad042SBarry Smith   PetscMPIInt    size;
3285b24ad042SBarry Smith   PetscInt       n;
328682b1193eSBarry Smith   Mat            B;
328782b1193eSBarry Smith 
328882b1193eSBarry Smith   PetscFunctionBegin;
3289d72c5c08SBarry Smith   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3290d72c5c08SBarry Smith 
329126fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
329226fbe8dcSKarl Rupp   else {
3293ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3294d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3295bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3296bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3297bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3298bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
329926fbe8dcSKarl Rupp 
3300cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3301d72c5c08SBarry Smith 
3302ce94432eSBarry Smith     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3303f4a53059SBarry Smith     if (size == 1) {
3304feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3305feb9fe23SJed Brown 
3306b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
330726fbe8dcSKarl Rupp 
33080298fd71SBarry Smith       B->ops->setup   = NULL;
33094cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
33100fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3311feb9fe23SJed Brown       b               = (Mat_SeqMAIJ*)B->data;
3312b9b97703SBarry Smith       b->dof          = dof;
33134cb9d9b8SBarry Smith       b->AIJ          = A;
331426fbe8dcSKarl Rupp 
3315d72c5c08SBarry Smith       if (dof == 2) {
3316cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3317cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3318cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3319cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3320bcc973b7SBarry Smith       } else if (dof == 3) {
3321bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3322bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3323bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3324bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3325bcc973b7SBarry Smith       } else if (dof == 4) {
3326bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3327bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3328bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3329bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3330f9fae5adSBarry Smith       } else if (dof == 5) {
3331f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3332f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3333f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3334f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
33356cd98798SBarry Smith       } else if (dof == 6) {
33366cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
33376cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
33386cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
33396cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3340ed8eea03SBarry Smith       } else if (dof == 7) {
3341ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3342ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3343ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3344ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
334566ed3db0SBarry Smith       } else if (dof == 8) {
334666ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
334766ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
334866ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
334966ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
33502b692628SMatthew Knepley       } else if (dof == 9) {
33512b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
33522b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
33532b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
33542b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3355b9cda22cSBarry Smith       } else if (dof == 10) {
3356b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3357b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3358b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3359b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3360dbdd7285SBarry Smith       } else if (dof == 11) {
3361dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3362dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3363dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3364dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
33652f7816d4SBarry Smith       } else if (dof == 16) {
33662f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
33672f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
33682f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
33692f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3370ed1c418cSBarry Smith       } else if (dof == 18) {
3371ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3372ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3373ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3374ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
337582b1193eSBarry Smith       } else {
33766861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
33776861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
33786861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
33796861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
338082b1193eSBarry Smith       }
3381bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3382bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
338359e29146SRichard Tran Mills       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3384279e4bd4SRichard Tran Mills       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3385f4a53059SBarry Smith     } else {
3386f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3387feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3388f4a53059SBarry Smith       IS          from,to;
3389f4a53059SBarry Smith       Vec         gvec;
3390f4a53059SBarry Smith 
3391b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
339226fbe8dcSKarl Rupp 
33930298fd71SBarry Smith       B->ops->setup   = NULL;
3394d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
33950fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
339626fbe8dcSKarl Rupp 
3397b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3398b9b97703SBarry Smith       b->dof = dof;
3399b9b97703SBarry Smith       b->A   = A;
340026fbe8dcSKarl Rupp 
34014cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
34024cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
34034cb9d9b8SBarry Smith 
3404f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3405a34bdc0bSBarry Smith       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3406a34bdc0bSBarry Smith       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
340713288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3408a34bdc0bSBarry Smith       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3409f4a53059SBarry Smith 
3410f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3411ce94432eSBarry Smith       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3412f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3413f4a53059SBarry Smith 
3414f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3415ce94432eSBarry Smith       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3416f4a53059SBarry Smith 
3417f4a53059SBarry Smith       /* generate the scatter context */
3418f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3419f4a53059SBarry Smith 
34206bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
34216bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
34226bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3423f4a53059SBarry Smith 
3424f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
34254cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
34264cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
34274cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
342826fbe8dcSKarl Rupp 
3429bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3430bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3431f4a53059SBarry Smith     }
34327dae84e0SHong Zhang     B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
34334994cf47SJed Brown     ierr  = MatSetUp(B);CHKERRQ(ierr);
3434cd3bbe55SBarry Smith     *maij = B;
3435146574abSBarry Smith     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3436d72c5c08SBarry Smith   }
343782b1193eSBarry Smith   PetscFunctionReturn(0);
343882b1193eSBarry Smith }
3439