xref: /petsc/src/mat/impls/maij/maij.c (revision 3e0c911f9e26fb670f0fa381ba4e7d9c792dff14)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
2182b1193eSBarry Smith 
22b350b9acSSatish Balay /*@
23ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24ff585edeSJed Brown 
25ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26ff585edeSJed Brown 
27ff585edeSJed Brown    Input Parameter:
28ff585edeSJed Brown .  A - the MAIJ matrix
29ff585edeSJed Brown 
30ff585edeSJed Brown    Output Parameter:
31ff585edeSJed Brown .  B - the AIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Level: advanced
34ff585edeSJed Brown 
3595452b02SPatrick Sanan    Notes:
3695452b02SPatrick Sanan     The reference count on the AIJ matrix is not increased so you should not destroy it.
37ff585edeSJed Brown 
38ff585edeSJed Brown .seealso: MatCreateMAIJ()
39ff585edeSJed Brown @*/
407087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41b9b97703SBarry Smith {
42dfbe8321SBarry Smith   PetscErrorCode ierr;
43ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
44b9b97703SBarry Smith 
45b9b97703SBarry Smith   PetscFunctionBegin;
46251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
47251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
48b9b97703SBarry Smith   if (ismpimaij) {
49b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50b9b97703SBarry Smith 
51b9b97703SBarry Smith     *B = b->A;
52b9b97703SBarry Smith   } else if (isseqmaij) {
53b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54b9b97703SBarry Smith 
55b9b97703SBarry Smith     *B = b->AIJ;
56b9b97703SBarry Smith   } else {
57b9b97703SBarry Smith     *B = A;
58b9b97703SBarry Smith   }
59b9b97703SBarry Smith   PetscFunctionReturn(0);
60b9b97703SBarry Smith }
61b9b97703SBarry Smith 
62480dffcfSJed Brown /*@
63ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64ff585edeSJed Brown 
653f9fe445SBarry Smith    Logically Collective
66ff585edeSJed Brown 
67ff585edeSJed Brown    Input Parameter:
68ff585edeSJed Brown +  A - the MAIJ matrix
69ff585edeSJed Brown -  dof - the block size for the new matrix
70ff585edeSJed Brown 
71ff585edeSJed Brown    Output Parameter:
72ff585edeSJed Brown .  B - the new MAIJ matrix
73ff585edeSJed Brown 
74ff585edeSJed Brown    Level: advanced
75ff585edeSJed Brown 
76ff585edeSJed Brown .seealso: MatCreateMAIJ()
77ff585edeSJed Brown @*/
787087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79b9b97703SBarry Smith {
80dfbe8321SBarry Smith   PetscErrorCode ierr;
810298fd71SBarry Smith   Mat            Aij = NULL;
82b9b97703SBarry Smith 
83b9b97703SBarry Smith   PetscFunctionBegin;
84c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
85b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
86b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
87b9b97703SBarry Smith   PetscFunctionReturn(0);
88b9b97703SBarry Smith }
89b9b97703SBarry Smith 
90dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9182b1193eSBarry Smith {
92dfbe8321SBarry Smith   PetscErrorCode ierr;
934cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9482b1193eSBarry Smith 
9582b1193eSBarry Smith   PetscFunctionBegin;
966bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
97bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
98bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
99bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
10059e29146SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);CHKERRQ(ierr);
101279e4bd4SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr);
102ec626240SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqmaij_C",NULL);CHKERRQ(ierr);
1034cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1044cb9d9b8SBarry Smith }
1054cb9d9b8SBarry Smith 
106356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
107356c569eSBarry Smith {
108356c569eSBarry Smith   PetscFunctionBegin;
109ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
110356c569eSBarry Smith   PetscFunctionReturn(0);
111356c569eSBarry Smith }
112356c569eSBarry Smith 
1130fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1140fd73130SBarry Smith {
1150fd73130SBarry Smith   PetscErrorCode ierr;
1160fd73130SBarry Smith   Mat            B;
1170fd73130SBarry Smith 
1180fd73130SBarry Smith   PetscFunctionBegin;
119a2ea699eSBarry Smith   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1200fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1216bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1220fd73130SBarry Smith   PetscFunctionReturn(0);
1230fd73130SBarry Smith }
1240fd73130SBarry Smith 
1250fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1260fd73130SBarry Smith {
1270fd73130SBarry Smith   PetscErrorCode ierr;
1280fd73130SBarry Smith   Mat            B;
1290fd73130SBarry Smith 
1300fd73130SBarry Smith   PetscFunctionBegin;
131a2ea699eSBarry Smith   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1320fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1336bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1340fd73130SBarry Smith   PetscFunctionReturn(0);
1350fd73130SBarry Smith }
1360fd73130SBarry Smith 
137dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1384cb9d9b8SBarry Smith {
139dfbe8321SBarry Smith   PetscErrorCode ierr;
1404cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1414cb9d9b8SBarry Smith 
1424cb9d9b8SBarry Smith   PetscFunctionBegin;
1436bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1446bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1456bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1466bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1476bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
148bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
149bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
150bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
151ec626240SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_mpimaij_C",NULL);CHKERRQ(ierr);
152dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
153b9b97703SBarry Smith   PetscFunctionReturn(0);
154b9b97703SBarry Smith }
155b9b97703SBarry Smith 
1560bad9183SKris Buschelman /*MC
157fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1580bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1590bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1600bad9183SKris Buschelman 
1610bad9183SKris Buschelman   Operations provided:
1620bad9183SKris Buschelman . MatMult
1630bad9183SKris Buschelman . MatMultTranspose
1640bad9183SKris Buschelman . MatMultAdd
1650bad9183SKris Buschelman . MatMultTransposeAdd
1660bad9183SKris Buschelman 
1670bad9183SKris Buschelman   Level: advanced
1680bad9183SKris Buschelman 
169b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1700bad9183SKris Buschelman M*/
1710bad9183SKris Buschelman 
1728cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
17382b1193eSBarry Smith {
174dfbe8321SBarry Smith   PetscErrorCode ierr;
1754cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
17664b52464SHong Zhang   PetscMPIInt    size;
17782b1193eSBarry Smith 
17882b1193eSBarry Smith   PetscFunctionBegin;
179b00a9115SJed Brown   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
180b0a32e0cSBarry Smith   A->data  = (void*)b;
18126fbe8dcSKarl Rupp 
182cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
18326fbe8dcSKarl Rupp 
184356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
185f4a53059SBarry Smith 
186cd3bbe55SBarry Smith   b->AIJ  = 0;
187cd3bbe55SBarry Smith   b->dof  = 0;
188f4a53059SBarry Smith   b->OAIJ = 0;
189f4a53059SBarry Smith   b->ctx  = 0;
190f4a53059SBarry Smith   b->w    = 0;
191ce94432eSBarry Smith   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
19264b52464SHong Zhang   if (size == 1) {
19364b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
19464b52464SHong Zhang   } else {
19564b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
19664b52464SHong Zhang   }
19732e7c8b0SBarry Smith   A->preallocated  = PETSC_TRUE;
19832e7c8b0SBarry Smith   A->assembled     = PETSC_TRUE;
19982b1193eSBarry Smith   PetscFunctionReturn(0);
20082b1193eSBarry Smith }
20182b1193eSBarry Smith 
202cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
203dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
20482b1193eSBarry Smith {
2054cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
206bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
207d2840e09SBarry Smith   const PetscScalar *x,*v;
208d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
209dfbe8321SBarry Smith   PetscErrorCode    ierr;
210d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
211d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
21282b1193eSBarry Smith 
213bcc973b7SBarry Smith   PetscFunctionBegin;
2143649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2151ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
216bcc973b7SBarry Smith   idx  = a->j;
217bcc973b7SBarry Smith   v    = a->a;
218bcc973b7SBarry Smith   ii   = a->i;
219bcc973b7SBarry Smith 
220bcc973b7SBarry Smith   for (i=0; i<m; i++) {
221bcc973b7SBarry Smith     jrow  = ii[i];
222bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
223bcc973b7SBarry Smith     sum1  = 0.0;
224bcc973b7SBarry Smith     sum2  = 0.0;
22526fbe8dcSKarl Rupp 
226b3c51c66SMatthew Knepley     nonzerorow += (n>0);
227bcc973b7SBarry Smith     for (j=0; j<n; j++) {
228bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
229bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
230bcc973b7SBarry Smith       jrow++;
231bcc973b7SBarry Smith     }
232bcc973b7SBarry Smith     y[2*i]   = sum1;
233bcc973b7SBarry Smith     y[2*i+1] = sum2;
234bcc973b7SBarry Smith   }
235bcc973b7SBarry Smith 
236dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2373649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2381ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23982b1193eSBarry Smith   PetscFunctionReturn(0);
24082b1193eSBarry Smith }
241bcc973b7SBarry Smith 
242dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
24382b1193eSBarry Smith {
2444cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
245bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
246d2840e09SBarry Smith   const PetscScalar *x,*v;
247d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
248dfbe8321SBarry Smith   PetscErrorCode    ierr;
249d2840e09SBarry Smith   PetscInt          n,i;
250d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
25182b1193eSBarry Smith 
252bcc973b7SBarry Smith   PetscFunctionBegin;
253d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2543649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2551ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
256bfec09a0SHong Zhang 
257bcc973b7SBarry Smith   for (i=0; i<m; i++) {
258bfec09a0SHong Zhang     idx    = a->j + a->i[i];
259bfec09a0SHong Zhang     v      = a->a + a->i[i];
260bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
261bcc973b7SBarry Smith     alpha1 = x[2*i];
262bcc973b7SBarry Smith     alpha2 = x[2*i+1];
26326fbe8dcSKarl Rupp     while (n-->0) {
26426fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
26526fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
26626fbe8dcSKarl Rupp       idx++; v++;
26726fbe8dcSKarl Rupp     }
268bcc973b7SBarry Smith   }
269dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2703649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2711ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27282b1193eSBarry Smith   PetscFunctionReturn(0);
27382b1193eSBarry Smith }
274bcc973b7SBarry Smith 
275dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
27682b1193eSBarry Smith {
2774cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
278bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
279d2840e09SBarry Smith   const PetscScalar *x,*v;
280d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
281dfbe8321SBarry Smith   PetscErrorCode    ierr;
282b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
283d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28482b1193eSBarry Smith 
285bcc973b7SBarry Smith   PetscFunctionBegin;
286f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2873649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2881ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
289bcc973b7SBarry Smith   idx  = a->j;
290bcc973b7SBarry Smith   v    = a->a;
291bcc973b7SBarry Smith   ii   = a->i;
292bcc973b7SBarry Smith 
293bcc973b7SBarry Smith   for (i=0; i<m; i++) {
294bcc973b7SBarry Smith     jrow = ii[i];
295bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
296bcc973b7SBarry Smith     sum1 = 0.0;
297bcc973b7SBarry Smith     sum2 = 0.0;
298bcc973b7SBarry Smith     for (j=0; j<n; j++) {
299bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
300bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
301bcc973b7SBarry Smith       jrow++;
302bcc973b7SBarry Smith     }
303bcc973b7SBarry Smith     y[2*i]   += sum1;
304bcc973b7SBarry Smith     y[2*i+1] += sum2;
305bcc973b7SBarry Smith   }
306bcc973b7SBarry Smith 
307dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3083649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3091ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
310bcc973b7SBarry Smith   PetscFunctionReturn(0);
31182b1193eSBarry Smith }
312dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
31382b1193eSBarry Smith {
3144cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
315bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
316d2840e09SBarry Smith   const PetscScalar *x,*v;
317d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
318dfbe8321SBarry Smith   PetscErrorCode    ierr;
319d2840e09SBarry Smith   PetscInt          n,i;
320d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
32182b1193eSBarry Smith 
322bcc973b7SBarry Smith   PetscFunctionBegin;
323f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3243649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3251ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
326bfec09a0SHong Zhang 
327bcc973b7SBarry Smith   for (i=0; i<m; i++) {
328bfec09a0SHong Zhang     idx    = a->j + a->i[i];
329bfec09a0SHong Zhang     v      = a->a + a->i[i];
330bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
331bcc973b7SBarry Smith     alpha1 = x[2*i];
332bcc973b7SBarry Smith     alpha2 = x[2*i+1];
33326fbe8dcSKarl Rupp     while (n-->0) {
33426fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
33526fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
33626fbe8dcSKarl Rupp       idx++; v++;
33726fbe8dcSKarl Rupp     }
338bcc973b7SBarry Smith   }
339dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3403649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3411ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
342bcc973b7SBarry Smith   PetscFunctionReturn(0);
34382b1193eSBarry Smith }
344cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
345dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
346bcc973b7SBarry Smith {
3474cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
348bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
349d2840e09SBarry Smith   const PetscScalar *x,*v;
350d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
351dfbe8321SBarry Smith   PetscErrorCode    ierr;
352d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
353d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
35482b1193eSBarry Smith 
355bcc973b7SBarry Smith   PetscFunctionBegin;
3563649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3571ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
358bcc973b7SBarry Smith   idx  = a->j;
359bcc973b7SBarry Smith   v    = a->a;
360bcc973b7SBarry Smith   ii   = a->i;
361bcc973b7SBarry Smith 
362bcc973b7SBarry Smith   for (i=0; i<m; i++) {
363bcc973b7SBarry Smith     jrow  = ii[i];
364bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
365bcc973b7SBarry Smith     sum1  = 0.0;
366bcc973b7SBarry Smith     sum2  = 0.0;
367bcc973b7SBarry Smith     sum3  = 0.0;
36826fbe8dcSKarl Rupp 
369b3c51c66SMatthew Knepley     nonzerorow += (n>0);
370bcc973b7SBarry Smith     for (j=0; j<n; j++) {
371bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
372bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
373bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
374bcc973b7SBarry Smith       jrow++;
375bcc973b7SBarry Smith      }
376bcc973b7SBarry Smith     y[3*i]   = sum1;
377bcc973b7SBarry Smith     y[3*i+1] = sum2;
378bcc973b7SBarry Smith     y[3*i+2] = sum3;
379bcc973b7SBarry Smith   }
380bcc973b7SBarry Smith 
381dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3823649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3831ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
384bcc973b7SBarry Smith   PetscFunctionReturn(0);
385bcc973b7SBarry Smith }
386bcc973b7SBarry Smith 
387dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
388bcc973b7SBarry Smith {
3894cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
390bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
391d2840e09SBarry Smith   const PetscScalar *x,*v;
392d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
393dfbe8321SBarry Smith   PetscErrorCode    ierr;
394d2840e09SBarry Smith   PetscInt          n,i;
395d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
396bcc973b7SBarry Smith 
397bcc973b7SBarry Smith   PetscFunctionBegin;
398d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
3993649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4001ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
401bfec09a0SHong Zhang 
402bcc973b7SBarry Smith   for (i=0; i<m; i++) {
403bfec09a0SHong Zhang     idx    = a->j + a->i[i];
404bfec09a0SHong Zhang     v      = a->a + a->i[i];
405bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
406bcc973b7SBarry Smith     alpha1 = x[3*i];
407bcc973b7SBarry Smith     alpha2 = x[3*i+1];
408bcc973b7SBarry Smith     alpha3 = x[3*i+2];
409bcc973b7SBarry Smith     while (n-->0) {
410bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
411bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
412bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
413bcc973b7SBarry Smith       idx++; v++;
414bcc973b7SBarry Smith     }
415bcc973b7SBarry Smith   }
416dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4181ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
419bcc973b7SBarry Smith   PetscFunctionReturn(0);
420bcc973b7SBarry Smith }
421bcc973b7SBarry Smith 
422dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
423bcc973b7SBarry Smith {
4244cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
425bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
426d2840e09SBarry Smith   const PetscScalar *x,*v;
427d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
428dfbe8321SBarry Smith   PetscErrorCode    ierr;
429b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
430d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
431bcc973b7SBarry Smith 
432bcc973b7SBarry Smith   PetscFunctionBegin;
433f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4343649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4351ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
436bcc973b7SBarry Smith   idx  = a->j;
437bcc973b7SBarry Smith   v    = a->a;
438bcc973b7SBarry Smith   ii   = a->i;
439bcc973b7SBarry Smith 
440bcc973b7SBarry Smith   for (i=0; i<m; i++) {
441bcc973b7SBarry Smith     jrow = ii[i];
442bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
443bcc973b7SBarry Smith     sum1 = 0.0;
444bcc973b7SBarry Smith     sum2 = 0.0;
445bcc973b7SBarry Smith     sum3 = 0.0;
446bcc973b7SBarry Smith     for (j=0; j<n; j++) {
447bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
448bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
449bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
450bcc973b7SBarry Smith       jrow++;
451bcc973b7SBarry Smith     }
452bcc973b7SBarry Smith     y[3*i]   += sum1;
453bcc973b7SBarry Smith     y[3*i+1] += sum2;
454bcc973b7SBarry Smith     y[3*i+2] += sum3;
455bcc973b7SBarry Smith   }
456bcc973b7SBarry Smith 
457dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4583649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4591ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
460bcc973b7SBarry Smith   PetscFunctionReturn(0);
461bcc973b7SBarry Smith }
462dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
463bcc973b7SBarry Smith {
4644cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
465bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
466d2840e09SBarry Smith   const PetscScalar *x,*v;
467d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
468dfbe8321SBarry Smith   PetscErrorCode    ierr;
469d2840e09SBarry Smith   PetscInt          n,i;
470d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
471bcc973b7SBarry Smith 
472bcc973b7SBarry Smith   PetscFunctionBegin;
473f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4743649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4751ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
476bcc973b7SBarry Smith   for (i=0; i<m; i++) {
477bfec09a0SHong Zhang     idx    = a->j + a->i[i];
478bfec09a0SHong Zhang     v      = a->a + a->i[i];
479bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
480bcc973b7SBarry Smith     alpha1 = x[3*i];
481bcc973b7SBarry Smith     alpha2 = x[3*i+1];
482bcc973b7SBarry Smith     alpha3 = x[3*i+2];
483bcc973b7SBarry Smith     while (n-->0) {
484bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
485bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
486bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
487bcc973b7SBarry Smith       idx++; v++;
488bcc973b7SBarry Smith     }
489bcc973b7SBarry Smith   }
490dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4913649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4921ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
493bcc973b7SBarry Smith   PetscFunctionReturn(0);
494bcc973b7SBarry Smith }
495bcc973b7SBarry Smith 
496bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
497dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
498bcc973b7SBarry Smith {
4994cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
500bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
501d2840e09SBarry Smith   const PetscScalar *x,*v;
502d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
503dfbe8321SBarry Smith   PetscErrorCode    ierr;
504d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
505d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
506bcc973b7SBarry Smith 
507bcc973b7SBarry Smith   PetscFunctionBegin;
5083649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5091ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
510bcc973b7SBarry Smith   idx  = a->j;
511bcc973b7SBarry Smith   v    = a->a;
512bcc973b7SBarry Smith   ii   = a->i;
513bcc973b7SBarry Smith 
514bcc973b7SBarry Smith   for (i=0; i<m; i++) {
515bcc973b7SBarry Smith     jrow        = ii[i];
516bcc973b7SBarry Smith     n           = ii[i+1] - jrow;
517bcc973b7SBarry Smith     sum1        = 0.0;
518bcc973b7SBarry Smith     sum2        = 0.0;
519bcc973b7SBarry Smith     sum3        = 0.0;
520bcc973b7SBarry Smith     sum4        = 0.0;
521b3c51c66SMatthew Knepley     nonzerorow += (n>0);
522bcc973b7SBarry Smith     for (j=0; j<n; j++) {
523bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
524bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
525bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
526bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
527bcc973b7SBarry Smith       jrow++;
528bcc973b7SBarry Smith     }
529bcc973b7SBarry Smith     y[4*i]   = sum1;
530bcc973b7SBarry Smith     y[4*i+1] = sum2;
531bcc973b7SBarry Smith     y[4*i+2] = sum3;
532bcc973b7SBarry Smith     y[4*i+3] = sum4;
533bcc973b7SBarry Smith   }
534bcc973b7SBarry Smith 
535dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5363649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5371ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
538bcc973b7SBarry Smith   PetscFunctionReturn(0);
539bcc973b7SBarry Smith }
540bcc973b7SBarry Smith 
541dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
542bcc973b7SBarry Smith {
5434cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
544bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
545d2840e09SBarry Smith   const PetscScalar *x,*v;
546d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
547dfbe8321SBarry Smith   PetscErrorCode    ierr;
548d2840e09SBarry Smith   PetscInt          n,i;
549d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
550bcc973b7SBarry Smith 
551bcc973b7SBarry Smith   PetscFunctionBegin;
552d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5533649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5541ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
555bcc973b7SBarry Smith   for (i=0; i<m; i++) {
556bfec09a0SHong Zhang     idx    = a->j + a->i[i];
557bfec09a0SHong Zhang     v      = a->a + a->i[i];
558bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
559bcc973b7SBarry Smith     alpha1 = x[4*i];
560bcc973b7SBarry Smith     alpha2 = x[4*i+1];
561bcc973b7SBarry Smith     alpha3 = x[4*i+2];
562bcc973b7SBarry Smith     alpha4 = x[4*i+3];
563bcc973b7SBarry Smith     while (n-->0) {
564bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
565bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
566bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
567bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
568bcc973b7SBarry Smith       idx++; v++;
569bcc973b7SBarry Smith     }
570bcc973b7SBarry Smith   }
571dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5731ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
574bcc973b7SBarry Smith   PetscFunctionReturn(0);
575bcc973b7SBarry Smith }
576bcc973b7SBarry Smith 
577dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
578bcc973b7SBarry Smith {
5794cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
580f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
581d2840e09SBarry Smith   const PetscScalar *x,*v;
582d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
583dfbe8321SBarry Smith   PetscErrorCode    ierr;
584b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
585d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
586f9fae5adSBarry Smith 
587f9fae5adSBarry Smith   PetscFunctionBegin;
588f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5893649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5901ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
591f9fae5adSBarry Smith   idx  = a->j;
592f9fae5adSBarry Smith   v    = a->a;
593f9fae5adSBarry Smith   ii   = a->i;
594f9fae5adSBarry Smith 
595f9fae5adSBarry Smith   for (i=0; i<m; i++) {
596f9fae5adSBarry Smith     jrow = ii[i];
597f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
598f9fae5adSBarry Smith     sum1 = 0.0;
599f9fae5adSBarry Smith     sum2 = 0.0;
600f9fae5adSBarry Smith     sum3 = 0.0;
601f9fae5adSBarry Smith     sum4 = 0.0;
602f9fae5adSBarry Smith     for (j=0; j<n; j++) {
603f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
604f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
605f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
606f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
607f9fae5adSBarry Smith       jrow++;
608f9fae5adSBarry Smith     }
609f9fae5adSBarry Smith     y[4*i]   += sum1;
610f9fae5adSBarry Smith     y[4*i+1] += sum2;
611f9fae5adSBarry Smith     y[4*i+2] += sum3;
612f9fae5adSBarry Smith     y[4*i+3] += sum4;
613f9fae5adSBarry Smith   }
614f9fae5adSBarry Smith 
615dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6163649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6171ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
618f9fae5adSBarry Smith   PetscFunctionReturn(0);
619bcc973b7SBarry Smith }
620dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
621bcc973b7SBarry Smith {
6224cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
623f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
624d2840e09SBarry Smith   const PetscScalar *x,*v;
625d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
626dfbe8321SBarry Smith   PetscErrorCode    ierr;
627d2840e09SBarry Smith   PetscInt          n,i;
628d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
629f9fae5adSBarry Smith 
630f9fae5adSBarry Smith   PetscFunctionBegin;
631f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6323649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
634bfec09a0SHong Zhang 
635f9fae5adSBarry Smith   for (i=0; i<m; i++) {
636bfec09a0SHong Zhang     idx    = a->j + a->i[i];
637bfec09a0SHong Zhang     v      = a->a + a->i[i];
638f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
639f9fae5adSBarry Smith     alpha1 = x[4*i];
640f9fae5adSBarry Smith     alpha2 = x[4*i+1];
641f9fae5adSBarry Smith     alpha3 = x[4*i+2];
642f9fae5adSBarry Smith     alpha4 = x[4*i+3];
643f9fae5adSBarry Smith     while (n-->0) {
644f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
645f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
646f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
647f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
648f9fae5adSBarry Smith       idx++; v++;
649f9fae5adSBarry Smith     }
650f9fae5adSBarry Smith   }
651dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6523649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6531ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
654f9fae5adSBarry Smith   PetscFunctionReturn(0);
655f9fae5adSBarry Smith }
656f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6576cd98798SBarry Smith 
658dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
659f9fae5adSBarry Smith {
6604cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
661f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
662d2840e09SBarry Smith   const PetscScalar *x,*v;
663d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
664dfbe8321SBarry Smith   PetscErrorCode    ierr;
665d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
666d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
667f9fae5adSBarry Smith 
668f9fae5adSBarry Smith   PetscFunctionBegin;
6693649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6701ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
671f9fae5adSBarry Smith   idx  = a->j;
672f9fae5adSBarry Smith   v    = a->a;
673f9fae5adSBarry Smith   ii   = a->i;
674f9fae5adSBarry Smith 
675f9fae5adSBarry Smith   for (i=0; i<m; i++) {
676f9fae5adSBarry Smith     jrow  = ii[i];
677f9fae5adSBarry Smith     n     = ii[i+1] - jrow;
678f9fae5adSBarry Smith     sum1  = 0.0;
679f9fae5adSBarry Smith     sum2  = 0.0;
680f9fae5adSBarry Smith     sum3  = 0.0;
681f9fae5adSBarry Smith     sum4  = 0.0;
682f9fae5adSBarry Smith     sum5  = 0.0;
68326fbe8dcSKarl Rupp 
684b3c51c66SMatthew Knepley     nonzerorow += (n>0);
685f9fae5adSBarry Smith     for (j=0; j<n; j++) {
686f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
687f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
688f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
689f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
690f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
691f9fae5adSBarry Smith       jrow++;
692f9fae5adSBarry Smith     }
693f9fae5adSBarry Smith     y[5*i]   = sum1;
694f9fae5adSBarry Smith     y[5*i+1] = sum2;
695f9fae5adSBarry Smith     y[5*i+2] = sum3;
696f9fae5adSBarry Smith     y[5*i+3] = sum4;
697f9fae5adSBarry Smith     y[5*i+4] = sum5;
698f9fae5adSBarry Smith   }
699f9fae5adSBarry Smith 
700dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
7013649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7021ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
703f9fae5adSBarry Smith   PetscFunctionReturn(0);
704f9fae5adSBarry Smith }
705f9fae5adSBarry Smith 
706dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
707f9fae5adSBarry Smith {
7084cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
709f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
710d2840e09SBarry Smith   const PetscScalar *x,*v;
711d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
712dfbe8321SBarry Smith   PetscErrorCode    ierr;
713d2840e09SBarry Smith   PetscInt          n,i;
714d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
715f9fae5adSBarry Smith 
716f9fae5adSBarry Smith   PetscFunctionBegin;
717d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7183649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7191ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
720bfec09a0SHong Zhang 
721f9fae5adSBarry Smith   for (i=0; i<m; i++) {
722bfec09a0SHong Zhang     idx    = a->j + a->i[i];
723bfec09a0SHong Zhang     v      = a->a + a->i[i];
724f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
725f9fae5adSBarry Smith     alpha1 = x[5*i];
726f9fae5adSBarry Smith     alpha2 = x[5*i+1];
727f9fae5adSBarry Smith     alpha3 = x[5*i+2];
728f9fae5adSBarry Smith     alpha4 = x[5*i+3];
729f9fae5adSBarry Smith     alpha5 = x[5*i+4];
730f9fae5adSBarry Smith     while (n-->0) {
731f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
732f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
733f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
734f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
735f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
736f9fae5adSBarry Smith       idx++; v++;
737f9fae5adSBarry Smith     }
738f9fae5adSBarry Smith   }
739dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7403649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7411ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
742f9fae5adSBarry Smith   PetscFunctionReturn(0);
743f9fae5adSBarry Smith }
744f9fae5adSBarry Smith 
745dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
746f9fae5adSBarry Smith {
7474cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
748f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
749d2840e09SBarry Smith   const PetscScalar *x,*v;
750d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
751dfbe8321SBarry Smith   PetscErrorCode    ierr;
752b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
753d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
754f9fae5adSBarry Smith 
755f9fae5adSBarry Smith   PetscFunctionBegin;
756f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7573649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7581ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
759f9fae5adSBarry Smith   idx  = a->j;
760f9fae5adSBarry Smith   v    = a->a;
761f9fae5adSBarry Smith   ii   = a->i;
762f9fae5adSBarry Smith 
763f9fae5adSBarry Smith   for (i=0; i<m; i++) {
764f9fae5adSBarry Smith     jrow = ii[i];
765f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
766f9fae5adSBarry Smith     sum1 = 0.0;
767f9fae5adSBarry Smith     sum2 = 0.0;
768f9fae5adSBarry Smith     sum3 = 0.0;
769f9fae5adSBarry Smith     sum4 = 0.0;
770f9fae5adSBarry Smith     sum5 = 0.0;
771f9fae5adSBarry Smith     for (j=0; j<n; j++) {
772f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
773f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
774f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
775f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
776f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
777f9fae5adSBarry Smith       jrow++;
778f9fae5adSBarry Smith     }
779f9fae5adSBarry Smith     y[5*i]   += sum1;
780f9fae5adSBarry Smith     y[5*i+1] += sum2;
781f9fae5adSBarry Smith     y[5*i+2] += sum3;
782f9fae5adSBarry Smith     y[5*i+3] += sum4;
783f9fae5adSBarry Smith     y[5*i+4] += sum5;
784f9fae5adSBarry Smith   }
785f9fae5adSBarry Smith 
786dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7873649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7881ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
789f9fae5adSBarry Smith   PetscFunctionReturn(0);
790f9fae5adSBarry Smith }
791f9fae5adSBarry Smith 
792dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
793f9fae5adSBarry Smith {
7944cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
795f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
796d2840e09SBarry Smith   const PetscScalar *x,*v;
797d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
798dfbe8321SBarry Smith   PetscErrorCode    ierr;
799d2840e09SBarry Smith   PetscInt          n,i;
800d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
801f9fae5adSBarry Smith 
802f9fae5adSBarry Smith   PetscFunctionBegin;
803f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8043649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8051ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
806bfec09a0SHong Zhang 
807f9fae5adSBarry Smith   for (i=0; i<m; i++) {
808bfec09a0SHong Zhang     idx    = a->j + a->i[i];
809bfec09a0SHong Zhang     v      = a->a + a->i[i];
810f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
811f9fae5adSBarry Smith     alpha1 = x[5*i];
812f9fae5adSBarry Smith     alpha2 = x[5*i+1];
813f9fae5adSBarry Smith     alpha3 = x[5*i+2];
814f9fae5adSBarry Smith     alpha4 = x[5*i+3];
815f9fae5adSBarry Smith     alpha5 = x[5*i+4];
816f9fae5adSBarry Smith     while (n-->0) {
817f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
818f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
819f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
820f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
821f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
822f9fae5adSBarry Smith       idx++; v++;
823f9fae5adSBarry Smith     }
824f9fae5adSBarry Smith   }
825dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8263649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8271ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
828f9fae5adSBarry Smith   PetscFunctionReturn(0);
829bcc973b7SBarry Smith }
830bcc973b7SBarry Smith 
8316cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
832dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8336cd98798SBarry Smith {
8346cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8356cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
836d2840e09SBarry Smith   const PetscScalar *x,*v;
837d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
838dfbe8321SBarry Smith   PetscErrorCode    ierr;
839d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
840d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
8416cd98798SBarry Smith 
8426cd98798SBarry Smith   PetscFunctionBegin;
8433649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8441ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8456cd98798SBarry Smith   idx  = a->j;
8466cd98798SBarry Smith   v    = a->a;
8476cd98798SBarry Smith   ii   = a->i;
8486cd98798SBarry Smith 
8496cd98798SBarry Smith   for (i=0; i<m; i++) {
8506cd98798SBarry Smith     jrow  = ii[i];
8516cd98798SBarry Smith     n     = ii[i+1] - jrow;
8526cd98798SBarry Smith     sum1  = 0.0;
8536cd98798SBarry Smith     sum2  = 0.0;
8546cd98798SBarry Smith     sum3  = 0.0;
8556cd98798SBarry Smith     sum4  = 0.0;
8566cd98798SBarry Smith     sum5  = 0.0;
8576cd98798SBarry Smith     sum6  = 0.0;
85826fbe8dcSKarl Rupp 
859b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8606cd98798SBarry Smith     for (j=0; j<n; j++) {
8616cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8626cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8636cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8646cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8656cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8666cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8676cd98798SBarry Smith       jrow++;
8686cd98798SBarry Smith     }
8696cd98798SBarry Smith     y[6*i]   = sum1;
8706cd98798SBarry Smith     y[6*i+1] = sum2;
8716cd98798SBarry Smith     y[6*i+2] = sum3;
8726cd98798SBarry Smith     y[6*i+3] = sum4;
8736cd98798SBarry Smith     y[6*i+4] = sum5;
8746cd98798SBarry Smith     y[6*i+5] = sum6;
8756cd98798SBarry Smith   }
8766cd98798SBarry Smith 
877dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
8783649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8791ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8806cd98798SBarry Smith   PetscFunctionReturn(0);
8816cd98798SBarry Smith }
8826cd98798SBarry Smith 
883dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8846cd98798SBarry Smith {
8856cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8866cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
887d2840e09SBarry Smith   const PetscScalar *x,*v;
888d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
889dfbe8321SBarry Smith   PetscErrorCode    ierr;
890d2840e09SBarry Smith   PetscInt          n,i;
891d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
8926cd98798SBarry Smith 
8936cd98798SBarry Smith   PetscFunctionBegin;
894d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
8953649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8961ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
897bfec09a0SHong Zhang 
8986cd98798SBarry Smith   for (i=0; i<m; i++) {
899bfec09a0SHong Zhang     idx    = a->j + a->i[i];
900bfec09a0SHong Zhang     v      = a->a + a->i[i];
9016cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9026cd98798SBarry Smith     alpha1 = x[6*i];
9036cd98798SBarry Smith     alpha2 = x[6*i+1];
9046cd98798SBarry Smith     alpha3 = x[6*i+2];
9056cd98798SBarry Smith     alpha4 = x[6*i+3];
9066cd98798SBarry Smith     alpha5 = x[6*i+4];
9076cd98798SBarry Smith     alpha6 = x[6*i+5];
9086cd98798SBarry Smith     while (n-->0) {
9096cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9106cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9116cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9126cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9136cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9146cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9156cd98798SBarry Smith       idx++; v++;
9166cd98798SBarry Smith     }
9176cd98798SBarry Smith   }
918dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9193649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9201ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9216cd98798SBarry Smith   PetscFunctionReturn(0);
9226cd98798SBarry Smith }
9236cd98798SBarry Smith 
924dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9256cd98798SBarry Smith {
9266cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9276cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
928d2840e09SBarry Smith   const PetscScalar *x,*v;
929d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
930dfbe8321SBarry Smith   PetscErrorCode    ierr;
931b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
932d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9336cd98798SBarry Smith 
9346cd98798SBarry Smith   PetscFunctionBegin;
9356cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9371ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9386cd98798SBarry Smith   idx  = a->j;
9396cd98798SBarry Smith   v    = a->a;
9406cd98798SBarry Smith   ii   = a->i;
9416cd98798SBarry Smith 
9426cd98798SBarry Smith   for (i=0; i<m; i++) {
9436cd98798SBarry Smith     jrow = ii[i];
9446cd98798SBarry Smith     n    = ii[i+1] - jrow;
9456cd98798SBarry Smith     sum1 = 0.0;
9466cd98798SBarry Smith     sum2 = 0.0;
9476cd98798SBarry Smith     sum3 = 0.0;
9486cd98798SBarry Smith     sum4 = 0.0;
9496cd98798SBarry Smith     sum5 = 0.0;
9506cd98798SBarry Smith     sum6 = 0.0;
9516cd98798SBarry Smith     for (j=0; j<n; j++) {
9526cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9536cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9546cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9556cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9566cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9576cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9586cd98798SBarry Smith       jrow++;
9596cd98798SBarry Smith     }
9606cd98798SBarry Smith     y[6*i]   += sum1;
9616cd98798SBarry Smith     y[6*i+1] += sum2;
9626cd98798SBarry Smith     y[6*i+2] += sum3;
9636cd98798SBarry Smith     y[6*i+3] += sum4;
9646cd98798SBarry Smith     y[6*i+4] += sum5;
9656cd98798SBarry Smith     y[6*i+5] += sum6;
9666cd98798SBarry Smith   }
9676cd98798SBarry Smith 
968dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9693649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9701ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9716cd98798SBarry Smith   PetscFunctionReturn(0);
9726cd98798SBarry Smith }
9736cd98798SBarry Smith 
974dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9756cd98798SBarry Smith {
9766cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9776cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
978d2840e09SBarry Smith   const PetscScalar *x,*v;
979d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
980dfbe8321SBarry Smith   PetscErrorCode    ierr;
981d2840e09SBarry Smith   PetscInt          n,i;
982d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9836cd98798SBarry Smith 
9846cd98798SBarry Smith   PetscFunctionBegin;
9856cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
988bfec09a0SHong Zhang 
9896cd98798SBarry Smith   for (i=0; i<m; i++) {
990bfec09a0SHong Zhang     idx    = a->j + a->i[i];
991bfec09a0SHong Zhang     v      = a->a + a->i[i];
9926cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9936cd98798SBarry Smith     alpha1 = x[6*i];
9946cd98798SBarry Smith     alpha2 = x[6*i+1];
9956cd98798SBarry Smith     alpha3 = x[6*i+2];
9966cd98798SBarry Smith     alpha4 = x[6*i+3];
9976cd98798SBarry Smith     alpha5 = x[6*i+4];
9986cd98798SBarry Smith     alpha6 = x[6*i+5];
9996cd98798SBarry Smith     while (n-->0) {
10006cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
10016cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
10026cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10036cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10046cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10056cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10066cd98798SBarry Smith       idx++; v++;
10076cd98798SBarry Smith     }
10086cd98798SBarry Smith   }
1009dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10103649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10111ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10126cd98798SBarry Smith   PetscFunctionReturn(0);
10136cd98798SBarry Smith }
10146cd98798SBarry Smith 
101566ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
1016ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1017ed8eea03SBarry Smith {
1018ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1019ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1020d2840e09SBarry Smith   const PetscScalar *x,*v;
1021d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1022ed8eea03SBarry Smith   PetscErrorCode    ierr;
1023d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1024d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1025ed8eea03SBarry Smith 
1026ed8eea03SBarry Smith   PetscFunctionBegin;
10273649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1028ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1029ed8eea03SBarry Smith   idx  = a->j;
1030ed8eea03SBarry Smith   v    = a->a;
1031ed8eea03SBarry Smith   ii   = a->i;
1032ed8eea03SBarry Smith 
1033ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1034ed8eea03SBarry Smith     jrow  = ii[i];
1035ed8eea03SBarry Smith     n     = ii[i+1] - jrow;
1036ed8eea03SBarry Smith     sum1  = 0.0;
1037ed8eea03SBarry Smith     sum2  = 0.0;
1038ed8eea03SBarry Smith     sum3  = 0.0;
1039ed8eea03SBarry Smith     sum4  = 0.0;
1040ed8eea03SBarry Smith     sum5  = 0.0;
1041ed8eea03SBarry Smith     sum6  = 0.0;
1042ed8eea03SBarry Smith     sum7  = 0.0;
104326fbe8dcSKarl Rupp 
1044b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1045ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1046ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1047ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1048ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1049ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1050ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1051ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1052ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1053ed8eea03SBarry Smith       jrow++;
1054ed8eea03SBarry Smith     }
1055ed8eea03SBarry Smith     y[7*i]   = sum1;
1056ed8eea03SBarry Smith     y[7*i+1] = sum2;
1057ed8eea03SBarry Smith     y[7*i+2] = sum3;
1058ed8eea03SBarry Smith     y[7*i+3] = sum4;
1059ed8eea03SBarry Smith     y[7*i+4] = sum5;
1060ed8eea03SBarry Smith     y[7*i+5] = sum6;
1061ed8eea03SBarry Smith     y[7*i+6] = sum7;
1062ed8eea03SBarry Smith   }
1063ed8eea03SBarry Smith 
1064dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
10653649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1066ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1067ed8eea03SBarry Smith   PetscFunctionReturn(0);
1068ed8eea03SBarry Smith }
1069ed8eea03SBarry Smith 
1070ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1071ed8eea03SBarry Smith {
1072ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1073ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1074d2840e09SBarry Smith   const PetscScalar *x,*v;
1075d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1076ed8eea03SBarry Smith   PetscErrorCode    ierr;
1077d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1078d2840e09SBarry Smith   PetscInt          n,i;
1079ed8eea03SBarry Smith 
1080ed8eea03SBarry Smith   PetscFunctionBegin;
1081d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
10823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1083ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1084ed8eea03SBarry Smith 
1085ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1086ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1087ed8eea03SBarry Smith     v      = a->a + a->i[i];
1088ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1089ed8eea03SBarry Smith     alpha1 = x[7*i];
1090ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1091ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1092ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1093ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1094ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1095ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1096ed8eea03SBarry Smith     while (n-->0) {
1097ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1098ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1099ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1100ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1101ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1102ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1103ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1104ed8eea03SBarry Smith       idx++; v++;
1105ed8eea03SBarry Smith     }
1106ed8eea03SBarry Smith   }
1107dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11083649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1109ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1110ed8eea03SBarry Smith   PetscFunctionReturn(0);
1111ed8eea03SBarry Smith }
1112ed8eea03SBarry Smith 
1113ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1114ed8eea03SBarry Smith {
1115ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1116ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1117d2840e09SBarry Smith   const PetscScalar *x,*v;
1118d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1119ed8eea03SBarry Smith   PetscErrorCode    ierr;
1120d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1121ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1122ed8eea03SBarry Smith 
1123ed8eea03SBarry Smith   PetscFunctionBegin;
1124ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11253649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1126ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1127ed8eea03SBarry Smith   idx  = a->j;
1128ed8eea03SBarry Smith   v    = a->a;
1129ed8eea03SBarry Smith   ii   = a->i;
1130ed8eea03SBarry Smith 
1131ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1132ed8eea03SBarry Smith     jrow = ii[i];
1133ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1134ed8eea03SBarry Smith     sum1 = 0.0;
1135ed8eea03SBarry Smith     sum2 = 0.0;
1136ed8eea03SBarry Smith     sum3 = 0.0;
1137ed8eea03SBarry Smith     sum4 = 0.0;
1138ed8eea03SBarry Smith     sum5 = 0.0;
1139ed8eea03SBarry Smith     sum6 = 0.0;
1140ed8eea03SBarry Smith     sum7 = 0.0;
1141ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1142ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1143ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1144ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1145ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1146ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1147ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1148ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1149ed8eea03SBarry Smith       jrow++;
1150ed8eea03SBarry Smith     }
1151ed8eea03SBarry Smith     y[7*i]   += sum1;
1152ed8eea03SBarry Smith     y[7*i+1] += sum2;
1153ed8eea03SBarry Smith     y[7*i+2] += sum3;
1154ed8eea03SBarry Smith     y[7*i+3] += sum4;
1155ed8eea03SBarry Smith     y[7*i+4] += sum5;
1156ed8eea03SBarry Smith     y[7*i+5] += sum6;
1157ed8eea03SBarry Smith     y[7*i+6] += sum7;
1158ed8eea03SBarry Smith   }
1159ed8eea03SBarry Smith 
1160dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11613649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1162ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1163ed8eea03SBarry Smith   PetscFunctionReturn(0);
1164ed8eea03SBarry Smith }
1165ed8eea03SBarry Smith 
1166ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1167ed8eea03SBarry Smith {
1168ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1169ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1170d2840e09SBarry Smith   const PetscScalar *x,*v;
1171d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1172ed8eea03SBarry Smith   PetscErrorCode    ierr;
1173d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1174d2840e09SBarry Smith   PetscInt          n,i;
1175ed8eea03SBarry Smith 
1176ed8eea03SBarry Smith   PetscFunctionBegin;
1177ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11783649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1179ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1180ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1181ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1182ed8eea03SBarry Smith     v      = a->a + a->i[i];
1183ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1184ed8eea03SBarry Smith     alpha1 = x[7*i];
1185ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1186ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1187ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1188ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1189ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1190ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1191ed8eea03SBarry Smith     while (n-->0) {
1192ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1193ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1194ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1195ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1196ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1197ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1198ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1199ed8eea03SBarry Smith       idx++; v++;
1200ed8eea03SBarry Smith     }
1201ed8eea03SBarry Smith   }
1202dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1204ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1205ed8eea03SBarry Smith   PetscFunctionReturn(0);
1206ed8eea03SBarry Smith }
1207ed8eea03SBarry Smith 
1208dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
120966ed3db0SBarry Smith {
121066ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
121166ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1212d2840e09SBarry Smith   const PetscScalar *x,*v;
1213d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1214dfbe8321SBarry Smith   PetscErrorCode    ierr;
1215d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1216d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
121766ed3db0SBarry Smith 
121866ed3db0SBarry Smith   PetscFunctionBegin;
12193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12201ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
122166ed3db0SBarry Smith   idx  = a->j;
122266ed3db0SBarry Smith   v    = a->a;
122366ed3db0SBarry Smith   ii   = a->i;
122466ed3db0SBarry Smith 
122566ed3db0SBarry Smith   for (i=0; i<m; i++) {
122666ed3db0SBarry Smith     jrow  = ii[i];
122766ed3db0SBarry Smith     n     = ii[i+1] - jrow;
122866ed3db0SBarry Smith     sum1  = 0.0;
122966ed3db0SBarry Smith     sum2  = 0.0;
123066ed3db0SBarry Smith     sum3  = 0.0;
123166ed3db0SBarry Smith     sum4  = 0.0;
123266ed3db0SBarry Smith     sum5  = 0.0;
123366ed3db0SBarry Smith     sum6  = 0.0;
123466ed3db0SBarry Smith     sum7  = 0.0;
123566ed3db0SBarry Smith     sum8  = 0.0;
123626fbe8dcSKarl Rupp 
1237b3c51c66SMatthew Knepley     nonzerorow += (n>0);
123866ed3db0SBarry Smith     for (j=0; j<n; j++) {
123966ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
124066ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
124166ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
124266ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
124366ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
124466ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
124566ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
124666ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
124766ed3db0SBarry Smith       jrow++;
124866ed3db0SBarry Smith     }
124966ed3db0SBarry Smith     y[8*i]   = sum1;
125066ed3db0SBarry Smith     y[8*i+1] = sum2;
125166ed3db0SBarry Smith     y[8*i+2] = sum3;
125266ed3db0SBarry Smith     y[8*i+3] = sum4;
125366ed3db0SBarry Smith     y[8*i+4] = sum5;
125466ed3db0SBarry Smith     y[8*i+5] = sum6;
125566ed3db0SBarry Smith     y[8*i+6] = sum7;
125666ed3db0SBarry Smith     y[8*i+7] = sum8;
125766ed3db0SBarry Smith   }
125866ed3db0SBarry Smith 
1259dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
12603649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
12611ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
126266ed3db0SBarry Smith   PetscFunctionReturn(0);
126366ed3db0SBarry Smith }
126466ed3db0SBarry Smith 
1265dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
126666ed3db0SBarry Smith {
126766ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
126866ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1269d2840e09SBarry Smith   const PetscScalar *x,*v;
1270d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1271dfbe8321SBarry Smith   PetscErrorCode    ierr;
1272d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1273d2840e09SBarry Smith   PetscInt          n,i;
127466ed3db0SBarry Smith 
127566ed3db0SBarry Smith   PetscFunctionBegin;
1276d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
12773649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12781ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1279bfec09a0SHong Zhang 
128066ed3db0SBarry Smith   for (i=0; i<m; i++) {
1281bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1282bfec09a0SHong Zhang     v      = a->a + a->i[i];
128366ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
128466ed3db0SBarry Smith     alpha1 = x[8*i];
128566ed3db0SBarry Smith     alpha2 = x[8*i+1];
128666ed3db0SBarry Smith     alpha3 = x[8*i+2];
128766ed3db0SBarry Smith     alpha4 = x[8*i+3];
128866ed3db0SBarry Smith     alpha5 = x[8*i+4];
128966ed3db0SBarry Smith     alpha6 = x[8*i+5];
129066ed3db0SBarry Smith     alpha7 = x[8*i+6];
129166ed3db0SBarry Smith     alpha8 = x[8*i+7];
129266ed3db0SBarry Smith     while (n-->0) {
129366ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
129466ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
129566ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
129666ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
129766ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
129866ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
129966ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
130066ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
130166ed3db0SBarry Smith       idx++; v++;
130266ed3db0SBarry Smith     }
130366ed3db0SBarry Smith   }
1304dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13061ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130766ed3db0SBarry Smith   PetscFunctionReturn(0);
130866ed3db0SBarry Smith }
130966ed3db0SBarry Smith 
1310dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
131166ed3db0SBarry Smith {
131266ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
131366ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1314d2840e09SBarry Smith   const PetscScalar *x,*v;
1315d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1316dfbe8321SBarry Smith   PetscErrorCode    ierr;
1317d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1318b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
131966ed3db0SBarry Smith 
132066ed3db0SBarry Smith   PetscFunctionBegin;
132166ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13231ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
132466ed3db0SBarry Smith   idx  = a->j;
132566ed3db0SBarry Smith   v    = a->a;
132666ed3db0SBarry Smith   ii   = a->i;
132766ed3db0SBarry Smith 
132866ed3db0SBarry Smith   for (i=0; i<m; i++) {
132966ed3db0SBarry Smith     jrow = ii[i];
133066ed3db0SBarry Smith     n    = ii[i+1] - jrow;
133166ed3db0SBarry Smith     sum1 = 0.0;
133266ed3db0SBarry Smith     sum2 = 0.0;
133366ed3db0SBarry Smith     sum3 = 0.0;
133466ed3db0SBarry Smith     sum4 = 0.0;
133566ed3db0SBarry Smith     sum5 = 0.0;
133666ed3db0SBarry Smith     sum6 = 0.0;
133766ed3db0SBarry Smith     sum7 = 0.0;
133866ed3db0SBarry Smith     sum8 = 0.0;
133966ed3db0SBarry Smith     for (j=0; j<n; j++) {
134066ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
134166ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
134266ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
134366ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
134466ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
134566ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
134666ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
134766ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
134866ed3db0SBarry Smith       jrow++;
134966ed3db0SBarry Smith     }
135066ed3db0SBarry Smith     y[8*i]   += sum1;
135166ed3db0SBarry Smith     y[8*i+1] += sum2;
135266ed3db0SBarry Smith     y[8*i+2] += sum3;
135366ed3db0SBarry Smith     y[8*i+3] += sum4;
135466ed3db0SBarry Smith     y[8*i+4] += sum5;
135566ed3db0SBarry Smith     y[8*i+5] += sum6;
135666ed3db0SBarry Smith     y[8*i+6] += sum7;
135766ed3db0SBarry Smith     y[8*i+7] += sum8;
135866ed3db0SBarry Smith   }
135966ed3db0SBarry Smith 
1360dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13613649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13621ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
136366ed3db0SBarry Smith   PetscFunctionReturn(0);
136466ed3db0SBarry Smith }
136566ed3db0SBarry Smith 
1366dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
136766ed3db0SBarry Smith {
136866ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
136966ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1370d2840e09SBarry Smith   const PetscScalar *x,*v;
1371d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1372dfbe8321SBarry Smith   PetscErrorCode    ierr;
1373d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1374d2840e09SBarry Smith   PetscInt          n,i;
137566ed3db0SBarry Smith 
137666ed3db0SBarry Smith   PetscFunctionBegin;
137766ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13783649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13791ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
138066ed3db0SBarry Smith   for (i=0; i<m; i++) {
1381bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1382bfec09a0SHong Zhang     v      = a->a + a->i[i];
138366ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
138466ed3db0SBarry Smith     alpha1 = x[8*i];
138566ed3db0SBarry Smith     alpha2 = x[8*i+1];
138666ed3db0SBarry Smith     alpha3 = x[8*i+2];
138766ed3db0SBarry Smith     alpha4 = x[8*i+3];
138866ed3db0SBarry Smith     alpha5 = x[8*i+4];
138966ed3db0SBarry Smith     alpha6 = x[8*i+5];
139066ed3db0SBarry Smith     alpha7 = x[8*i+6];
139166ed3db0SBarry Smith     alpha8 = x[8*i+7];
139266ed3db0SBarry Smith     while (n-->0) {
139366ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
139466ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
139566ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
139666ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
139766ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
139866ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
139966ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
140066ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
140166ed3db0SBarry Smith       idx++; v++;
140266ed3db0SBarry Smith     }
140366ed3db0SBarry Smith   }
1404dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14061ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14072f7816d4SBarry Smith   PetscFunctionReturn(0);
14082f7816d4SBarry Smith }
14092f7816d4SBarry Smith 
14102b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
1411dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14122b692628SMatthew Knepley {
14132b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14142b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1415d2840e09SBarry Smith   const PetscScalar *x,*v;
1416d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1417dfbe8321SBarry Smith   PetscErrorCode    ierr;
1418d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1419d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14202b692628SMatthew Knepley 
14212b692628SMatthew Knepley   PetscFunctionBegin;
14223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14231ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14242b692628SMatthew Knepley   idx  = a->j;
14252b692628SMatthew Knepley   v    = a->a;
14262b692628SMatthew Knepley   ii   = a->i;
14272b692628SMatthew Knepley 
14282b692628SMatthew Knepley   for (i=0; i<m; i++) {
14292b692628SMatthew Knepley     jrow  = ii[i];
14302b692628SMatthew Knepley     n     = ii[i+1] - jrow;
14312b692628SMatthew Knepley     sum1  = 0.0;
14322b692628SMatthew Knepley     sum2  = 0.0;
14332b692628SMatthew Knepley     sum3  = 0.0;
14342b692628SMatthew Knepley     sum4  = 0.0;
14352b692628SMatthew Knepley     sum5  = 0.0;
14362b692628SMatthew Knepley     sum6  = 0.0;
14372b692628SMatthew Knepley     sum7  = 0.0;
14382b692628SMatthew Knepley     sum8  = 0.0;
14392b692628SMatthew Knepley     sum9  = 0.0;
144026fbe8dcSKarl Rupp 
1441b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14422b692628SMatthew Knepley     for (j=0; j<n; j++) {
14432b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14442b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14452b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14462b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14472b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14482b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14492b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14502b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14512b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14522b692628SMatthew Knepley       jrow++;
14532b692628SMatthew Knepley     }
14542b692628SMatthew Knepley     y[9*i]   = sum1;
14552b692628SMatthew Knepley     y[9*i+1] = sum2;
14562b692628SMatthew Knepley     y[9*i+2] = sum3;
14572b692628SMatthew Knepley     y[9*i+3] = sum4;
14582b692628SMatthew Knepley     y[9*i+4] = sum5;
14592b692628SMatthew Knepley     y[9*i+5] = sum6;
14602b692628SMatthew Knepley     y[9*i+6] = sum7;
14612b692628SMatthew Knepley     y[9*i+7] = sum8;
14622b692628SMatthew Knepley     y[9*i+8] = sum9;
14632b692628SMatthew Knepley   }
14642b692628SMatthew Knepley 
1465dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
14663649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14671ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14682b692628SMatthew Knepley   PetscFunctionReturn(0);
14692b692628SMatthew Knepley }
14702b692628SMatthew Knepley 
1471b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1472b9cda22cSBarry Smith 
1473dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14742b692628SMatthew Knepley {
14752b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14762b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1477d2840e09SBarry Smith   const PetscScalar *x,*v;
1478d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1479dfbe8321SBarry Smith   PetscErrorCode    ierr;
1480d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1481d2840e09SBarry Smith   PetscInt          n,i;
14822b692628SMatthew Knepley 
14832b692628SMatthew Knepley   PetscFunctionBegin;
1484d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
14853649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14861ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14872b692628SMatthew Knepley 
14882b692628SMatthew Knepley   for (i=0; i<m; i++) {
14892b692628SMatthew Knepley     idx    = a->j + a->i[i];
14902b692628SMatthew Knepley     v      = a->a + a->i[i];
14912b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14922b692628SMatthew Knepley     alpha1 = x[9*i];
14932b692628SMatthew Knepley     alpha2 = x[9*i+1];
14942b692628SMatthew Knepley     alpha3 = x[9*i+2];
14952b692628SMatthew Knepley     alpha4 = x[9*i+3];
14962b692628SMatthew Knepley     alpha5 = x[9*i+4];
14972b692628SMatthew Knepley     alpha6 = x[9*i+5];
14982b692628SMatthew Knepley     alpha7 = x[9*i+6];
14992b692628SMatthew Knepley     alpha8 = x[9*i+7];
15002f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
15012b692628SMatthew Knepley     while (n-->0) {
15022b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15032b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15042b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15052b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15062b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15072b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15082b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15092b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15102b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15112b692628SMatthew Knepley       idx++; v++;
15122b692628SMatthew Knepley     }
15132b692628SMatthew Knepley   }
1514dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15161ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15172b692628SMatthew Knepley   PetscFunctionReturn(0);
15182b692628SMatthew Knepley }
15192b692628SMatthew Knepley 
1520dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15212b692628SMatthew Knepley {
15222b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15232b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1524d2840e09SBarry Smith   const PetscScalar *x,*v;
1525d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1526dfbe8321SBarry Smith   PetscErrorCode    ierr;
1527d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1528b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15292b692628SMatthew Knepley 
15302b692628SMatthew Knepley   PetscFunctionBegin;
15312b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15323649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15342b692628SMatthew Knepley   idx  = a->j;
15352b692628SMatthew Knepley   v    = a->a;
15362b692628SMatthew Knepley   ii   = a->i;
15372b692628SMatthew Knepley 
15382b692628SMatthew Knepley   for (i=0; i<m; i++) {
15392b692628SMatthew Knepley     jrow = ii[i];
15402b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15412b692628SMatthew Knepley     sum1 = 0.0;
15422b692628SMatthew Knepley     sum2 = 0.0;
15432b692628SMatthew Knepley     sum3 = 0.0;
15442b692628SMatthew Knepley     sum4 = 0.0;
15452b692628SMatthew Knepley     sum5 = 0.0;
15462b692628SMatthew Knepley     sum6 = 0.0;
15472b692628SMatthew Knepley     sum7 = 0.0;
15482b692628SMatthew Knepley     sum8 = 0.0;
15492b692628SMatthew Knepley     sum9 = 0.0;
15502b692628SMatthew Knepley     for (j=0; j<n; j++) {
15512b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15522b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15532b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15542b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15552b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15562b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15572b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15582b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15592b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15602b692628SMatthew Knepley       jrow++;
15612b692628SMatthew Knepley     }
15622b692628SMatthew Knepley     y[9*i]   += sum1;
15632b692628SMatthew Knepley     y[9*i+1] += sum2;
15642b692628SMatthew Knepley     y[9*i+2] += sum3;
15652b692628SMatthew Knepley     y[9*i+3] += sum4;
15662b692628SMatthew Knepley     y[9*i+4] += sum5;
15672b692628SMatthew Knepley     y[9*i+5] += sum6;
15682b692628SMatthew Knepley     y[9*i+6] += sum7;
15692b692628SMatthew Knepley     y[9*i+7] += sum8;
15702b692628SMatthew Knepley     y[9*i+8] += sum9;
15712b692628SMatthew Knepley   }
15722b692628SMatthew Knepley 
1573efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
15743649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15751ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15762b692628SMatthew Knepley   PetscFunctionReturn(0);
15772b692628SMatthew Knepley }
15782b692628SMatthew Knepley 
1579dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15802b692628SMatthew Knepley {
15812b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15822b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1583d2840e09SBarry Smith   const PetscScalar *x,*v;
1584d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1585dfbe8321SBarry Smith   PetscErrorCode    ierr;
1586d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1587d2840e09SBarry Smith   PetscInt          n,i;
15882b692628SMatthew Knepley 
15892b692628SMatthew Knepley   PetscFunctionBegin;
15902b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15913649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15921ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15932b692628SMatthew Knepley   for (i=0; i<m; i++) {
15942b692628SMatthew Knepley     idx    = a->j + a->i[i];
15952b692628SMatthew Knepley     v      = a->a + a->i[i];
15962b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15972b692628SMatthew Knepley     alpha1 = x[9*i];
15982b692628SMatthew Knepley     alpha2 = x[9*i+1];
15992b692628SMatthew Knepley     alpha3 = x[9*i+2];
16002b692628SMatthew Knepley     alpha4 = x[9*i+3];
16012b692628SMatthew Knepley     alpha5 = x[9*i+4];
16022b692628SMatthew Knepley     alpha6 = x[9*i+5];
16032b692628SMatthew Knepley     alpha7 = x[9*i+6];
16042b692628SMatthew Knepley     alpha8 = x[9*i+7];
16052b692628SMatthew Knepley     alpha9 = x[9*i+8];
16062b692628SMatthew Knepley     while (n-->0) {
16072b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16082b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16092b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16102b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16112b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16122b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16132b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16142b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16152b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16162b692628SMatthew Knepley       idx++; v++;
16172b692628SMatthew Knepley     }
16182b692628SMatthew Knepley   }
1619dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16203649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16211ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16222b692628SMatthew Knepley   PetscFunctionReturn(0);
16232b692628SMatthew Knepley }
1624b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1625b9cda22cSBarry Smith {
1626b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1627b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1628d2840e09SBarry Smith   const PetscScalar *x,*v;
1629d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1630b9cda22cSBarry Smith   PetscErrorCode    ierr;
1631d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1632d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1633b9cda22cSBarry Smith 
1634b9cda22cSBarry Smith   PetscFunctionBegin;
16353649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1636b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1637b9cda22cSBarry Smith   idx  = a->j;
1638b9cda22cSBarry Smith   v    = a->a;
1639b9cda22cSBarry Smith   ii   = a->i;
1640b9cda22cSBarry Smith 
1641b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1642b9cda22cSBarry Smith     jrow  = ii[i];
1643b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1644b9cda22cSBarry Smith     sum1  = 0.0;
1645b9cda22cSBarry Smith     sum2  = 0.0;
1646b9cda22cSBarry Smith     sum3  = 0.0;
1647b9cda22cSBarry Smith     sum4  = 0.0;
1648b9cda22cSBarry Smith     sum5  = 0.0;
1649b9cda22cSBarry Smith     sum6  = 0.0;
1650b9cda22cSBarry Smith     sum7  = 0.0;
1651b9cda22cSBarry Smith     sum8  = 0.0;
1652b9cda22cSBarry Smith     sum9  = 0.0;
1653b9cda22cSBarry Smith     sum10 = 0.0;
165426fbe8dcSKarl Rupp 
1655b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1656b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1657b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1658b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1659b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1660b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1661b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1662b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1663b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1664b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1665b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1666b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1667b9cda22cSBarry Smith       jrow++;
1668b9cda22cSBarry Smith     }
1669b9cda22cSBarry Smith     y[10*i]   = sum1;
1670b9cda22cSBarry Smith     y[10*i+1] = sum2;
1671b9cda22cSBarry Smith     y[10*i+2] = sum3;
1672b9cda22cSBarry Smith     y[10*i+3] = sum4;
1673b9cda22cSBarry Smith     y[10*i+4] = sum5;
1674b9cda22cSBarry Smith     y[10*i+5] = sum6;
1675b9cda22cSBarry Smith     y[10*i+6] = sum7;
1676b9cda22cSBarry Smith     y[10*i+7] = sum8;
1677b9cda22cSBarry Smith     y[10*i+8] = sum9;
1678b9cda22cSBarry Smith     y[10*i+9] = sum10;
1679b9cda22cSBarry Smith   }
1680b9cda22cSBarry Smith 
1681dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
16823649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1683b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1684b9cda22cSBarry Smith   PetscFunctionReturn(0);
1685b9cda22cSBarry Smith }
1686b9cda22cSBarry Smith 
1687b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1688b9cda22cSBarry Smith {
1689b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1690b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1691d2840e09SBarry Smith   const PetscScalar *x,*v;
1692d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1693b9cda22cSBarry Smith   PetscErrorCode    ierr;
1694d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1695b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1696b9cda22cSBarry Smith 
1697b9cda22cSBarry Smith   PetscFunctionBegin;
1698b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16993649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1700b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1701b9cda22cSBarry Smith   idx  = a->j;
1702b9cda22cSBarry Smith   v    = a->a;
1703b9cda22cSBarry Smith   ii   = a->i;
1704b9cda22cSBarry Smith 
1705b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1706b9cda22cSBarry Smith     jrow  = ii[i];
1707b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1708b9cda22cSBarry Smith     sum1  = 0.0;
1709b9cda22cSBarry Smith     sum2  = 0.0;
1710b9cda22cSBarry Smith     sum3  = 0.0;
1711b9cda22cSBarry Smith     sum4  = 0.0;
1712b9cda22cSBarry Smith     sum5  = 0.0;
1713b9cda22cSBarry Smith     sum6  = 0.0;
1714b9cda22cSBarry Smith     sum7  = 0.0;
1715b9cda22cSBarry Smith     sum8  = 0.0;
1716b9cda22cSBarry Smith     sum9  = 0.0;
1717b9cda22cSBarry Smith     sum10 = 0.0;
1718b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1719b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1720b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1721b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1722b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1723b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1724b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1725b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1726b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1727b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1728b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1729b9cda22cSBarry Smith       jrow++;
1730b9cda22cSBarry Smith     }
1731b9cda22cSBarry Smith     y[10*i]   += sum1;
1732b9cda22cSBarry Smith     y[10*i+1] += sum2;
1733b9cda22cSBarry Smith     y[10*i+2] += sum3;
1734b9cda22cSBarry Smith     y[10*i+3] += sum4;
1735b9cda22cSBarry Smith     y[10*i+4] += sum5;
1736b9cda22cSBarry Smith     y[10*i+5] += sum6;
1737b9cda22cSBarry Smith     y[10*i+6] += sum7;
1738b9cda22cSBarry Smith     y[10*i+7] += sum8;
1739b9cda22cSBarry Smith     y[10*i+8] += sum9;
1740b9cda22cSBarry Smith     y[10*i+9] += sum10;
1741b9cda22cSBarry Smith   }
1742b9cda22cSBarry Smith 
1743dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17443649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1745b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1746b9cda22cSBarry Smith   PetscFunctionReturn(0);
1747b9cda22cSBarry Smith }
1748b9cda22cSBarry Smith 
1749b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1750b9cda22cSBarry Smith {
1751b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1752b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1753d2840e09SBarry Smith   const PetscScalar *x,*v;
1754d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1755b9cda22cSBarry Smith   PetscErrorCode    ierr;
1756d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1757d2840e09SBarry Smith   PetscInt          n,i;
1758b9cda22cSBarry Smith 
1759b9cda22cSBarry Smith   PetscFunctionBegin;
1760d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
17613649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1762b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1763b9cda22cSBarry Smith 
1764b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1765b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1766b9cda22cSBarry Smith     v       = a->a + a->i[i];
1767b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1768e29fdc22SBarry Smith     alpha1  = x[10*i];
1769e29fdc22SBarry Smith     alpha2  = x[10*i+1];
1770e29fdc22SBarry Smith     alpha3  = x[10*i+2];
1771e29fdc22SBarry Smith     alpha4  = x[10*i+3];
1772e29fdc22SBarry Smith     alpha5  = x[10*i+4];
1773e29fdc22SBarry Smith     alpha6  = x[10*i+5];
1774e29fdc22SBarry Smith     alpha7  = x[10*i+6];
1775e29fdc22SBarry Smith     alpha8  = x[10*i+7];
1776e29fdc22SBarry Smith     alpha9  = x[10*i+8];
1777b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1778b9cda22cSBarry Smith     while (n-->0) {
1779e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1780e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1781e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1782e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1783e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1784e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1785e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1786e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1787e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1788b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1789b9cda22cSBarry Smith       idx++; v++;
1790b9cda22cSBarry Smith     }
1791b9cda22cSBarry Smith   }
1792dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17933649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1794b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1795b9cda22cSBarry Smith   PetscFunctionReturn(0);
1796b9cda22cSBarry Smith }
1797b9cda22cSBarry Smith 
1798b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1799b9cda22cSBarry Smith {
1800b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1801b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1802d2840e09SBarry Smith   const PetscScalar *x,*v;
1803d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1804b9cda22cSBarry Smith   PetscErrorCode    ierr;
1805d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1806d2840e09SBarry Smith   PetscInt          n,i;
1807b9cda22cSBarry Smith 
1808b9cda22cSBarry Smith   PetscFunctionBegin;
1809b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18103649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1811b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1812b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1813b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1814b9cda22cSBarry Smith     v       = a->a + a->i[i];
1815b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1816b9cda22cSBarry Smith     alpha1  = x[10*i];
1817b9cda22cSBarry Smith     alpha2  = x[10*i+1];
1818b9cda22cSBarry Smith     alpha3  = x[10*i+2];
1819b9cda22cSBarry Smith     alpha4  = x[10*i+3];
1820b9cda22cSBarry Smith     alpha5  = x[10*i+4];
1821b9cda22cSBarry Smith     alpha6  = x[10*i+5];
1822b9cda22cSBarry Smith     alpha7  = x[10*i+6];
1823b9cda22cSBarry Smith     alpha8  = x[10*i+7];
1824b9cda22cSBarry Smith     alpha9  = x[10*i+8];
1825b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1826b9cda22cSBarry Smith     while (n-->0) {
1827b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1828b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1829b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1830b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1831b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1832b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1833b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1834b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1835b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1836b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1837b9cda22cSBarry Smith       idx++; v++;
1838b9cda22cSBarry Smith     }
1839b9cda22cSBarry Smith   }
1840dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18413649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1842b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1843b9cda22cSBarry Smith   PetscFunctionReturn(0);
1844b9cda22cSBarry Smith }
1845b9cda22cSBarry Smith 
18462b692628SMatthew Knepley 
18472f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
1848dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1849dbdd7285SBarry Smith {
1850dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1851dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1852d2840e09SBarry Smith   const PetscScalar *x,*v;
1853d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1854dbdd7285SBarry Smith   PetscErrorCode    ierr;
1855d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1856d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1857dbdd7285SBarry Smith 
1858dbdd7285SBarry Smith   PetscFunctionBegin;
18593649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1860dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1861dbdd7285SBarry Smith   idx  = a->j;
1862dbdd7285SBarry Smith   v    = a->a;
1863dbdd7285SBarry Smith   ii   = a->i;
1864dbdd7285SBarry Smith 
1865dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1866dbdd7285SBarry Smith     jrow  = ii[i];
1867dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1868dbdd7285SBarry Smith     sum1  = 0.0;
1869dbdd7285SBarry Smith     sum2  = 0.0;
1870dbdd7285SBarry Smith     sum3  = 0.0;
1871dbdd7285SBarry Smith     sum4  = 0.0;
1872dbdd7285SBarry Smith     sum5  = 0.0;
1873dbdd7285SBarry Smith     sum6  = 0.0;
1874dbdd7285SBarry Smith     sum7  = 0.0;
1875dbdd7285SBarry Smith     sum8  = 0.0;
1876dbdd7285SBarry Smith     sum9  = 0.0;
1877dbdd7285SBarry Smith     sum10 = 0.0;
1878dbdd7285SBarry Smith     sum11 = 0.0;
187926fbe8dcSKarl Rupp 
1880dbdd7285SBarry Smith     nonzerorow += (n>0);
1881dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1882dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1883dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1884dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1885dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1886dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1887dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1888dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1889dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1890dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1891dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1892dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1893dbdd7285SBarry Smith       jrow++;
1894dbdd7285SBarry Smith     }
1895dbdd7285SBarry Smith     y[11*i]    = sum1;
1896dbdd7285SBarry Smith     y[11*i+1]  = sum2;
1897dbdd7285SBarry Smith     y[11*i+2]  = sum3;
1898dbdd7285SBarry Smith     y[11*i+3]  = sum4;
1899dbdd7285SBarry Smith     y[11*i+4]  = sum5;
1900dbdd7285SBarry Smith     y[11*i+5]  = sum6;
1901dbdd7285SBarry Smith     y[11*i+6]  = sum7;
1902dbdd7285SBarry Smith     y[11*i+7]  = sum8;
1903dbdd7285SBarry Smith     y[11*i+8]  = sum9;
1904dbdd7285SBarry Smith     y[11*i+9]  = sum10;
1905dbdd7285SBarry Smith     y[11*i+10] = sum11;
1906dbdd7285SBarry Smith   }
1907dbdd7285SBarry Smith 
1908dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19093649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1910dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1911dbdd7285SBarry Smith   PetscFunctionReturn(0);
1912dbdd7285SBarry Smith }
1913dbdd7285SBarry Smith 
1914dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1915dbdd7285SBarry Smith {
1916dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1917dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1918d2840e09SBarry Smith   const PetscScalar *x,*v;
1919d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1920dbdd7285SBarry Smith   PetscErrorCode    ierr;
1921d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1922dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1923dbdd7285SBarry Smith 
1924dbdd7285SBarry Smith   PetscFunctionBegin;
1925dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19263649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1927dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1928dbdd7285SBarry Smith   idx  = a->j;
1929dbdd7285SBarry Smith   v    = a->a;
1930dbdd7285SBarry Smith   ii   = a->i;
1931dbdd7285SBarry Smith 
1932dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1933dbdd7285SBarry Smith     jrow  = ii[i];
1934dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1935dbdd7285SBarry Smith     sum1  = 0.0;
1936dbdd7285SBarry Smith     sum2  = 0.0;
1937dbdd7285SBarry Smith     sum3  = 0.0;
1938dbdd7285SBarry Smith     sum4  = 0.0;
1939dbdd7285SBarry Smith     sum5  = 0.0;
1940dbdd7285SBarry Smith     sum6  = 0.0;
1941dbdd7285SBarry Smith     sum7  = 0.0;
1942dbdd7285SBarry Smith     sum8  = 0.0;
1943dbdd7285SBarry Smith     sum9  = 0.0;
1944dbdd7285SBarry Smith     sum10 = 0.0;
1945dbdd7285SBarry Smith     sum11 = 0.0;
1946dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1947dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1948dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1949dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1950dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1951dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1952dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1953dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1954dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1955dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1956dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1957dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1958dbdd7285SBarry Smith       jrow++;
1959dbdd7285SBarry Smith     }
1960dbdd7285SBarry Smith     y[11*i]    += sum1;
1961dbdd7285SBarry Smith     y[11*i+1]  += sum2;
1962dbdd7285SBarry Smith     y[11*i+2]  += sum3;
1963dbdd7285SBarry Smith     y[11*i+3]  += sum4;
1964dbdd7285SBarry Smith     y[11*i+4]  += sum5;
1965dbdd7285SBarry Smith     y[11*i+5]  += sum6;
1966dbdd7285SBarry Smith     y[11*i+6]  += sum7;
1967dbdd7285SBarry Smith     y[11*i+7]  += sum8;
1968dbdd7285SBarry Smith     y[11*i+8]  += sum9;
1969dbdd7285SBarry Smith     y[11*i+9]  += sum10;
1970dbdd7285SBarry Smith     y[11*i+10] += sum11;
1971dbdd7285SBarry Smith   }
1972dbdd7285SBarry Smith 
1973dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
19743649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1975dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1976dbdd7285SBarry Smith   PetscFunctionReturn(0);
1977dbdd7285SBarry Smith }
1978dbdd7285SBarry Smith 
1979dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1980dbdd7285SBarry Smith {
1981dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1982dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1983d2840e09SBarry Smith   const PetscScalar *x,*v;
1984d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1985dbdd7285SBarry Smith   PetscErrorCode    ierr;
1986d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1987d2840e09SBarry Smith   PetscInt          n,i;
1988dbdd7285SBarry Smith 
1989dbdd7285SBarry Smith   PetscFunctionBegin;
1990d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
19913649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1992dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1993dbdd7285SBarry Smith 
1994dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1995dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1996dbdd7285SBarry Smith     v       = a->a + a->i[i];
1997dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
1998dbdd7285SBarry Smith     alpha1  = x[11*i];
1999dbdd7285SBarry Smith     alpha2  = x[11*i+1];
2000dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2001dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2002dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2003dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2004dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2005dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2006dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2007dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2008dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2009dbdd7285SBarry Smith     while (n-->0) {
2010dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2011dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2012dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2013dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2014dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2015dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2016dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2017dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2018dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2019dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2020dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2021dbdd7285SBarry Smith       idx++; v++;
2022dbdd7285SBarry Smith     }
2023dbdd7285SBarry Smith   }
2024dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20253649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2026dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2027dbdd7285SBarry Smith   PetscFunctionReturn(0);
2028dbdd7285SBarry Smith }
2029dbdd7285SBarry Smith 
2030dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2031dbdd7285SBarry Smith {
2032dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2033dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2034d2840e09SBarry Smith   const PetscScalar *x,*v;
2035d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2036dbdd7285SBarry Smith   PetscErrorCode    ierr;
2037d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2038d2840e09SBarry Smith   PetscInt          n,i;
2039dbdd7285SBarry Smith 
2040dbdd7285SBarry Smith   PetscFunctionBegin;
2041dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
20423649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2043dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2044dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2045dbdd7285SBarry Smith     idx     = a->j + a->i[i];
2046dbdd7285SBarry Smith     v       = a->a + a->i[i];
2047dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
2048dbdd7285SBarry Smith     alpha1  = x[11*i];
2049dbdd7285SBarry Smith     alpha2  = x[11*i+1];
2050dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2051dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2052dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2053dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2054dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2055dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2056dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2057dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2058dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2059dbdd7285SBarry Smith     while (n-->0) {
2060dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2061dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2062dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2063dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2064dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2065dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2066dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2067dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2068dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2069dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2070dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2071dbdd7285SBarry Smith       idx++; v++;
2072dbdd7285SBarry Smith     }
2073dbdd7285SBarry Smith   }
2074dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20753649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2076dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2077dbdd7285SBarry Smith   PetscFunctionReturn(0);
2078dbdd7285SBarry Smith }
2079dbdd7285SBarry Smith 
2080dbdd7285SBarry Smith 
2081dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2082dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
20832f7816d4SBarry Smith {
20842f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
20852f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2086d2840e09SBarry Smith   const PetscScalar *x,*v;
2087d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
20882f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2089dfbe8321SBarry Smith   PetscErrorCode    ierr;
2090d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2091d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
20922f7816d4SBarry Smith 
20932f7816d4SBarry Smith   PetscFunctionBegin;
20943649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
20951ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
20962f7816d4SBarry Smith   idx  = a->j;
20972f7816d4SBarry Smith   v    = a->a;
20982f7816d4SBarry Smith   ii   = a->i;
20992f7816d4SBarry Smith 
21002f7816d4SBarry Smith   for (i=0; i<m; i++) {
21012f7816d4SBarry Smith     jrow  = ii[i];
21022f7816d4SBarry Smith     n     = ii[i+1] - jrow;
21032f7816d4SBarry Smith     sum1  = 0.0;
21042f7816d4SBarry Smith     sum2  = 0.0;
21052f7816d4SBarry Smith     sum3  = 0.0;
21062f7816d4SBarry Smith     sum4  = 0.0;
21072f7816d4SBarry Smith     sum5  = 0.0;
21082f7816d4SBarry Smith     sum6  = 0.0;
21092f7816d4SBarry Smith     sum7  = 0.0;
21102f7816d4SBarry Smith     sum8  = 0.0;
21112f7816d4SBarry Smith     sum9  = 0.0;
21122f7816d4SBarry Smith     sum10 = 0.0;
21132f7816d4SBarry Smith     sum11 = 0.0;
21142f7816d4SBarry Smith     sum12 = 0.0;
21152f7816d4SBarry Smith     sum13 = 0.0;
21162f7816d4SBarry Smith     sum14 = 0.0;
21172f7816d4SBarry Smith     sum15 = 0.0;
21182f7816d4SBarry Smith     sum16 = 0.0;
211926fbe8dcSKarl Rupp 
2120b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21212f7816d4SBarry Smith     for (j=0; j<n; j++) {
21222f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21232f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21242f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21252f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21262f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
21272f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
21282f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
21292f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
21302f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
21312f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
21322f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
21332f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
21342f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
21352f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
21362f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
21372f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
21382f7816d4SBarry Smith       jrow++;
21392f7816d4SBarry Smith     }
21402f7816d4SBarry Smith     y[16*i]    = sum1;
21412f7816d4SBarry Smith     y[16*i+1]  = sum2;
21422f7816d4SBarry Smith     y[16*i+2]  = sum3;
21432f7816d4SBarry Smith     y[16*i+3]  = sum4;
21442f7816d4SBarry Smith     y[16*i+4]  = sum5;
21452f7816d4SBarry Smith     y[16*i+5]  = sum6;
21462f7816d4SBarry Smith     y[16*i+6]  = sum7;
21472f7816d4SBarry Smith     y[16*i+7]  = sum8;
21482f7816d4SBarry Smith     y[16*i+8]  = sum9;
21492f7816d4SBarry Smith     y[16*i+9]  = sum10;
21502f7816d4SBarry Smith     y[16*i+10] = sum11;
21512f7816d4SBarry Smith     y[16*i+11] = sum12;
21522f7816d4SBarry Smith     y[16*i+12] = sum13;
21532f7816d4SBarry Smith     y[16*i+13] = sum14;
21542f7816d4SBarry Smith     y[16*i+14] = sum15;
21552f7816d4SBarry Smith     y[16*i+15] = sum16;
21562f7816d4SBarry Smith   }
21572f7816d4SBarry Smith 
2158dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
21593649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
21601ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
21612f7816d4SBarry Smith   PetscFunctionReturn(0);
21622f7816d4SBarry Smith }
21632f7816d4SBarry Smith 
2164dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21652f7816d4SBarry Smith {
21662f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21672f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2168d2840e09SBarry Smith   const PetscScalar *x,*v;
2169d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
21702f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2171dfbe8321SBarry Smith   PetscErrorCode    ierr;
2172d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2173d2840e09SBarry Smith   PetscInt          n,i;
21742f7816d4SBarry Smith 
21752f7816d4SBarry Smith   PetscFunctionBegin;
2176d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
21773649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21781ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2179bfec09a0SHong Zhang 
21802f7816d4SBarry Smith   for (i=0; i<m; i++) {
2181bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2182bfec09a0SHong Zhang     v       = a->a + a->i[i];
21832f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
21842f7816d4SBarry Smith     alpha1  = x[16*i];
21852f7816d4SBarry Smith     alpha2  = x[16*i+1];
21862f7816d4SBarry Smith     alpha3  = x[16*i+2];
21872f7816d4SBarry Smith     alpha4  = x[16*i+3];
21882f7816d4SBarry Smith     alpha5  = x[16*i+4];
21892f7816d4SBarry Smith     alpha6  = x[16*i+5];
21902f7816d4SBarry Smith     alpha7  = x[16*i+6];
21912f7816d4SBarry Smith     alpha8  = x[16*i+7];
21922f7816d4SBarry Smith     alpha9  = x[16*i+8];
21932f7816d4SBarry Smith     alpha10 = x[16*i+9];
21942f7816d4SBarry Smith     alpha11 = x[16*i+10];
21952f7816d4SBarry Smith     alpha12 = x[16*i+11];
21962f7816d4SBarry Smith     alpha13 = x[16*i+12];
21972f7816d4SBarry Smith     alpha14 = x[16*i+13];
21982f7816d4SBarry Smith     alpha15 = x[16*i+14];
21992f7816d4SBarry Smith     alpha16 = x[16*i+15];
22002f7816d4SBarry Smith     while (n-->0) {
22012f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22022f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22032f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22042f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22052f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22062f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22072f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22082f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22092f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22102f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22112f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22122f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22132f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22142f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22152f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22162f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22172f7816d4SBarry Smith       idx++; v++;
22182f7816d4SBarry Smith     }
22192f7816d4SBarry Smith   }
2220dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22213649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22221ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22232f7816d4SBarry Smith   PetscFunctionReturn(0);
22242f7816d4SBarry Smith }
22252f7816d4SBarry Smith 
2226dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
22272f7816d4SBarry Smith {
22282f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22292f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2230d2840e09SBarry Smith   const PetscScalar *x,*v;
2231d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
22322f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2233dfbe8321SBarry Smith   PetscErrorCode    ierr;
2234d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2235b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
22362f7816d4SBarry Smith 
22372f7816d4SBarry Smith   PetscFunctionBegin;
22382f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
22393649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22401ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
22412f7816d4SBarry Smith   idx  = a->j;
22422f7816d4SBarry Smith   v    = a->a;
22432f7816d4SBarry Smith   ii   = a->i;
22442f7816d4SBarry Smith 
22452f7816d4SBarry Smith   for (i=0; i<m; i++) {
22462f7816d4SBarry Smith     jrow  = ii[i];
22472f7816d4SBarry Smith     n     = ii[i+1] - jrow;
22482f7816d4SBarry Smith     sum1  = 0.0;
22492f7816d4SBarry Smith     sum2  = 0.0;
22502f7816d4SBarry Smith     sum3  = 0.0;
22512f7816d4SBarry Smith     sum4  = 0.0;
22522f7816d4SBarry Smith     sum5  = 0.0;
22532f7816d4SBarry Smith     sum6  = 0.0;
22542f7816d4SBarry Smith     sum7  = 0.0;
22552f7816d4SBarry Smith     sum8  = 0.0;
22562f7816d4SBarry Smith     sum9  = 0.0;
22572f7816d4SBarry Smith     sum10 = 0.0;
22582f7816d4SBarry Smith     sum11 = 0.0;
22592f7816d4SBarry Smith     sum12 = 0.0;
22602f7816d4SBarry Smith     sum13 = 0.0;
22612f7816d4SBarry Smith     sum14 = 0.0;
22622f7816d4SBarry Smith     sum15 = 0.0;
22632f7816d4SBarry Smith     sum16 = 0.0;
22642f7816d4SBarry Smith     for (j=0; j<n; j++) {
22652f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
22662f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
22672f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
22682f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
22692f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22702f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22712f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22722f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22732f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22742f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22752f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22762f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22772f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22782f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22792f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22802f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22812f7816d4SBarry Smith       jrow++;
22822f7816d4SBarry Smith     }
22832f7816d4SBarry Smith     y[16*i]    += sum1;
22842f7816d4SBarry Smith     y[16*i+1]  += sum2;
22852f7816d4SBarry Smith     y[16*i+2]  += sum3;
22862f7816d4SBarry Smith     y[16*i+3]  += sum4;
22872f7816d4SBarry Smith     y[16*i+4]  += sum5;
22882f7816d4SBarry Smith     y[16*i+5]  += sum6;
22892f7816d4SBarry Smith     y[16*i+6]  += sum7;
22902f7816d4SBarry Smith     y[16*i+7]  += sum8;
22912f7816d4SBarry Smith     y[16*i+8]  += sum9;
22922f7816d4SBarry Smith     y[16*i+9]  += sum10;
22932f7816d4SBarry Smith     y[16*i+10] += sum11;
22942f7816d4SBarry Smith     y[16*i+11] += sum12;
22952f7816d4SBarry Smith     y[16*i+12] += sum13;
22962f7816d4SBarry Smith     y[16*i+13] += sum14;
22972f7816d4SBarry Smith     y[16*i+14] += sum15;
22982f7816d4SBarry Smith     y[16*i+15] += sum16;
22992f7816d4SBarry Smith   }
23002f7816d4SBarry Smith 
2301dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23031ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23042f7816d4SBarry Smith   PetscFunctionReturn(0);
23052f7816d4SBarry Smith }
23062f7816d4SBarry Smith 
2307dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23082f7816d4SBarry Smith {
23092f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23102f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2311d2840e09SBarry Smith   const PetscScalar *x,*v;
2312d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23132f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2314dfbe8321SBarry Smith   PetscErrorCode    ierr;
2315d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2316d2840e09SBarry Smith   PetscInt          n,i;
23172f7816d4SBarry Smith 
23182f7816d4SBarry Smith   PetscFunctionBegin;
23192f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23211ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23222f7816d4SBarry Smith   for (i=0; i<m; i++) {
2323bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2324bfec09a0SHong Zhang     v       = a->a + a->i[i];
23252f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
23262f7816d4SBarry Smith     alpha1  = x[16*i];
23272f7816d4SBarry Smith     alpha2  = x[16*i+1];
23282f7816d4SBarry Smith     alpha3  = x[16*i+2];
23292f7816d4SBarry Smith     alpha4  = x[16*i+3];
23302f7816d4SBarry Smith     alpha5  = x[16*i+4];
23312f7816d4SBarry Smith     alpha6  = x[16*i+5];
23322f7816d4SBarry Smith     alpha7  = x[16*i+6];
23332f7816d4SBarry Smith     alpha8  = x[16*i+7];
23342f7816d4SBarry Smith     alpha9  = x[16*i+8];
23352f7816d4SBarry Smith     alpha10 = x[16*i+9];
23362f7816d4SBarry Smith     alpha11 = x[16*i+10];
23372f7816d4SBarry Smith     alpha12 = x[16*i+11];
23382f7816d4SBarry Smith     alpha13 = x[16*i+12];
23392f7816d4SBarry Smith     alpha14 = x[16*i+13];
23402f7816d4SBarry Smith     alpha15 = x[16*i+14];
23412f7816d4SBarry Smith     alpha16 = x[16*i+15];
23422f7816d4SBarry Smith     while (n-->0) {
23432f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
23442f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
23452f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
23462f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
23472f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
23482f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
23492f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
23502f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
23512f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
23522f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
23532f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
23542f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
23552f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
23562f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
23572f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
23582f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
23592f7816d4SBarry Smith       idx++; v++;
23602f7816d4SBarry Smith     }
23612f7816d4SBarry Smith   }
2362dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23633649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23641ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
236566ed3db0SBarry Smith   PetscFunctionReturn(0);
236666ed3db0SBarry Smith }
236766ed3db0SBarry Smith 
2368ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2369ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2370ed1c418cSBarry Smith {
2371ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2372ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2373d2840e09SBarry Smith   const PetscScalar *x,*v;
2374d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2375ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2376ed1c418cSBarry Smith   PetscErrorCode    ierr;
2377d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2378d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2379ed1c418cSBarry Smith 
2380ed1c418cSBarry Smith   PetscFunctionBegin;
23813649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2382ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2383ed1c418cSBarry Smith   idx  = a->j;
2384ed1c418cSBarry Smith   v    = a->a;
2385ed1c418cSBarry Smith   ii   = a->i;
2386ed1c418cSBarry Smith 
2387ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2388ed1c418cSBarry Smith     jrow  = ii[i];
2389ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2390ed1c418cSBarry Smith     sum1  = 0.0;
2391ed1c418cSBarry Smith     sum2  = 0.0;
2392ed1c418cSBarry Smith     sum3  = 0.0;
2393ed1c418cSBarry Smith     sum4  = 0.0;
2394ed1c418cSBarry Smith     sum5  = 0.0;
2395ed1c418cSBarry Smith     sum6  = 0.0;
2396ed1c418cSBarry Smith     sum7  = 0.0;
2397ed1c418cSBarry Smith     sum8  = 0.0;
2398ed1c418cSBarry Smith     sum9  = 0.0;
2399ed1c418cSBarry Smith     sum10 = 0.0;
2400ed1c418cSBarry Smith     sum11 = 0.0;
2401ed1c418cSBarry Smith     sum12 = 0.0;
2402ed1c418cSBarry Smith     sum13 = 0.0;
2403ed1c418cSBarry Smith     sum14 = 0.0;
2404ed1c418cSBarry Smith     sum15 = 0.0;
2405ed1c418cSBarry Smith     sum16 = 0.0;
2406ed1c418cSBarry Smith     sum17 = 0.0;
2407ed1c418cSBarry Smith     sum18 = 0.0;
240826fbe8dcSKarl Rupp 
2409ed1c418cSBarry Smith     nonzerorow += (n>0);
2410ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2411ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2412ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2413ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2414ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2415ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2416ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2417ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2418ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2419ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2420ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2421ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2422ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2423ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2424ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2425ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2426ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2427ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2428ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2429ed1c418cSBarry Smith       jrow++;
2430ed1c418cSBarry Smith     }
2431ed1c418cSBarry Smith     y[18*i]    = sum1;
2432ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2433ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2434ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2435ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2436ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2437ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2438ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2439ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2440ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2441ed1c418cSBarry Smith     y[18*i+10] = sum11;
2442ed1c418cSBarry Smith     y[18*i+11] = sum12;
2443ed1c418cSBarry Smith     y[18*i+12] = sum13;
2444ed1c418cSBarry Smith     y[18*i+13] = sum14;
2445ed1c418cSBarry Smith     y[18*i+14] = sum15;
2446ed1c418cSBarry Smith     y[18*i+15] = sum16;
2447ed1c418cSBarry Smith     y[18*i+16] = sum17;
2448ed1c418cSBarry Smith     y[18*i+17] = sum18;
2449ed1c418cSBarry Smith   }
2450ed1c418cSBarry Smith 
2451dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
24523649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2453ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2454ed1c418cSBarry Smith   PetscFunctionReturn(0);
2455ed1c418cSBarry Smith }
2456ed1c418cSBarry Smith 
2457ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2458ed1c418cSBarry Smith {
2459ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2460ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2461d2840e09SBarry Smith   const PetscScalar *x,*v;
2462d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2463ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2464ed1c418cSBarry Smith   PetscErrorCode    ierr;
2465d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2466d2840e09SBarry Smith   PetscInt          n,i;
2467ed1c418cSBarry Smith 
2468ed1c418cSBarry Smith   PetscFunctionBegin;
2469d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
24703649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2471ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2472ed1c418cSBarry Smith 
2473ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2474ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2475ed1c418cSBarry Smith     v       = a->a + a->i[i];
2476ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2477ed1c418cSBarry Smith     alpha1  = x[18*i];
2478ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2479ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2480ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2481ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2482ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2483ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2484ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2485ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2486ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2487ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2488ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2489ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2490ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2491ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2492ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2493ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2494ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2495ed1c418cSBarry Smith     while (n-->0) {
2496ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2497ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2498ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2499ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2500ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2501ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2502ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2503ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2504ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2505ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2506ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2507ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2508ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2509ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2510ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2511ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2512ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2513ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2514ed1c418cSBarry Smith       idx++; v++;
2515ed1c418cSBarry Smith     }
2516ed1c418cSBarry Smith   }
2517dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25183649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2519ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2520ed1c418cSBarry Smith   PetscFunctionReturn(0);
2521ed1c418cSBarry Smith }
2522ed1c418cSBarry Smith 
2523ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2524ed1c418cSBarry Smith {
2525ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2526ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2527d2840e09SBarry Smith   const PetscScalar *x,*v;
2528d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2529ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2530ed1c418cSBarry Smith   PetscErrorCode    ierr;
2531d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2532ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2533ed1c418cSBarry Smith 
2534ed1c418cSBarry Smith   PetscFunctionBegin;
2535ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
25363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2537ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2538ed1c418cSBarry Smith   idx  = a->j;
2539ed1c418cSBarry Smith   v    = a->a;
2540ed1c418cSBarry Smith   ii   = a->i;
2541ed1c418cSBarry Smith 
2542ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2543ed1c418cSBarry Smith     jrow  = ii[i];
2544ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2545ed1c418cSBarry Smith     sum1  = 0.0;
2546ed1c418cSBarry Smith     sum2  = 0.0;
2547ed1c418cSBarry Smith     sum3  = 0.0;
2548ed1c418cSBarry Smith     sum4  = 0.0;
2549ed1c418cSBarry Smith     sum5  = 0.0;
2550ed1c418cSBarry Smith     sum6  = 0.0;
2551ed1c418cSBarry Smith     sum7  = 0.0;
2552ed1c418cSBarry Smith     sum8  = 0.0;
2553ed1c418cSBarry Smith     sum9  = 0.0;
2554ed1c418cSBarry Smith     sum10 = 0.0;
2555ed1c418cSBarry Smith     sum11 = 0.0;
2556ed1c418cSBarry Smith     sum12 = 0.0;
2557ed1c418cSBarry Smith     sum13 = 0.0;
2558ed1c418cSBarry Smith     sum14 = 0.0;
2559ed1c418cSBarry Smith     sum15 = 0.0;
2560ed1c418cSBarry Smith     sum16 = 0.0;
2561ed1c418cSBarry Smith     sum17 = 0.0;
2562ed1c418cSBarry Smith     sum18 = 0.0;
2563ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2564ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2565ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2566ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2567ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2568ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2569ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2570ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2571ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2572ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2573ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2574ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2575ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2576ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2577ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2578ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2579ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2580ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2581ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2582ed1c418cSBarry Smith       jrow++;
2583ed1c418cSBarry Smith     }
2584ed1c418cSBarry Smith     y[18*i]    += sum1;
2585ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2586ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2587ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2588ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2589ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2590ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2591ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2592ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2593ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2594ed1c418cSBarry Smith     y[18*i+10] += sum11;
2595ed1c418cSBarry Smith     y[18*i+11] += sum12;
2596ed1c418cSBarry Smith     y[18*i+12] += sum13;
2597ed1c418cSBarry Smith     y[18*i+13] += sum14;
2598ed1c418cSBarry Smith     y[18*i+14] += sum15;
2599ed1c418cSBarry Smith     y[18*i+15] += sum16;
2600ed1c418cSBarry Smith     y[18*i+16] += sum17;
2601ed1c418cSBarry Smith     y[18*i+17] += sum18;
2602ed1c418cSBarry Smith   }
2603ed1c418cSBarry Smith 
2604dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2606ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2607ed1c418cSBarry Smith   PetscFunctionReturn(0);
2608ed1c418cSBarry Smith }
2609ed1c418cSBarry Smith 
2610ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2611ed1c418cSBarry Smith {
2612ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2613ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2614d2840e09SBarry Smith   const PetscScalar *x,*v;
2615d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2616ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2617ed1c418cSBarry Smith   PetscErrorCode    ierr;
2618d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2619d2840e09SBarry Smith   PetscInt          n,i;
2620ed1c418cSBarry Smith 
2621ed1c418cSBarry Smith   PetscFunctionBegin;
2622ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2624ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2625ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2626ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2627ed1c418cSBarry Smith     v       = a->a + a->i[i];
2628ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2629ed1c418cSBarry Smith     alpha1  = x[18*i];
2630ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2631ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2632ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2633ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2634ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2635ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2636ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2637ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2638ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2639ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2640ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2641ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2642ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2643ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2644ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2645ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2646ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2647ed1c418cSBarry Smith     while (n-->0) {
2648ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2649ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2650ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2651ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2652ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2653ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2654ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2655ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2656ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2657ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2658ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2659ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2660ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2661ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2662ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2663ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2664ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2665ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2666ed1c418cSBarry Smith       idx++; v++;
2667ed1c418cSBarry Smith     }
2668ed1c418cSBarry Smith   }
2669dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26703649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2671ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2672ed1c418cSBarry Smith   PetscFunctionReturn(0);
2673ed1c418cSBarry Smith }
2674ed1c418cSBarry Smith 
26756861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
26766861f193SBarry Smith {
26776861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26786861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26796861f193SBarry Smith   const PetscScalar *x,*v;
26806861f193SBarry Smith   PetscScalar       *y,*sums;
26816861f193SBarry Smith   PetscErrorCode    ierr;
26826861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
26836861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
26846861f193SBarry Smith 
26856861f193SBarry Smith   PetscFunctionBegin;
26863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
26876861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
26886861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
26896861f193SBarry Smith   idx  = a->j;
26906861f193SBarry Smith   v    = a->a;
26916861f193SBarry Smith   ii   = a->i;
26926861f193SBarry Smith 
26936861f193SBarry Smith   for (i=0; i<m; i++) {
26946861f193SBarry Smith     jrow = ii[i];
26956861f193SBarry Smith     n    = ii[i+1] - jrow;
26966861f193SBarry Smith     sums = y + dof*i;
26976861f193SBarry Smith     for (j=0; j<n; j++) {
26986861f193SBarry Smith       for (k=0; k<dof; k++) {
26996861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
27006861f193SBarry Smith       }
27016861f193SBarry Smith       jrow++;
27026861f193SBarry Smith     }
27036861f193SBarry Smith   }
27046861f193SBarry Smith 
27056861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27076861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27086861f193SBarry Smith   PetscFunctionReturn(0);
27096861f193SBarry Smith }
27106861f193SBarry Smith 
27116861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27126861f193SBarry Smith {
27136861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27146861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27156861f193SBarry Smith   const PetscScalar *x,*v;
27166861f193SBarry Smith   PetscScalar       *y,*sums;
27176861f193SBarry Smith   PetscErrorCode    ierr;
27186861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27196861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27206861f193SBarry Smith 
27216861f193SBarry Smith   PetscFunctionBegin;
27226861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27246861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27256861f193SBarry Smith   idx  = a->j;
27266861f193SBarry Smith   v    = a->a;
27276861f193SBarry Smith   ii   = a->i;
27286861f193SBarry Smith 
27296861f193SBarry Smith   for (i=0; i<m; i++) {
27306861f193SBarry Smith     jrow = ii[i];
27316861f193SBarry Smith     n    = ii[i+1] - jrow;
27326861f193SBarry Smith     sums = y + dof*i;
27336861f193SBarry Smith     for (j=0; j<n; j++) {
27346861f193SBarry Smith       for (k=0; k<dof; k++) {
27356861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
27366861f193SBarry Smith       }
27376861f193SBarry Smith       jrow++;
27386861f193SBarry Smith     }
27396861f193SBarry Smith   }
27406861f193SBarry Smith 
27416861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27423649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27436861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
27446861f193SBarry Smith   PetscFunctionReturn(0);
27456861f193SBarry Smith }
27466861f193SBarry Smith 
27476861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27486861f193SBarry Smith {
27496861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27506861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27516861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27526861f193SBarry Smith   PetscScalar       *y;
27536861f193SBarry Smith   PetscErrorCode    ierr;
27546861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27556861f193SBarry Smith   PetscInt          n,i,k;
27566861f193SBarry Smith 
27576861f193SBarry Smith   PetscFunctionBegin;
27583649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27596861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27606861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27616861f193SBarry Smith   for (i=0; i<m; i++) {
27626861f193SBarry Smith     idx   = a->j + a->i[i];
27636861f193SBarry Smith     v     = a->a + a->i[i];
27646861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27656861f193SBarry Smith     alpha = x + dof*i;
27666861f193SBarry Smith     while (n-->0) {
27676861f193SBarry Smith       for (k=0; k<dof; k++) {
27686861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27696861f193SBarry Smith       }
277083ce7366SBarry Smith       idx++; v++;
27716861f193SBarry Smith     }
27726861f193SBarry Smith   }
27736861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27743649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27756861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27766861f193SBarry Smith   PetscFunctionReturn(0);
27776861f193SBarry Smith }
27786861f193SBarry Smith 
27796861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27806861f193SBarry Smith {
27816861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27826861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27836861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27846861f193SBarry Smith   PetscScalar       *y;
27856861f193SBarry Smith   PetscErrorCode    ierr;
27866861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27876861f193SBarry Smith   PetscInt          n,i,k;
27886861f193SBarry Smith 
27896861f193SBarry Smith   PetscFunctionBegin;
27906861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27913649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27926861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27936861f193SBarry Smith   for (i=0; i<m; i++) {
27946861f193SBarry Smith     idx   = a->j + a->i[i];
27956861f193SBarry Smith     v     = a->a + a->i[i];
27966861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27976861f193SBarry Smith     alpha = x + dof*i;
27986861f193SBarry Smith     while (n-->0) {
27996861f193SBarry Smith       for (k=0; k<dof; k++) {
28006861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28016861f193SBarry Smith       }
280283ce7366SBarry Smith       idx++; v++;
28036861f193SBarry Smith     }
28046861f193SBarry Smith   }
28056861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28076861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28086861f193SBarry Smith   PetscFunctionReturn(0);
28096861f193SBarry Smith }
28106861f193SBarry Smith 
2811f4a53059SBarry Smith /*===================================================================================*/
2812dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2813f4a53059SBarry Smith {
28144cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2815dfbe8321SBarry Smith   PetscErrorCode ierr;
2816f4a53059SBarry Smith 
2817b24ad042SBarry Smith   PetscFunctionBegin;
2818f4a53059SBarry Smith   /* start the scatter */
2819ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28204cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2821ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28224cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2823f4a53059SBarry Smith   PetscFunctionReturn(0);
2824f4a53059SBarry Smith }
2825f4a53059SBarry Smith 
2826dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
28274cb9d9b8SBarry Smith {
28284cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2829dfbe8321SBarry Smith   PetscErrorCode ierr;
2830b24ad042SBarry Smith 
28314cb9d9b8SBarry Smith   PetscFunctionBegin;
28324cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
28334cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2834ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2835ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28364cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28374cb9d9b8SBarry Smith }
28384cb9d9b8SBarry Smith 
2839dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28404cb9d9b8SBarry Smith {
28414cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2842dfbe8321SBarry Smith   PetscErrorCode ierr;
28434cb9d9b8SBarry Smith 
2844b24ad042SBarry Smith   PetscFunctionBegin;
28454cb9d9b8SBarry Smith   /* start the scatter */
2846ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2847d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2848ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2849717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
28504cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28514cb9d9b8SBarry Smith }
28524cb9d9b8SBarry Smith 
2853dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28544cb9d9b8SBarry Smith {
28554cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2856dfbe8321SBarry Smith   PetscErrorCode ierr;
2857b24ad042SBarry Smith 
28584cb9d9b8SBarry Smith   PetscFunctionBegin;
28594cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2860d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2861e4a140f6SJunchao Zhang   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2862ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28634cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28644cb9d9b8SBarry Smith }
28654cb9d9b8SBarry Smith 
286695ddefa2SHong Zhang /* ----------------------------------------------------------------*/
28677ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
28687ba1a0bfSKris Buschelman {
28697ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
28700298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
28717ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
28727ba1a0bfSKris Buschelman   Mat                P         =pp->AIJ;
28737ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2874d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
28757ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2876d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2877d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
28787ba1a0bfSKris Buschelman   MatScalar          *ca;
2879d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
28807ba1a0bfSKris Buschelman 
28817ba1a0bfSKris Buschelman   PetscFunctionBegin;
28827ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
28837ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
28847ba1a0bfSKris Buschelman 
28857ba1a0bfSKris Buschelman   cn = pn*ppdof;
28867ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
28877ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
2888854ce69bSBarry Smith   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
28897ba1a0bfSKris Buschelman   ci[0] = 0;
28907ba1a0bfSKris Buschelman 
28917ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
2892dcca6d9dSJed Brown   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2893c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2894c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
28957ba1a0bfSKris Buschelman 
28967ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
28977ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
28987ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2899f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
29007ba1a0bfSKris Buschelman   current_space = free_space;
29017ba1a0bfSKris Buschelman 
29027ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
29037ba1a0bfSKris Buschelman   for (i=0; i<pn; i++) {
29047ba1a0bfSKris Buschelman     ptnzi = pti[i+1] - pti[i];
29057ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
29067ba1a0bfSKris Buschelman     for (dof=0; dof<ppdof; dof++) {
29077ba1a0bfSKris Buschelman       ptanzi = 0;
29087ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
29097ba1a0bfSKris Buschelman       for (j=0; j<ptnzi; j++) {
29107ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
29117ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
29127ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
29137ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
29147ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
29157ba1a0bfSKris Buschelman         for (k=0; k<anzj; k++) {
29167ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
29177ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
29187ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
29197ba1a0bfSKris Buschelman           }
29207ba1a0bfSKris Buschelman         }
29217ba1a0bfSKris Buschelman       }
29227ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
29237ba1a0bfSKris Buschelman       ptaj = ptasparserow;
29247ba1a0bfSKris Buschelman       cnzi = 0;
29257ba1a0bfSKris Buschelman       for (j=0; j<ptanzi; j++) {
29267ba1a0bfSKris Buschelman         /* Get offset within block of P */
29277ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
29287ba1a0bfSKris Buschelman         /* Get block row of P */
29297ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
29307ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
29317ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
29327ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
29337ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
29347ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
29357ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
29367ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
29377ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
29387ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
29397ba1a0bfSKris Buschelman           }
29407ba1a0bfSKris Buschelman         }
29417ba1a0bfSKris Buschelman       }
29427ba1a0bfSKris Buschelman 
29437ba1a0bfSKris Buschelman       /* sort sparserow */
29447ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
29457ba1a0bfSKris Buschelman 
29467ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
29477ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
29487ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
2949f91af8c7SBarry Smith         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
29507ba1a0bfSKris Buschelman       }
29517ba1a0bfSKris Buschelman 
29527ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
29537ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
295426fbe8dcSKarl Rupp 
29557ba1a0bfSKris Buschelman       current_space->array           += cnzi;
29567ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
29577ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
29587ba1a0bfSKris Buschelman 
295926fbe8dcSKarl Rupp       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
296026fbe8dcSKarl Rupp       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
296126fbe8dcSKarl Rupp 
29627ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
29637ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
29647ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
29657ba1a0bfSKris Buschelman     }
29667ba1a0bfSKris Buschelman   }
29677ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
29687ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
29697ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
2970854ce69bSBarry Smith   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
2971a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
297274ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
29737ba1a0bfSKris Buschelman 
29747ba1a0bfSKris Buschelman   /* Allocate space for ca */
2975854ce69bSBarry Smith   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
29767ba1a0bfSKris Buschelman 
29777ba1a0bfSKris Buschelman   /* put together the new matrix */
2978ce94432eSBarry Smith   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2979f27682edSJed Brown   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
29807ba1a0bfSKris Buschelman 
29817ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29827ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
29837ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ*)((*C)->data);
2984e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2985e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
29867ba1a0bfSKris Buschelman   c->nonew   = 0;
298726fbe8dcSKarl Rupp 
2988d76021d8SHong Zhang   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29897ba1a0bfSKris Buschelman 
29907ba1a0bfSKris Buschelman   /* Clean up. */
29917ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29927ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
29937ba1a0bfSKris Buschelman }
29947ba1a0bfSKris Buschelman 
29957ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
29967ba1a0bfSKris Buschelman {
29977ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
29987ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
29997ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
30007ba1a0bfSKris Buschelman   Mat             P  =pp->AIJ;
30017ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
30027ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
30037ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3004a2ea699eSBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3005d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3006d2840e09SBarry Smith   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3007d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3008a2ea699eSBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3009d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
30107ba1a0bfSKris Buschelman 
30117ba1a0bfSKris Buschelman   PetscFunctionBegin;
30127ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
30131795a4d1SJed Brown   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
30147ba1a0bfSKris Buschelman 
30157ba1a0bfSKris Buschelman   /* Clear old values in C */
30167ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
30177ba1a0bfSKris Buschelman 
30187ba1a0bfSKris Buschelman   for (i=0; i<am; i++) {
30197ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
30207ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
30217ba1a0bfSKris Buschelman     apnzj = 0;
30227ba1a0bfSKris Buschelman     for (j=0; j<anzi; j++) {
30237ba1a0bfSKris Buschelman       /* Get offset within block of P */
30247ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
30257ba1a0bfSKris Buschelman       /* Get block row of P */
30267ba1a0bfSKris Buschelman       prow = *aj++/ppdof;   /* integer division */
30277ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
30287ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
30297ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
30307ba1a0bfSKris Buschelman       for (k=0; k<pnzj; k++) {
30317ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
30327ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
30337ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
30347ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
30357ba1a0bfSKris Buschelman         }
30367ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
30377ba1a0bfSKris Buschelman       }
3038dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
30397ba1a0bfSKris Buschelman       aa++;
30407ba1a0bfSKris Buschelman     }
30417ba1a0bfSKris Buschelman 
30427ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
30437ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
30447ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
30457ba1a0bfSKris Buschelman 
30467ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
30477ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
30487ba1a0bfSKris Buschelman     pshift  = i%ppdof;
30497ba1a0bfSKris Buschelman     poffset = pi[prow];
30507ba1a0bfSKris Buschelman     pnzi    = pi[prow+1] - poffset;
30517ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
30527ba1a0bfSKris Buschelman     pJ = pj+poffset;
30537ba1a0bfSKris Buschelman     pA = pa+poffset;
30547ba1a0bfSKris Buschelman     for (j=0; j<pnzi; j++) {
30557ba1a0bfSKris Buschelman       crow = (*pJ)*ppdof+pshift;
30567ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
30577ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
30587ba1a0bfSKris Buschelman       pJ++;
30597ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
30607ba1a0bfSKris Buschelman       for (k=0,nextap=0; nextap<apnzj; k++) {
306126fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
30627ba1a0bfSKris Buschelman       }
3063dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
30647ba1a0bfSKris Buschelman       pA++;
30657ba1a0bfSKris Buschelman     }
30667ba1a0bfSKris Buschelman 
30677ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
30687ba1a0bfSKris Buschelman     for (j=0; j<apnzj; j++) {
30697ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
30707ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
30717ba1a0bfSKris Buschelman     }
30727ba1a0bfSKris Buschelman   }
30737ba1a0bfSKris Buschelman 
30747ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
30757ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
30767ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
307774ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
307895ddefa2SHong Zhang   PetscFunctionReturn(0);
307995ddefa2SHong Zhang }
30807ba1a0bfSKris Buschelman 
3081150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
30822121bac1SHong Zhang {
30832121bac1SHong Zhang   PetscErrorCode ierr;
30842121bac1SHong Zhang 
30852121bac1SHong Zhang   PetscFunctionBegin;
30862121bac1SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
30873ff4c91cSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
30882121bac1SHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
30893ff4c91cSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
30902121bac1SHong Zhang   }
30913ff4c91cSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
30922121bac1SHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
30933ff4c91cSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
30942121bac1SHong Zhang   PetscFunctionReturn(0);
30952121bac1SHong Zhang }
30962121bac1SHong Zhang 
3097f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
309895ddefa2SHong Zhang {
309995ddefa2SHong Zhang   PetscFunctionBegin;
3100*3e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
310195ddefa2SHong Zhang   PetscFunctionReturn(0);
310295ddefa2SHong Zhang }
310395ddefa2SHong Zhang 
3104f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
310595ddefa2SHong Zhang {
310695ddefa2SHong Zhang   PetscFunctionBegin;
3107*3e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
31087ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
31097ba1a0bfSKris Buschelman }
3110*3e0c911fSBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3111150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
31129e03dfbbSHong Zhang {
31139e03dfbbSHong Zhang   PetscErrorCode ierr;
31149e03dfbbSHong Zhang 
31159e03dfbbSHong Zhang   PetscFunctionBegin;
31169e03dfbbSHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
3117*3e0c911fSBarry Smith     ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ");
3118*3e0c911fSBarry Smith     ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr);
31199e03dfbbSHong Zhang   }
3120*3e0c911fSBarry Smith   ierr = MatPtAP_MPIAIJ_MPIAIJ(A,P,scall,fill,C);CHKERRQ(ierr);
31219e03dfbbSHong Zhang   PetscFunctionReturn(0);
31229e03dfbbSHong Zhang }
31239e03dfbbSHong Zhang 
3124cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
31259c4fc4c7SBarry Smith {
31269c4fc4c7SBarry Smith   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3127ceb03754SKris Buschelman   Mat            a    = b->AIJ,B;
31289c4fc4c7SBarry Smith   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
31299c4fc4c7SBarry Smith   PetscErrorCode ierr;
31300fd73130SBarry Smith   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3131ba8c8a56SBarry Smith   PetscInt       *cols;
3132ba8c8a56SBarry Smith   PetscScalar    *vals;
31339c4fc4c7SBarry Smith 
31349c4fc4c7SBarry Smith   PetscFunctionBegin;
31359c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3136785e854fSJed Brown   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
31379c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
31389c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
313926fbe8dcSKarl Rupp     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
31409c4fc4c7SBarry Smith   }
3141ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
31429c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
3143785e854fSJed Brown   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
31449c4fc4c7SBarry Smith   ii   = 0;
31459c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3146ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
31470fd73130SBarry Smith     for (j=0; j<dof; j++) {
314826fbe8dcSKarl Rupp       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3149ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
31509c4fc4c7SBarry Smith       ii++;
31519c4fc4c7SBarry Smith     }
3152ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
31539c4fc4c7SBarry Smith   }
31549c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3155ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3156ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3157ceb03754SKris Buschelman 
3158511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
315928be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3160c3d102feSKris Buschelman   } else {
3161ceb03754SKris Buschelman     *newmat = B;
3162c3d102feSKris Buschelman   }
31639c4fc4c7SBarry Smith   PetscFunctionReturn(0);
31649c4fc4c7SBarry Smith }
31659c4fc4c7SBarry Smith 
3166c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3167be1d678aSKris Buschelman 
3168cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
31690fd73130SBarry Smith {
31700fd73130SBarry Smith   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3171ceb03754SKris Buschelman   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
31720fd73130SBarry Smith   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
31730fd73130SBarry Smith   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
31740fd73130SBarry Smith   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
31750fd73130SBarry Smith   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
31760298fd71SBarry Smith   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
31770298fd71SBarry Smith   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
31780fd73130SBarry Smith   PetscInt       rstart,cstart,*garray,ii,k;
31790fd73130SBarry Smith   PetscErrorCode ierr;
31800fd73130SBarry Smith   PetscScalar    *vals,*ovals;
31810fd73130SBarry Smith 
31820fd73130SBarry Smith   PetscFunctionBegin;
3183dcca6d9dSJed Brown   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3184d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
31850fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
31860fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
31870fd73130SBarry Smith     for (j=0; j<dof; j++) {
31880fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
31890fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
31900fd73130SBarry Smith     }
31910fd73130SBarry Smith   }
3192ce94432eSBarry Smith   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3193f27682edSJed Brown   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
31940fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
31950fd73130SBarry Smith 
3196dcca6d9dSJed Brown   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3197d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3198d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
31990fd73130SBarry Smith   garray = mpiaij->garray;
32000fd73130SBarry Smith 
32010fd73130SBarry Smith   ii = rstart;
3202d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32030fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32040fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
32050fd73130SBarry Smith     for (j=0; j<dof; j++) {
32060fd73130SBarry Smith       for (k=0; k<ncols; k++) {
32070fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
32080fd73130SBarry Smith       }
32090fd73130SBarry Smith       for (k=0; k<oncols; k++) {
32100fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
32110fd73130SBarry Smith       }
3212ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3213ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
32140fd73130SBarry Smith       ii++;
32150fd73130SBarry Smith     }
32160fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32170fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
32180fd73130SBarry Smith   }
32190fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
32200fd73130SBarry Smith 
3221ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3222ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3223ceb03754SKris Buschelman 
3224511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32257adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
32267adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
322726fbe8dcSKarl Rupp 
322828be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
322926fbe8dcSKarl Rupp 
32307adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3231c3d102feSKris Buschelman   } else {
3232ceb03754SKris Buschelman     *newmat = B;
3233c3d102feSKris Buschelman   }
32340fd73130SBarry Smith   PetscFunctionReturn(0);
32350fd73130SBarry Smith }
32360fd73130SBarry Smith 
32377dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
32389e516c8fSBarry Smith {
32399e516c8fSBarry Smith   PetscErrorCode ierr;
32409e516c8fSBarry Smith   Mat            A;
32419e516c8fSBarry Smith 
32429e516c8fSBarry Smith   PetscFunctionBegin;
32439e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
32447dae84e0SHong Zhang   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
32459e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
32469e516c8fSBarry Smith   PetscFunctionReturn(0);
32479e516c8fSBarry Smith }
32480fd73130SBarry Smith 
3249ec626240SStefano Zampini PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3250ec626240SStefano Zampini {
3251ec626240SStefano Zampini   PetscErrorCode ierr;
3252ec626240SStefano Zampini   Mat            A;
3253ec626240SStefano Zampini 
3254ec626240SStefano Zampini   PetscFunctionBegin;
3255ec626240SStefano Zampini   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3256ec626240SStefano Zampini   ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr);
3257ec626240SStefano Zampini   ierr = MatDestroy(&A);CHKERRQ(ierr);
3258ec626240SStefano Zampini   PetscFunctionReturn(0);
3259ec626240SStefano Zampini }
3260ec626240SStefano Zampini 
3261ec626240SStefano Zampini PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3262ec626240SStefano Zampini 
3263ec626240SStefano Zampini 
3264bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3265480dffcfSJed Brown /*@
32660bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
32670bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
32680bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
32690bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
32700bad9183SKris Buschelman 
3271ff585edeSJed Brown   Collective
3272ff585edeSJed Brown 
3273ff585edeSJed Brown   Input Parameters:
3274ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3275ff585edeSJed Brown - dof - the block size (number of components per node)
3276ff585edeSJed Brown 
3277ff585edeSJed Brown   Output Parameter:
3278ff585edeSJed Brown . maij - the new MAIJ matrix
3279ff585edeSJed Brown 
32800bad9183SKris Buschelman   Operations provided:
32810fd73130SBarry Smith + MatMult
32820bad9183SKris Buschelman . MatMultTranspose
32830bad9183SKris Buschelman . MatMultAdd
32840bad9183SKris Buschelman . MatMultTransposeAdd
32850fd73130SBarry Smith - MatView
32860bad9183SKris Buschelman 
32870bad9183SKris Buschelman   Level: advanced
32880bad9183SKris Buschelman 
3289b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3290ff585edeSJed Brown @*/
32917087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
329282b1193eSBarry Smith {
3293dfbe8321SBarry Smith   PetscErrorCode ierr;
3294b24ad042SBarry Smith   PetscMPIInt    size;
3295b24ad042SBarry Smith   PetscInt       n;
329682b1193eSBarry Smith   Mat            B;
329782b1193eSBarry Smith 
329882b1193eSBarry Smith   PetscFunctionBegin;
3299d72c5c08SBarry Smith   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3300d72c5c08SBarry Smith 
330126fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
330226fbe8dcSKarl Rupp   else {
3303ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3304d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3305bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3306bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3307bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3308bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
330926fbe8dcSKarl Rupp 
3310cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3311d72c5c08SBarry Smith 
3312ce94432eSBarry Smith     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3313f4a53059SBarry Smith     if (size == 1) {
3314feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3315feb9fe23SJed Brown 
3316b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
331726fbe8dcSKarl Rupp 
33180298fd71SBarry Smith       B->ops->setup   = NULL;
33194cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
33200fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3321feb9fe23SJed Brown       b               = (Mat_SeqMAIJ*)B->data;
3322b9b97703SBarry Smith       b->dof          = dof;
33234cb9d9b8SBarry Smith       b->AIJ          = A;
332426fbe8dcSKarl Rupp 
3325d72c5c08SBarry Smith       if (dof == 2) {
3326cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3327cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3328cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3329cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3330bcc973b7SBarry Smith       } else if (dof == 3) {
3331bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3332bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3333bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3334bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3335bcc973b7SBarry Smith       } else if (dof == 4) {
3336bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3337bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3338bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3339bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3340f9fae5adSBarry Smith       } else if (dof == 5) {
3341f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3342f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3343f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3344f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
33456cd98798SBarry Smith       } else if (dof == 6) {
33466cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
33476cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
33486cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
33496cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3350ed8eea03SBarry Smith       } else if (dof == 7) {
3351ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3352ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3353ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3354ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
335566ed3db0SBarry Smith       } else if (dof == 8) {
335666ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
335766ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
335866ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
335966ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
33602b692628SMatthew Knepley       } else if (dof == 9) {
33612b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
33622b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
33632b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
33642b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3365b9cda22cSBarry Smith       } else if (dof == 10) {
3366b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3367b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3368b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3369b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3370dbdd7285SBarry Smith       } else if (dof == 11) {
3371dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3372dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3373dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3374dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
33752f7816d4SBarry Smith       } else if (dof == 16) {
33762f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
33772f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
33782f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
33792f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3380ed1c418cSBarry Smith       } else if (dof == 18) {
3381ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3382ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3383ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3384ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
338582b1193eSBarry Smith       } else {
33866861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
33876861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
33886861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
33896861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
339082b1193eSBarry Smith       }
3391bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3392bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
339359e29146SRichard Tran Mills       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3394279e4bd4SRichard Tran Mills       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3395ec626240SStefano Zampini       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqmaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3396f4a53059SBarry Smith     } else {
3397f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3398feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3399f4a53059SBarry Smith       IS          from,to;
3400f4a53059SBarry Smith       Vec         gvec;
3401f4a53059SBarry Smith 
3402b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
340326fbe8dcSKarl Rupp 
34040298fd71SBarry Smith       B->ops->setup   = NULL;
3405d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
34060fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
340726fbe8dcSKarl Rupp 
3408b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3409b9b97703SBarry Smith       b->dof = dof;
3410b9b97703SBarry Smith       b->A   = A;
341126fbe8dcSKarl Rupp 
34124cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
34134cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
34144cb9d9b8SBarry Smith 
3415f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3416a34bdc0bSBarry Smith       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3417a34bdc0bSBarry Smith       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
341813288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3419a34bdc0bSBarry Smith       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3420f4a53059SBarry Smith 
3421f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3422ce94432eSBarry Smith       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3423f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3424f4a53059SBarry Smith 
3425f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3426ce94432eSBarry Smith       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3427f4a53059SBarry Smith 
3428f4a53059SBarry Smith       /* generate the scatter context */
34299448b7f1SJunchao Zhang       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3430f4a53059SBarry Smith 
34316bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
34326bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
34336bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3434f4a53059SBarry Smith 
3435f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
34364cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
34374cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
34384cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
343926fbe8dcSKarl Rupp 
3440bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3441bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3442ec626240SStefano Zampini       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpimaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3443f4a53059SBarry Smith     }
34447dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3445ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
34464994cf47SJed Brown     ierr  = MatSetUp(B);CHKERRQ(ierr);
3447cd3bbe55SBarry Smith     *maij = B;
3448146574abSBarry Smith     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3449d72c5c08SBarry Smith   }
345082b1193eSBarry Smith   PetscFunctionReturn(0);
345182b1193eSBarry Smith }
3452