xref: /petsc/src/mat/impls/maij/maij.c (revision 59e2914650ed2753f5d47e7a866137908663595f)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
2182b1193eSBarry Smith 
22b350b9acSSatish Balay /*@
23ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24ff585edeSJed Brown 
25ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26ff585edeSJed Brown 
27ff585edeSJed Brown    Input Parameter:
28ff585edeSJed Brown .  A - the MAIJ matrix
29ff585edeSJed Brown 
30ff585edeSJed Brown    Output Parameter:
31ff585edeSJed Brown .  B - the AIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Level: advanced
34ff585edeSJed Brown 
35ff585edeSJed Brown    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
36ff585edeSJed Brown 
37ff585edeSJed Brown .seealso: MatCreateMAIJ()
38ff585edeSJed Brown @*/
397087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
40b9b97703SBarry Smith {
41dfbe8321SBarry Smith   PetscErrorCode ierr;
42ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
43b9b97703SBarry Smith 
44b9b97703SBarry Smith   PetscFunctionBegin;
45251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
46251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
47b9b97703SBarry Smith   if (ismpimaij) {
48b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
49b9b97703SBarry Smith 
50b9b97703SBarry Smith     *B = b->A;
51b9b97703SBarry Smith   } else if (isseqmaij) {
52b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
53b9b97703SBarry Smith 
54b9b97703SBarry Smith     *B = b->AIJ;
55b9b97703SBarry Smith   } else {
56b9b97703SBarry Smith     *B = A;
57b9b97703SBarry Smith   }
58b9b97703SBarry Smith   PetscFunctionReturn(0);
59b9b97703SBarry Smith }
60b9b97703SBarry Smith 
61480dffcfSJed Brown /*@
62ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
63ff585edeSJed Brown 
643f9fe445SBarry Smith    Logically Collective
65ff585edeSJed Brown 
66ff585edeSJed Brown    Input Parameter:
67ff585edeSJed Brown +  A - the MAIJ matrix
68ff585edeSJed Brown -  dof - the block size for the new matrix
69ff585edeSJed Brown 
70ff585edeSJed Brown    Output Parameter:
71ff585edeSJed Brown .  B - the new MAIJ matrix
72ff585edeSJed Brown 
73ff585edeSJed Brown    Level: advanced
74ff585edeSJed Brown 
75ff585edeSJed Brown .seealso: MatCreateMAIJ()
76ff585edeSJed Brown @*/
777087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
78b9b97703SBarry Smith {
79dfbe8321SBarry Smith   PetscErrorCode ierr;
800298fd71SBarry Smith   Mat            Aij = NULL;
81b9b97703SBarry Smith 
82b9b97703SBarry Smith   PetscFunctionBegin;
83c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
84b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
85b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
86b9b97703SBarry Smith   PetscFunctionReturn(0);
87b9b97703SBarry Smith }
88b9b97703SBarry Smith 
89dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9082b1193eSBarry Smith {
91dfbe8321SBarry Smith   PetscErrorCode ierr;
924cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9382b1193eSBarry Smith 
9482b1193eSBarry Smith   PetscFunctionBegin;
956bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
96bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
97bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
98bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
99*59e29146SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);CHKERRQ(ierr);
100279e4bd4SRichard Tran Mills   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr);
1014cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1024cb9d9b8SBarry Smith }
1034cb9d9b8SBarry Smith 
104356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
105356c569eSBarry Smith {
106356c569eSBarry Smith   PetscFunctionBegin;
107ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
108356c569eSBarry Smith   PetscFunctionReturn(0);
109356c569eSBarry Smith }
110356c569eSBarry Smith 
1110fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1120fd73130SBarry Smith {
1130fd73130SBarry Smith   PetscErrorCode ierr;
1140fd73130SBarry Smith   Mat            B;
1150fd73130SBarry Smith 
1160fd73130SBarry Smith   PetscFunctionBegin;
117a2ea699eSBarry Smith   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1180fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1196bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1200fd73130SBarry Smith   PetscFunctionReturn(0);
1210fd73130SBarry Smith }
1220fd73130SBarry Smith 
1230fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1240fd73130SBarry Smith {
1250fd73130SBarry Smith   PetscErrorCode ierr;
1260fd73130SBarry Smith   Mat            B;
1270fd73130SBarry Smith 
1280fd73130SBarry Smith   PetscFunctionBegin;
129a2ea699eSBarry Smith   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1300fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1316bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1320fd73130SBarry Smith   PetscFunctionReturn(0);
1330fd73130SBarry Smith }
1340fd73130SBarry Smith 
135dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1364cb9d9b8SBarry Smith {
137dfbe8321SBarry Smith   PetscErrorCode ierr;
1384cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1394cb9d9b8SBarry Smith 
1404cb9d9b8SBarry Smith   PetscFunctionBegin;
1416bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1426bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1436bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1446bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1456bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
146bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
147bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
148bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
149dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
150b9b97703SBarry Smith   PetscFunctionReturn(0);
151b9b97703SBarry Smith }
152b9b97703SBarry Smith 
1530bad9183SKris Buschelman /*MC
154fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1550bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1560bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1570bad9183SKris Buschelman 
1580bad9183SKris Buschelman   Operations provided:
1590bad9183SKris Buschelman . MatMult
1600bad9183SKris Buschelman . MatMultTranspose
1610bad9183SKris Buschelman . MatMultAdd
1620bad9183SKris Buschelman . MatMultTransposeAdd
1630bad9183SKris Buschelman 
1640bad9183SKris Buschelman   Level: advanced
1650bad9183SKris Buschelman 
166b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1670bad9183SKris Buschelman M*/
1680bad9183SKris Buschelman 
1698cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
17082b1193eSBarry Smith {
171dfbe8321SBarry Smith   PetscErrorCode ierr;
1724cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
17364b52464SHong Zhang   PetscMPIInt    size;
17482b1193eSBarry Smith 
17582b1193eSBarry Smith   PetscFunctionBegin;
176b00a9115SJed Brown   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
177b0a32e0cSBarry Smith   A->data  = (void*)b;
17826fbe8dcSKarl Rupp 
179cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
18026fbe8dcSKarl Rupp 
181356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
182f4a53059SBarry Smith 
183cd3bbe55SBarry Smith   b->AIJ  = 0;
184cd3bbe55SBarry Smith   b->dof  = 0;
185f4a53059SBarry Smith   b->OAIJ = 0;
186f4a53059SBarry Smith   b->ctx  = 0;
187f4a53059SBarry Smith   b->w    = 0;
188ce94432eSBarry Smith   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
18964b52464SHong Zhang   if (size == 1) {
19064b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
19164b52464SHong Zhang   } else {
19264b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
19364b52464SHong Zhang   }
19432e7c8b0SBarry Smith   A->preallocated  = PETSC_TRUE;
19532e7c8b0SBarry Smith   A->assembled     = PETSC_TRUE;
19682b1193eSBarry Smith   PetscFunctionReturn(0);
19782b1193eSBarry Smith }
19882b1193eSBarry Smith 
199cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
200dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
20182b1193eSBarry Smith {
2024cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
203bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
204d2840e09SBarry Smith   const PetscScalar *x,*v;
205d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
206dfbe8321SBarry Smith   PetscErrorCode    ierr;
207d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
208d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
20982b1193eSBarry Smith 
210bcc973b7SBarry Smith   PetscFunctionBegin;
2113649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2121ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
213bcc973b7SBarry Smith   idx  = a->j;
214bcc973b7SBarry Smith   v    = a->a;
215bcc973b7SBarry Smith   ii   = a->i;
216bcc973b7SBarry Smith 
217bcc973b7SBarry Smith   for (i=0; i<m; i++) {
218bcc973b7SBarry Smith     jrow  = ii[i];
219bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
220bcc973b7SBarry Smith     sum1  = 0.0;
221bcc973b7SBarry Smith     sum2  = 0.0;
22226fbe8dcSKarl Rupp 
223b3c51c66SMatthew Knepley     nonzerorow += (n>0);
224bcc973b7SBarry Smith     for (j=0; j<n; j++) {
225bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
226bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
227bcc973b7SBarry Smith       jrow++;
228bcc973b7SBarry Smith     }
229bcc973b7SBarry Smith     y[2*i]   = sum1;
230bcc973b7SBarry Smith     y[2*i+1] = sum2;
231bcc973b7SBarry Smith   }
232bcc973b7SBarry Smith 
233dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2343649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2351ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23682b1193eSBarry Smith   PetscFunctionReturn(0);
23782b1193eSBarry Smith }
238bcc973b7SBarry Smith 
239dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
24082b1193eSBarry Smith {
2414cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
242bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
243d2840e09SBarry Smith   const PetscScalar *x,*v;
244d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
245dfbe8321SBarry Smith   PetscErrorCode    ierr;
246d2840e09SBarry Smith   PetscInt          n,i;
247d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
24882b1193eSBarry Smith 
249bcc973b7SBarry Smith   PetscFunctionBegin;
250d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2513649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2521ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
253bfec09a0SHong Zhang 
254bcc973b7SBarry Smith   for (i=0; i<m; i++) {
255bfec09a0SHong Zhang     idx    = a->j + a->i[i];
256bfec09a0SHong Zhang     v      = a->a + a->i[i];
257bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
258bcc973b7SBarry Smith     alpha1 = x[2*i];
259bcc973b7SBarry Smith     alpha2 = x[2*i+1];
26026fbe8dcSKarl Rupp     while (n-->0) {
26126fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
26226fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
26326fbe8dcSKarl Rupp       idx++; v++;
26426fbe8dcSKarl Rupp     }
265bcc973b7SBarry Smith   }
266dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2673649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2681ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
26982b1193eSBarry Smith   PetscFunctionReturn(0);
27082b1193eSBarry Smith }
271bcc973b7SBarry Smith 
272dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
27382b1193eSBarry Smith {
2744cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
275bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
276d2840e09SBarry Smith   const PetscScalar *x,*v;
277d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
278dfbe8321SBarry Smith   PetscErrorCode    ierr;
279b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
280d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28182b1193eSBarry Smith 
282bcc973b7SBarry Smith   PetscFunctionBegin;
283f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2843649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2851ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
286bcc973b7SBarry Smith   idx  = a->j;
287bcc973b7SBarry Smith   v    = a->a;
288bcc973b7SBarry Smith   ii   = a->i;
289bcc973b7SBarry Smith 
290bcc973b7SBarry Smith   for (i=0; i<m; i++) {
291bcc973b7SBarry Smith     jrow = ii[i];
292bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
293bcc973b7SBarry Smith     sum1 = 0.0;
294bcc973b7SBarry Smith     sum2 = 0.0;
295bcc973b7SBarry Smith     for (j=0; j<n; j++) {
296bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
297bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
298bcc973b7SBarry Smith       jrow++;
299bcc973b7SBarry Smith     }
300bcc973b7SBarry Smith     y[2*i]   += sum1;
301bcc973b7SBarry Smith     y[2*i+1] += sum2;
302bcc973b7SBarry Smith   }
303bcc973b7SBarry Smith 
304dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3061ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
307bcc973b7SBarry Smith   PetscFunctionReturn(0);
30882b1193eSBarry Smith }
309dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
31082b1193eSBarry Smith {
3114cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
312bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
313d2840e09SBarry Smith   const PetscScalar *x,*v;
314d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
315dfbe8321SBarry Smith   PetscErrorCode    ierr;
316d2840e09SBarry Smith   PetscInt          n,i;
317d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
31882b1193eSBarry Smith 
319bcc973b7SBarry Smith   PetscFunctionBegin;
320f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3221ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
323bfec09a0SHong Zhang 
324bcc973b7SBarry Smith   for (i=0; i<m; i++) {
325bfec09a0SHong Zhang     idx    = a->j + a->i[i];
326bfec09a0SHong Zhang     v      = a->a + a->i[i];
327bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
328bcc973b7SBarry Smith     alpha1 = x[2*i];
329bcc973b7SBarry Smith     alpha2 = x[2*i+1];
33026fbe8dcSKarl Rupp     while (n-->0) {
33126fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
33226fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
33326fbe8dcSKarl Rupp       idx++; v++;
33426fbe8dcSKarl Rupp     }
335bcc973b7SBarry Smith   }
336dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3373649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3381ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
339bcc973b7SBarry Smith   PetscFunctionReturn(0);
34082b1193eSBarry Smith }
341cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
342dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
343bcc973b7SBarry Smith {
3444cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
345bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
346d2840e09SBarry Smith   const PetscScalar *x,*v;
347d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
348dfbe8321SBarry Smith   PetscErrorCode    ierr;
349d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
350d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
35182b1193eSBarry Smith 
352bcc973b7SBarry Smith   PetscFunctionBegin;
3533649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3541ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
355bcc973b7SBarry Smith   idx  = a->j;
356bcc973b7SBarry Smith   v    = a->a;
357bcc973b7SBarry Smith   ii   = a->i;
358bcc973b7SBarry Smith 
359bcc973b7SBarry Smith   for (i=0; i<m; i++) {
360bcc973b7SBarry Smith     jrow  = ii[i];
361bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
362bcc973b7SBarry Smith     sum1  = 0.0;
363bcc973b7SBarry Smith     sum2  = 0.0;
364bcc973b7SBarry Smith     sum3  = 0.0;
36526fbe8dcSKarl Rupp 
366b3c51c66SMatthew Knepley     nonzerorow += (n>0);
367bcc973b7SBarry Smith     for (j=0; j<n; j++) {
368bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
369bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
370bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
371bcc973b7SBarry Smith       jrow++;
372bcc973b7SBarry Smith      }
373bcc973b7SBarry Smith     y[3*i]   = sum1;
374bcc973b7SBarry Smith     y[3*i+1] = sum2;
375bcc973b7SBarry Smith     y[3*i+2] = sum3;
376bcc973b7SBarry Smith   }
377bcc973b7SBarry Smith 
378dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3793649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3801ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
381bcc973b7SBarry Smith   PetscFunctionReturn(0);
382bcc973b7SBarry Smith }
383bcc973b7SBarry Smith 
384dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
385bcc973b7SBarry Smith {
3864cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
387bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
388d2840e09SBarry Smith   const PetscScalar *x,*v;
389d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
390dfbe8321SBarry Smith   PetscErrorCode    ierr;
391d2840e09SBarry Smith   PetscInt          n,i;
392d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
393bcc973b7SBarry Smith 
394bcc973b7SBarry Smith   PetscFunctionBegin;
395d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
3963649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3971ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
398bfec09a0SHong Zhang 
399bcc973b7SBarry Smith   for (i=0; i<m; i++) {
400bfec09a0SHong Zhang     idx    = a->j + a->i[i];
401bfec09a0SHong Zhang     v      = a->a + a->i[i];
402bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
403bcc973b7SBarry Smith     alpha1 = x[3*i];
404bcc973b7SBarry Smith     alpha2 = x[3*i+1];
405bcc973b7SBarry Smith     alpha3 = x[3*i+2];
406bcc973b7SBarry Smith     while (n-->0) {
407bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
408bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
409bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
410bcc973b7SBarry Smith       idx++; v++;
411bcc973b7SBarry Smith     }
412bcc973b7SBarry Smith   }
413dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4143649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4151ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
416bcc973b7SBarry Smith   PetscFunctionReturn(0);
417bcc973b7SBarry Smith }
418bcc973b7SBarry Smith 
419dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
420bcc973b7SBarry Smith {
4214cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
422bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
423d2840e09SBarry Smith   const PetscScalar *x,*v;
424d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
425dfbe8321SBarry Smith   PetscErrorCode    ierr;
426b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
427d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
428bcc973b7SBarry Smith 
429bcc973b7SBarry Smith   PetscFunctionBegin;
430f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4313649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4321ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
433bcc973b7SBarry Smith   idx  = a->j;
434bcc973b7SBarry Smith   v    = a->a;
435bcc973b7SBarry Smith   ii   = a->i;
436bcc973b7SBarry Smith 
437bcc973b7SBarry Smith   for (i=0; i<m; i++) {
438bcc973b7SBarry Smith     jrow = ii[i];
439bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
440bcc973b7SBarry Smith     sum1 = 0.0;
441bcc973b7SBarry Smith     sum2 = 0.0;
442bcc973b7SBarry Smith     sum3 = 0.0;
443bcc973b7SBarry Smith     for (j=0; j<n; j++) {
444bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
445bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
446bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
447bcc973b7SBarry Smith       jrow++;
448bcc973b7SBarry Smith     }
449bcc973b7SBarry Smith     y[3*i]   += sum1;
450bcc973b7SBarry Smith     y[3*i+1] += sum2;
451bcc973b7SBarry Smith     y[3*i+2] += sum3;
452bcc973b7SBarry Smith   }
453bcc973b7SBarry Smith 
454dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4553649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4561ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
457bcc973b7SBarry Smith   PetscFunctionReturn(0);
458bcc973b7SBarry Smith }
459dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
460bcc973b7SBarry Smith {
4614cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
462bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
463d2840e09SBarry Smith   const PetscScalar *x,*v;
464d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
465dfbe8321SBarry Smith   PetscErrorCode    ierr;
466d2840e09SBarry Smith   PetscInt          n,i;
467d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
468bcc973b7SBarry Smith 
469bcc973b7SBarry Smith   PetscFunctionBegin;
470f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4713649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4721ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
473bcc973b7SBarry Smith   for (i=0; i<m; i++) {
474bfec09a0SHong Zhang     idx    = a->j + a->i[i];
475bfec09a0SHong Zhang     v      = a->a + a->i[i];
476bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
477bcc973b7SBarry Smith     alpha1 = x[3*i];
478bcc973b7SBarry Smith     alpha2 = x[3*i+1];
479bcc973b7SBarry Smith     alpha3 = x[3*i+2];
480bcc973b7SBarry Smith     while (n-->0) {
481bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
482bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
483bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
484bcc973b7SBarry Smith       idx++; v++;
485bcc973b7SBarry Smith     }
486bcc973b7SBarry Smith   }
487dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4883649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4891ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
490bcc973b7SBarry Smith   PetscFunctionReturn(0);
491bcc973b7SBarry Smith }
492bcc973b7SBarry Smith 
493bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
494dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
495bcc973b7SBarry Smith {
4964cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
497bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
498d2840e09SBarry Smith   const PetscScalar *x,*v;
499d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
500dfbe8321SBarry Smith   PetscErrorCode    ierr;
501d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
502d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
503bcc973b7SBarry Smith 
504bcc973b7SBarry Smith   PetscFunctionBegin;
5053649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5061ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
507bcc973b7SBarry Smith   idx  = a->j;
508bcc973b7SBarry Smith   v    = a->a;
509bcc973b7SBarry Smith   ii   = a->i;
510bcc973b7SBarry Smith 
511bcc973b7SBarry Smith   for (i=0; i<m; i++) {
512bcc973b7SBarry Smith     jrow        = ii[i];
513bcc973b7SBarry Smith     n           = ii[i+1] - jrow;
514bcc973b7SBarry Smith     sum1        = 0.0;
515bcc973b7SBarry Smith     sum2        = 0.0;
516bcc973b7SBarry Smith     sum3        = 0.0;
517bcc973b7SBarry Smith     sum4        = 0.0;
518b3c51c66SMatthew Knepley     nonzerorow += (n>0);
519bcc973b7SBarry Smith     for (j=0; j<n; j++) {
520bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
521bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
522bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
523bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
524bcc973b7SBarry Smith       jrow++;
525bcc973b7SBarry Smith     }
526bcc973b7SBarry Smith     y[4*i]   = sum1;
527bcc973b7SBarry Smith     y[4*i+1] = sum2;
528bcc973b7SBarry Smith     y[4*i+2] = sum3;
529bcc973b7SBarry Smith     y[4*i+3] = sum4;
530bcc973b7SBarry Smith   }
531bcc973b7SBarry Smith 
532dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5333649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5341ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
535bcc973b7SBarry Smith   PetscFunctionReturn(0);
536bcc973b7SBarry Smith }
537bcc973b7SBarry Smith 
538dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
539bcc973b7SBarry Smith {
5404cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
541bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
542d2840e09SBarry Smith   const PetscScalar *x,*v;
543d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
544dfbe8321SBarry Smith   PetscErrorCode    ierr;
545d2840e09SBarry Smith   PetscInt          n,i;
546d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
547bcc973b7SBarry Smith 
548bcc973b7SBarry Smith   PetscFunctionBegin;
549d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5503649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5511ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
552bcc973b7SBarry Smith   for (i=0; i<m; i++) {
553bfec09a0SHong Zhang     idx    = a->j + a->i[i];
554bfec09a0SHong Zhang     v      = a->a + a->i[i];
555bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
556bcc973b7SBarry Smith     alpha1 = x[4*i];
557bcc973b7SBarry Smith     alpha2 = x[4*i+1];
558bcc973b7SBarry Smith     alpha3 = x[4*i+2];
559bcc973b7SBarry Smith     alpha4 = x[4*i+3];
560bcc973b7SBarry Smith     while (n-->0) {
561bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
562bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
563bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
564bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
565bcc973b7SBarry Smith       idx++; v++;
566bcc973b7SBarry Smith     }
567bcc973b7SBarry Smith   }
568dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5693649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5701ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
571bcc973b7SBarry Smith   PetscFunctionReturn(0);
572bcc973b7SBarry Smith }
573bcc973b7SBarry Smith 
574dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
575bcc973b7SBarry Smith {
5764cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
577f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
578d2840e09SBarry Smith   const PetscScalar *x,*v;
579d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
580dfbe8321SBarry Smith   PetscErrorCode    ierr;
581b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
582d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
583f9fae5adSBarry Smith 
584f9fae5adSBarry Smith   PetscFunctionBegin;
585f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
588f9fae5adSBarry Smith   idx  = a->j;
589f9fae5adSBarry Smith   v    = a->a;
590f9fae5adSBarry Smith   ii   = a->i;
591f9fae5adSBarry Smith 
592f9fae5adSBarry Smith   for (i=0; i<m; i++) {
593f9fae5adSBarry Smith     jrow = ii[i];
594f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
595f9fae5adSBarry Smith     sum1 = 0.0;
596f9fae5adSBarry Smith     sum2 = 0.0;
597f9fae5adSBarry Smith     sum3 = 0.0;
598f9fae5adSBarry Smith     sum4 = 0.0;
599f9fae5adSBarry Smith     for (j=0; j<n; j++) {
600f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
601f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
602f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
603f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
604f9fae5adSBarry Smith       jrow++;
605f9fae5adSBarry Smith     }
606f9fae5adSBarry Smith     y[4*i]   += sum1;
607f9fae5adSBarry Smith     y[4*i+1] += sum2;
608f9fae5adSBarry Smith     y[4*i+2] += sum3;
609f9fae5adSBarry Smith     y[4*i+3] += sum4;
610f9fae5adSBarry Smith   }
611f9fae5adSBarry Smith 
612dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6133649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6141ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
615f9fae5adSBarry Smith   PetscFunctionReturn(0);
616bcc973b7SBarry Smith }
617dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
618bcc973b7SBarry Smith {
6194cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
620f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
621d2840e09SBarry Smith   const PetscScalar *x,*v;
622d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
623dfbe8321SBarry Smith   PetscErrorCode    ierr;
624d2840e09SBarry Smith   PetscInt          n,i;
625d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
626f9fae5adSBarry Smith 
627f9fae5adSBarry Smith   PetscFunctionBegin;
628f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6293649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6301ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
631bfec09a0SHong Zhang 
632f9fae5adSBarry Smith   for (i=0; i<m; i++) {
633bfec09a0SHong Zhang     idx    = a->j + a->i[i];
634bfec09a0SHong Zhang     v      = a->a + a->i[i];
635f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
636f9fae5adSBarry Smith     alpha1 = x[4*i];
637f9fae5adSBarry Smith     alpha2 = x[4*i+1];
638f9fae5adSBarry Smith     alpha3 = x[4*i+2];
639f9fae5adSBarry Smith     alpha4 = x[4*i+3];
640f9fae5adSBarry Smith     while (n-->0) {
641f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
642f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
643f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
644f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
645f9fae5adSBarry Smith       idx++; v++;
646f9fae5adSBarry Smith     }
647f9fae5adSBarry Smith   }
648dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6493649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6501ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
651f9fae5adSBarry Smith   PetscFunctionReturn(0);
652f9fae5adSBarry Smith }
653f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6546cd98798SBarry Smith 
655dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
656f9fae5adSBarry Smith {
6574cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
658f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
659d2840e09SBarry Smith   const PetscScalar *x,*v;
660d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
661dfbe8321SBarry Smith   PetscErrorCode    ierr;
662d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
663d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
664f9fae5adSBarry Smith 
665f9fae5adSBarry Smith   PetscFunctionBegin;
6663649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6671ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
668f9fae5adSBarry Smith   idx  = a->j;
669f9fae5adSBarry Smith   v    = a->a;
670f9fae5adSBarry Smith   ii   = a->i;
671f9fae5adSBarry Smith 
672f9fae5adSBarry Smith   for (i=0; i<m; i++) {
673f9fae5adSBarry Smith     jrow  = ii[i];
674f9fae5adSBarry Smith     n     = ii[i+1] - jrow;
675f9fae5adSBarry Smith     sum1  = 0.0;
676f9fae5adSBarry Smith     sum2  = 0.0;
677f9fae5adSBarry Smith     sum3  = 0.0;
678f9fae5adSBarry Smith     sum4  = 0.0;
679f9fae5adSBarry Smith     sum5  = 0.0;
68026fbe8dcSKarl Rupp 
681b3c51c66SMatthew Knepley     nonzerorow += (n>0);
682f9fae5adSBarry Smith     for (j=0; j<n; j++) {
683f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
684f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
685f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
686f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
687f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
688f9fae5adSBarry Smith       jrow++;
689f9fae5adSBarry Smith     }
690f9fae5adSBarry Smith     y[5*i]   = sum1;
691f9fae5adSBarry Smith     y[5*i+1] = sum2;
692f9fae5adSBarry Smith     y[5*i+2] = sum3;
693f9fae5adSBarry Smith     y[5*i+3] = sum4;
694f9fae5adSBarry Smith     y[5*i+4] = sum5;
695f9fae5adSBarry Smith   }
696f9fae5adSBarry Smith 
697dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
6983649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6991ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
700f9fae5adSBarry Smith   PetscFunctionReturn(0);
701f9fae5adSBarry Smith }
702f9fae5adSBarry Smith 
703dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
704f9fae5adSBarry Smith {
7054cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
706f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
707d2840e09SBarry Smith   const PetscScalar *x,*v;
708d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
709dfbe8321SBarry Smith   PetscErrorCode    ierr;
710d2840e09SBarry Smith   PetscInt          n,i;
711d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
712f9fae5adSBarry Smith 
713f9fae5adSBarry Smith   PetscFunctionBegin;
714d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7153649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7161ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
717bfec09a0SHong Zhang 
718f9fae5adSBarry Smith   for (i=0; i<m; i++) {
719bfec09a0SHong Zhang     idx    = a->j + a->i[i];
720bfec09a0SHong Zhang     v      = a->a + a->i[i];
721f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
722f9fae5adSBarry Smith     alpha1 = x[5*i];
723f9fae5adSBarry Smith     alpha2 = x[5*i+1];
724f9fae5adSBarry Smith     alpha3 = x[5*i+2];
725f9fae5adSBarry Smith     alpha4 = x[5*i+3];
726f9fae5adSBarry Smith     alpha5 = x[5*i+4];
727f9fae5adSBarry Smith     while (n-->0) {
728f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
729f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
730f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
731f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
732f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
733f9fae5adSBarry Smith       idx++; v++;
734f9fae5adSBarry Smith     }
735f9fae5adSBarry Smith   }
736dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7373649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7381ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
739f9fae5adSBarry Smith   PetscFunctionReturn(0);
740f9fae5adSBarry Smith }
741f9fae5adSBarry Smith 
742dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
743f9fae5adSBarry Smith {
7444cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
745f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
746d2840e09SBarry Smith   const PetscScalar *x,*v;
747d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
748dfbe8321SBarry Smith   PetscErrorCode    ierr;
749b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
750d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
751f9fae5adSBarry Smith 
752f9fae5adSBarry Smith   PetscFunctionBegin;
753f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7543649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7551ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
756f9fae5adSBarry Smith   idx  = a->j;
757f9fae5adSBarry Smith   v    = a->a;
758f9fae5adSBarry Smith   ii   = a->i;
759f9fae5adSBarry Smith 
760f9fae5adSBarry Smith   for (i=0; i<m; i++) {
761f9fae5adSBarry Smith     jrow = ii[i];
762f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
763f9fae5adSBarry Smith     sum1 = 0.0;
764f9fae5adSBarry Smith     sum2 = 0.0;
765f9fae5adSBarry Smith     sum3 = 0.0;
766f9fae5adSBarry Smith     sum4 = 0.0;
767f9fae5adSBarry Smith     sum5 = 0.0;
768f9fae5adSBarry Smith     for (j=0; j<n; j++) {
769f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
770f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
771f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
772f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
773f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
774f9fae5adSBarry Smith       jrow++;
775f9fae5adSBarry Smith     }
776f9fae5adSBarry Smith     y[5*i]   += sum1;
777f9fae5adSBarry Smith     y[5*i+1] += sum2;
778f9fae5adSBarry Smith     y[5*i+2] += sum3;
779f9fae5adSBarry Smith     y[5*i+3] += sum4;
780f9fae5adSBarry Smith     y[5*i+4] += sum5;
781f9fae5adSBarry Smith   }
782f9fae5adSBarry Smith 
783dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7843649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7851ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
786f9fae5adSBarry Smith   PetscFunctionReturn(0);
787f9fae5adSBarry Smith }
788f9fae5adSBarry Smith 
789dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
790f9fae5adSBarry Smith {
7914cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
792f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
793d2840e09SBarry Smith   const PetscScalar *x,*v;
794d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
795dfbe8321SBarry Smith   PetscErrorCode    ierr;
796d2840e09SBarry Smith   PetscInt          n,i;
797d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
798f9fae5adSBarry Smith 
799f9fae5adSBarry Smith   PetscFunctionBegin;
800f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8013649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8021ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
803bfec09a0SHong Zhang 
804f9fae5adSBarry Smith   for (i=0; i<m; i++) {
805bfec09a0SHong Zhang     idx    = a->j + a->i[i];
806bfec09a0SHong Zhang     v      = a->a + a->i[i];
807f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
808f9fae5adSBarry Smith     alpha1 = x[5*i];
809f9fae5adSBarry Smith     alpha2 = x[5*i+1];
810f9fae5adSBarry Smith     alpha3 = x[5*i+2];
811f9fae5adSBarry Smith     alpha4 = x[5*i+3];
812f9fae5adSBarry Smith     alpha5 = x[5*i+4];
813f9fae5adSBarry Smith     while (n-->0) {
814f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
815f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
816f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
817f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
818f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
819f9fae5adSBarry Smith       idx++; v++;
820f9fae5adSBarry Smith     }
821f9fae5adSBarry Smith   }
822dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8233649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8241ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
825f9fae5adSBarry Smith   PetscFunctionReturn(0);
826bcc973b7SBarry Smith }
827bcc973b7SBarry Smith 
8286cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
829dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8306cd98798SBarry Smith {
8316cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8326cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
833d2840e09SBarry Smith   const PetscScalar *x,*v;
834d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
835dfbe8321SBarry Smith   PetscErrorCode    ierr;
836d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
837d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
8386cd98798SBarry Smith 
8396cd98798SBarry Smith   PetscFunctionBegin;
8403649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8411ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8426cd98798SBarry Smith   idx  = a->j;
8436cd98798SBarry Smith   v    = a->a;
8446cd98798SBarry Smith   ii   = a->i;
8456cd98798SBarry Smith 
8466cd98798SBarry Smith   for (i=0; i<m; i++) {
8476cd98798SBarry Smith     jrow  = ii[i];
8486cd98798SBarry Smith     n     = ii[i+1] - jrow;
8496cd98798SBarry Smith     sum1  = 0.0;
8506cd98798SBarry Smith     sum2  = 0.0;
8516cd98798SBarry Smith     sum3  = 0.0;
8526cd98798SBarry Smith     sum4  = 0.0;
8536cd98798SBarry Smith     sum5  = 0.0;
8546cd98798SBarry Smith     sum6  = 0.0;
85526fbe8dcSKarl Rupp 
856b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8576cd98798SBarry Smith     for (j=0; j<n; j++) {
8586cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8596cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8606cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8616cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8626cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8636cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8646cd98798SBarry Smith       jrow++;
8656cd98798SBarry Smith     }
8666cd98798SBarry Smith     y[6*i]   = sum1;
8676cd98798SBarry Smith     y[6*i+1] = sum2;
8686cd98798SBarry Smith     y[6*i+2] = sum3;
8696cd98798SBarry Smith     y[6*i+3] = sum4;
8706cd98798SBarry Smith     y[6*i+4] = sum5;
8716cd98798SBarry Smith     y[6*i+5] = sum6;
8726cd98798SBarry Smith   }
8736cd98798SBarry Smith 
874dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
8753649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8761ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8776cd98798SBarry Smith   PetscFunctionReturn(0);
8786cd98798SBarry Smith }
8796cd98798SBarry Smith 
880dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8816cd98798SBarry Smith {
8826cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8836cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
884d2840e09SBarry Smith   const PetscScalar *x,*v;
885d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
886dfbe8321SBarry Smith   PetscErrorCode    ierr;
887d2840e09SBarry Smith   PetscInt          n,i;
888d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
8896cd98798SBarry Smith 
8906cd98798SBarry Smith   PetscFunctionBegin;
891d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
8923649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8931ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
894bfec09a0SHong Zhang 
8956cd98798SBarry Smith   for (i=0; i<m; i++) {
896bfec09a0SHong Zhang     idx    = a->j + a->i[i];
897bfec09a0SHong Zhang     v      = a->a + a->i[i];
8986cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8996cd98798SBarry Smith     alpha1 = x[6*i];
9006cd98798SBarry Smith     alpha2 = x[6*i+1];
9016cd98798SBarry Smith     alpha3 = x[6*i+2];
9026cd98798SBarry Smith     alpha4 = x[6*i+3];
9036cd98798SBarry Smith     alpha5 = x[6*i+4];
9046cd98798SBarry Smith     alpha6 = x[6*i+5];
9056cd98798SBarry Smith     while (n-->0) {
9066cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9076cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9086cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9096cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9106cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9116cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9126cd98798SBarry Smith       idx++; v++;
9136cd98798SBarry Smith     }
9146cd98798SBarry Smith   }
915dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9163649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9171ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9186cd98798SBarry Smith   PetscFunctionReturn(0);
9196cd98798SBarry Smith }
9206cd98798SBarry Smith 
921dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9226cd98798SBarry Smith {
9236cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9246cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
925d2840e09SBarry Smith   const PetscScalar *x,*v;
926d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
927dfbe8321SBarry Smith   PetscErrorCode    ierr;
928b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
929d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9306cd98798SBarry Smith 
9316cd98798SBarry Smith   PetscFunctionBegin;
9326cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9333649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9341ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9356cd98798SBarry Smith   idx  = a->j;
9366cd98798SBarry Smith   v    = a->a;
9376cd98798SBarry Smith   ii   = a->i;
9386cd98798SBarry Smith 
9396cd98798SBarry Smith   for (i=0; i<m; i++) {
9406cd98798SBarry Smith     jrow = ii[i];
9416cd98798SBarry Smith     n    = ii[i+1] - jrow;
9426cd98798SBarry Smith     sum1 = 0.0;
9436cd98798SBarry Smith     sum2 = 0.0;
9446cd98798SBarry Smith     sum3 = 0.0;
9456cd98798SBarry Smith     sum4 = 0.0;
9466cd98798SBarry Smith     sum5 = 0.0;
9476cd98798SBarry Smith     sum6 = 0.0;
9486cd98798SBarry Smith     for (j=0; j<n; j++) {
9496cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9506cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9516cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9526cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9536cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9546cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9556cd98798SBarry Smith       jrow++;
9566cd98798SBarry Smith     }
9576cd98798SBarry Smith     y[6*i]   += sum1;
9586cd98798SBarry Smith     y[6*i+1] += sum2;
9596cd98798SBarry Smith     y[6*i+2] += sum3;
9606cd98798SBarry Smith     y[6*i+3] += sum4;
9616cd98798SBarry Smith     y[6*i+4] += sum5;
9626cd98798SBarry Smith     y[6*i+5] += sum6;
9636cd98798SBarry Smith   }
9646cd98798SBarry Smith 
965dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9663649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9671ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9686cd98798SBarry Smith   PetscFunctionReturn(0);
9696cd98798SBarry Smith }
9706cd98798SBarry Smith 
971dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9726cd98798SBarry Smith {
9736cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9746cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
975d2840e09SBarry Smith   const PetscScalar *x,*v;
976d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
977dfbe8321SBarry Smith   PetscErrorCode    ierr;
978d2840e09SBarry Smith   PetscInt          n,i;
979d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9806cd98798SBarry Smith 
9816cd98798SBarry Smith   PetscFunctionBegin;
9826cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9833649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9841ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
985bfec09a0SHong Zhang 
9866cd98798SBarry Smith   for (i=0; i<m; i++) {
987bfec09a0SHong Zhang     idx    = a->j + a->i[i];
988bfec09a0SHong Zhang     v      = a->a + a->i[i];
9896cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9906cd98798SBarry Smith     alpha1 = x[6*i];
9916cd98798SBarry Smith     alpha2 = x[6*i+1];
9926cd98798SBarry Smith     alpha3 = x[6*i+2];
9936cd98798SBarry Smith     alpha4 = x[6*i+3];
9946cd98798SBarry Smith     alpha5 = x[6*i+4];
9956cd98798SBarry Smith     alpha6 = x[6*i+5];
9966cd98798SBarry Smith     while (n-->0) {
9976cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9986cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9996cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10006cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10016cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10026cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10036cd98798SBarry Smith       idx++; v++;
10046cd98798SBarry Smith     }
10056cd98798SBarry Smith   }
1006dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10073649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10081ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10096cd98798SBarry Smith   PetscFunctionReturn(0);
10106cd98798SBarry Smith }
10116cd98798SBarry Smith 
101266ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
1013ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1014ed8eea03SBarry Smith {
1015ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1016ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1017d2840e09SBarry Smith   const PetscScalar *x,*v;
1018d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1019ed8eea03SBarry Smith   PetscErrorCode    ierr;
1020d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1021d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1022ed8eea03SBarry Smith 
1023ed8eea03SBarry Smith   PetscFunctionBegin;
10243649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1025ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1026ed8eea03SBarry Smith   idx  = a->j;
1027ed8eea03SBarry Smith   v    = a->a;
1028ed8eea03SBarry Smith   ii   = a->i;
1029ed8eea03SBarry Smith 
1030ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1031ed8eea03SBarry Smith     jrow  = ii[i];
1032ed8eea03SBarry Smith     n     = ii[i+1] - jrow;
1033ed8eea03SBarry Smith     sum1  = 0.0;
1034ed8eea03SBarry Smith     sum2  = 0.0;
1035ed8eea03SBarry Smith     sum3  = 0.0;
1036ed8eea03SBarry Smith     sum4  = 0.0;
1037ed8eea03SBarry Smith     sum5  = 0.0;
1038ed8eea03SBarry Smith     sum6  = 0.0;
1039ed8eea03SBarry Smith     sum7  = 0.0;
104026fbe8dcSKarl Rupp 
1041b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1042ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1043ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1044ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1045ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1046ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1047ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1048ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1049ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1050ed8eea03SBarry Smith       jrow++;
1051ed8eea03SBarry Smith     }
1052ed8eea03SBarry Smith     y[7*i]   = sum1;
1053ed8eea03SBarry Smith     y[7*i+1] = sum2;
1054ed8eea03SBarry Smith     y[7*i+2] = sum3;
1055ed8eea03SBarry Smith     y[7*i+3] = sum4;
1056ed8eea03SBarry Smith     y[7*i+4] = sum5;
1057ed8eea03SBarry Smith     y[7*i+5] = sum6;
1058ed8eea03SBarry Smith     y[7*i+6] = sum7;
1059ed8eea03SBarry Smith   }
1060ed8eea03SBarry Smith 
1061dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
10623649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1063ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1064ed8eea03SBarry Smith   PetscFunctionReturn(0);
1065ed8eea03SBarry Smith }
1066ed8eea03SBarry Smith 
1067ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1068ed8eea03SBarry Smith {
1069ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1070ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1071d2840e09SBarry Smith   const PetscScalar *x,*v;
1072d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1073ed8eea03SBarry Smith   PetscErrorCode    ierr;
1074d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1075d2840e09SBarry Smith   PetscInt          n,i;
1076ed8eea03SBarry Smith 
1077ed8eea03SBarry Smith   PetscFunctionBegin;
1078d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
10793649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1080ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1081ed8eea03SBarry Smith 
1082ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1083ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1084ed8eea03SBarry Smith     v      = a->a + a->i[i];
1085ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1086ed8eea03SBarry Smith     alpha1 = x[7*i];
1087ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1088ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1089ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1090ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1091ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1092ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1093ed8eea03SBarry Smith     while (n-->0) {
1094ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1095ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1096ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1097ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1098ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1099ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1100ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1101ed8eea03SBarry Smith       idx++; v++;
1102ed8eea03SBarry Smith     }
1103ed8eea03SBarry Smith   }
1104dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1106ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1107ed8eea03SBarry Smith   PetscFunctionReturn(0);
1108ed8eea03SBarry Smith }
1109ed8eea03SBarry Smith 
1110ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1111ed8eea03SBarry Smith {
1112ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1113ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1114d2840e09SBarry Smith   const PetscScalar *x,*v;
1115d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1116ed8eea03SBarry Smith   PetscErrorCode    ierr;
1117d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1118ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1119ed8eea03SBarry Smith 
1120ed8eea03SBarry Smith   PetscFunctionBegin;
1121ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1123ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1124ed8eea03SBarry Smith   idx  = a->j;
1125ed8eea03SBarry Smith   v    = a->a;
1126ed8eea03SBarry Smith   ii   = a->i;
1127ed8eea03SBarry Smith 
1128ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1129ed8eea03SBarry Smith     jrow = ii[i];
1130ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1131ed8eea03SBarry Smith     sum1 = 0.0;
1132ed8eea03SBarry Smith     sum2 = 0.0;
1133ed8eea03SBarry Smith     sum3 = 0.0;
1134ed8eea03SBarry Smith     sum4 = 0.0;
1135ed8eea03SBarry Smith     sum5 = 0.0;
1136ed8eea03SBarry Smith     sum6 = 0.0;
1137ed8eea03SBarry Smith     sum7 = 0.0;
1138ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1139ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1140ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1141ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1142ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1143ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1144ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1145ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1146ed8eea03SBarry Smith       jrow++;
1147ed8eea03SBarry Smith     }
1148ed8eea03SBarry Smith     y[7*i]   += sum1;
1149ed8eea03SBarry Smith     y[7*i+1] += sum2;
1150ed8eea03SBarry Smith     y[7*i+2] += sum3;
1151ed8eea03SBarry Smith     y[7*i+3] += sum4;
1152ed8eea03SBarry Smith     y[7*i+4] += sum5;
1153ed8eea03SBarry Smith     y[7*i+5] += sum6;
1154ed8eea03SBarry Smith     y[7*i+6] += sum7;
1155ed8eea03SBarry Smith   }
1156ed8eea03SBarry Smith 
1157dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11583649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1159ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1160ed8eea03SBarry Smith   PetscFunctionReturn(0);
1161ed8eea03SBarry Smith }
1162ed8eea03SBarry Smith 
1163ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1164ed8eea03SBarry Smith {
1165ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1166ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1167d2840e09SBarry Smith   const PetscScalar *x,*v;
1168d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1169ed8eea03SBarry Smith   PetscErrorCode    ierr;
1170d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1171d2840e09SBarry Smith   PetscInt          n,i;
1172ed8eea03SBarry Smith 
1173ed8eea03SBarry Smith   PetscFunctionBegin;
1174ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11753649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1176ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1177ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1178ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1179ed8eea03SBarry Smith     v      = a->a + a->i[i];
1180ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1181ed8eea03SBarry Smith     alpha1 = x[7*i];
1182ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1183ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1184ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1185ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1186ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1187ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1188ed8eea03SBarry Smith     while (n-->0) {
1189ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1190ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1191ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1192ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1193ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1194ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1195ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1196ed8eea03SBarry Smith       idx++; v++;
1197ed8eea03SBarry Smith     }
1198ed8eea03SBarry Smith   }
1199dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12003649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1201ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1202ed8eea03SBarry Smith   PetscFunctionReturn(0);
1203ed8eea03SBarry Smith }
1204ed8eea03SBarry Smith 
1205dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
120666ed3db0SBarry Smith {
120766ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
120866ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1209d2840e09SBarry Smith   const PetscScalar *x,*v;
1210d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1211dfbe8321SBarry Smith   PetscErrorCode    ierr;
1212d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1213d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
121466ed3db0SBarry Smith 
121566ed3db0SBarry Smith   PetscFunctionBegin;
12163649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12171ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
121866ed3db0SBarry Smith   idx  = a->j;
121966ed3db0SBarry Smith   v    = a->a;
122066ed3db0SBarry Smith   ii   = a->i;
122166ed3db0SBarry Smith 
122266ed3db0SBarry Smith   for (i=0; i<m; i++) {
122366ed3db0SBarry Smith     jrow  = ii[i];
122466ed3db0SBarry Smith     n     = ii[i+1] - jrow;
122566ed3db0SBarry Smith     sum1  = 0.0;
122666ed3db0SBarry Smith     sum2  = 0.0;
122766ed3db0SBarry Smith     sum3  = 0.0;
122866ed3db0SBarry Smith     sum4  = 0.0;
122966ed3db0SBarry Smith     sum5  = 0.0;
123066ed3db0SBarry Smith     sum6  = 0.0;
123166ed3db0SBarry Smith     sum7  = 0.0;
123266ed3db0SBarry Smith     sum8  = 0.0;
123326fbe8dcSKarl Rupp 
1234b3c51c66SMatthew Knepley     nonzerorow += (n>0);
123566ed3db0SBarry Smith     for (j=0; j<n; j++) {
123666ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
123766ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
123866ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
123966ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
124066ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
124166ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
124266ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
124366ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
124466ed3db0SBarry Smith       jrow++;
124566ed3db0SBarry Smith     }
124666ed3db0SBarry Smith     y[8*i]   = sum1;
124766ed3db0SBarry Smith     y[8*i+1] = sum2;
124866ed3db0SBarry Smith     y[8*i+2] = sum3;
124966ed3db0SBarry Smith     y[8*i+3] = sum4;
125066ed3db0SBarry Smith     y[8*i+4] = sum5;
125166ed3db0SBarry Smith     y[8*i+5] = sum6;
125266ed3db0SBarry Smith     y[8*i+6] = sum7;
125366ed3db0SBarry Smith     y[8*i+7] = sum8;
125466ed3db0SBarry Smith   }
125566ed3db0SBarry Smith 
1256dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
12573649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
12581ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
125966ed3db0SBarry Smith   PetscFunctionReturn(0);
126066ed3db0SBarry Smith }
126166ed3db0SBarry Smith 
1262dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
126366ed3db0SBarry Smith {
126466ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
126566ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1266d2840e09SBarry Smith   const PetscScalar *x,*v;
1267d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1268dfbe8321SBarry Smith   PetscErrorCode    ierr;
1269d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1270d2840e09SBarry Smith   PetscInt          n,i;
127166ed3db0SBarry Smith 
127266ed3db0SBarry Smith   PetscFunctionBegin;
1273d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
12743649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12751ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1276bfec09a0SHong Zhang 
127766ed3db0SBarry Smith   for (i=0; i<m; i++) {
1278bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1279bfec09a0SHong Zhang     v      = a->a + a->i[i];
128066ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
128166ed3db0SBarry Smith     alpha1 = x[8*i];
128266ed3db0SBarry Smith     alpha2 = x[8*i+1];
128366ed3db0SBarry Smith     alpha3 = x[8*i+2];
128466ed3db0SBarry Smith     alpha4 = x[8*i+3];
128566ed3db0SBarry Smith     alpha5 = x[8*i+4];
128666ed3db0SBarry Smith     alpha6 = x[8*i+5];
128766ed3db0SBarry Smith     alpha7 = x[8*i+6];
128866ed3db0SBarry Smith     alpha8 = x[8*i+7];
128966ed3db0SBarry Smith     while (n-->0) {
129066ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
129166ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
129266ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
129366ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
129466ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
129566ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
129666ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
129766ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
129866ed3db0SBarry Smith       idx++; v++;
129966ed3db0SBarry Smith     }
130066ed3db0SBarry Smith   }
1301dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13031ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130466ed3db0SBarry Smith   PetscFunctionReturn(0);
130566ed3db0SBarry Smith }
130666ed3db0SBarry Smith 
1307dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
130866ed3db0SBarry Smith {
130966ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
131066ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1311d2840e09SBarry Smith   const PetscScalar *x,*v;
1312d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1313dfbe8321SBarry Smith   PetscErrorCode    ierr;
1314d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1315b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
131666ed3db0SBarry Smith 
131766ed3db0SBarry Smith   PetscFunctionBegin;
131866ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13201ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
132166ed3db0SBarry Smith   idx  = a->j;
132266ed3db0SBarry Smith   v    = a->a;
132366ed3db0SBarry Smith   ii   = a->i;
132466ed3db0SBarry Smith 
132566ed3db0SBarry Smith   for (i=0; i<m; i++) {
132666ed3db0SBarry Smith     jrow = ii[i];
132766ed3db0SBarry Smith     n    = ii[i+1] - jrow;
132866ed3db0SBarry Smith     sum1 = 0.0;
132966ed3db0SBarry Smith     sum2 = 0.0;
133066ed3db0SBarry Smith     sum3 = 0.0;
133166ed3db0SBarry Smith     sum4 = 0.0;
133266ed3db0SBarry Smith     sum5 = 0.0;
133366ed3db0SBarry Smith     sum6 = 0.0;
133466ed3db0SBarry Smith     sum7 = 0.0;
133566ed3db0SBarry Smith     sum8 = 0.0;
133666ed3db0SBarry Smith     for (j=0; j<n; j++) {
133766ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
133866ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
133966ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
134066ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
134166ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
134266ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
134366ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
134466ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
134566ed3db0SBarry Smith       jrow++;
134666ed3db0SBarry Smith     }
134766ed3db0SBarry Smith     y[8*i]   += sum1;
134866ed3db0SBarry Smith     y[8*i+1] += sum2;
134966ed3db0SBarry Smith     y[8*i+2] += sum3;
135066ed3db0SBarry Smith     y[8*i+3] += sum4;
135166ed3db0SBarry Smith     y[8*i+4] += sum5;
135266ed3db0SBarry Smith     y[8*i+5] += sum6;
135366ed3db0SBarry Smith     y[8*i+6] += sum7;
135466ed3db0SBarry Smith     y[8*i+7] += sum8;
135566ed3db0SBarry Smith   }
135666ed3db0SBarry Smith 
1357dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13583649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13591ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
136066ed3db0SBarry Smith   PetscFunctionReturn(0);
136166ed3db0SBarry Smith }
136266ed3db0SBarry Smith 
1363dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
136466ed3db0SBarry Smith {
136566ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
136666ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1367d2840e09SBarry Smith   const PetscScalar *x,*v;
1368d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1369dfbe8321SBarry Smith   PetscErrorCode    ierr;
1370d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1371d2840e09SBarry Smith   PetscInt          n,i;
137266ed3db0SBarry Smith 
137366ed3db0SBarry Smith   PetscFunctionBegin;
137466ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13753649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13761ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
137766ed3db0SBarry Smith   for (i=0; i<m; i++) {
1378bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1379bfec09a0SHong Zhang     v      = a->a + a->i[i];
138066ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
138166ed3db0SBarry Smith     alpha1 = x[8*i];
138266ed3db0SBarry Smith     alpha2 = x[8*i+1];
138366ed3db0SBarry Smith     alpha3 = x[8*i+2];
138466ed3db0SBarry Smith     alpha4 = x[8*i+3];
138566ed3db0SBarry Smith     alpha5 = x[8*i+4];
138666ed3db0SBarry Smith     alpha6 = x[8*i+5];
138766ed3db0SBarry Smith     alpha7 = x[8*i+6];
138866ed3db0SBarry Smith     alpha8 = x[8*i+7];
138966ed3db0SBarry Smith     while (n-->0) {
139066ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
139166ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
139266ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
139366ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
139466ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
139566ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
139666ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
139766ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
139866ed3db0SBarry Smith       idx++; v++;
139966ed3db0SBarry Smith     }
140066ed3db0SBarry Smith   }
1401dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14031ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14042f7816d4SBarry Smith   PetscFunctionReturn(0);
14052f7816d4SBarry Smith }
14062f7816d4SBarry Smith 
14072b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
1408dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14092b692628SMatthew Knepley {
14102b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14112b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1412d2840e09SBarry Smith   const PetscScalar *x,*v;
1413d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1414dfbe8321SBarry Smith   PetscErrorCode    ierr;
1415d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1416d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14172b692628SMatthew Knepley 
14182b692628SMatthew Knepley   PetscFunctionBegin;
14193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14201ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14212b692628SMatthew Knepley   idx  = a->j;
14222b692628SMatthew Knepley   v    = a->a;
14232b692628SMatthew Knepley   ii   = a->i;
14242b692628SMatthew Knepley 
14252b692628SMatthew Knepley   for (i=0; i<m; i++) {
14262b692628SMatthew Knepley     jrow  = ii[i];
14272b692628SMatthew Knepley     n     = ii[i+1] - jrow;
14282b692628SMatthew Knepley     sum1  = 0.0;
14292b692628SMatthew Knepley     sum2  = 0.0;
14302b692628SMatthew Knepley     sum3  = 0.0;
14312b692628SMatthew Knepley     sum4  = 0.0;
14322b692628SMatthew Knepley     sum5  = 0.0;
14332b692628SMatthew Knepley     sum6  = 0.0;
14342b692628SMatthew Knepley     sum7  = 0.0;
14352b692628SMatthew Knepley     sum8  = 0.0;
14362b692628SMatthew Knepley     sum9  = 0.0;
143726fbe8dcSKarl Rupp 
1438b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14392b692628SMatthew Knepley     for (j=0; j<n; j++) {
14402b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14412b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14422b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14432b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14442b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14452b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14462b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14472b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14482b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14492b692628SMatthew Knepley       jrow++;
14502b692628SMatthew Knepley     }
14512b692628SMatthew Knepley     y[9*i]   = sum1;
14522b692628SMatthew Knepley     y[9*i+1] = sum2;
14532b692628SMatthew Knepley     y[9*i+2] = sum3;
14542b692628SMatthew Knepley     y[9*i+3] = sum4;
14552b692628SMatthew Knepley     y[9*i+4] = sum5;
14562b692628SMatthew Knepley     y[9*i+5] = sum6;
14572b692628SMatthew Knepley     y[9*i+6] = sum7;
14582b692628SMatthew Knepley     y[9*i+7] = sum8;
14592b692628SMatthew Knepley     y[9*i+8] = sum9;
14602b692628SMatthew Knepley   }
14612b692628SMatthew Knepley 
1462dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
14633649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14641ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14652b692628SMatthew Knepley   PetscFunctionReturn(0);
14662b692628SMatthew Knepley }
14672b692628SMatthew Knepley 
1468b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1469b9cda22cSBarry Smith 
1470dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14712b692628SMatthew Knepley {
14722b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14732b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1474d2840e09SBarry Smith   const PetscScalar *x,*v;
1475d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1476dfbe8321SBarry Smith   PetscErrorCode    ierr;
1477d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1478d2840e09SBarry Smith   PetscInt          n,i;
14792b692628SMatthew Knepley 
14802b692628SMatthew Knepley   PetscFunctionBegin;
1481d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
14823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14831ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14842b692628SMatthew Knepley 
14852b692628SMatthew Knepley   for (i=0; i<m; i++) {
14862b692628SMatthew Knepley     idx    = a->j + a->i[i];
14872b692628SMatthew Knepley     v      = a->a + a->i[i];
14882b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14892b692628SMatthew Knepley     alpha1 = x[9*i];
14902b692628SMatthew Knepley     alpha2 = x[9*i+1];
14912b692628SMatthew Knepley     alpha3 = x[9*i+2];
14922b692628SMatthew Knepley     alpha4 = x[9*i+3];
14932b692628SMatthew Knepley     alpha5 = x[9*i+4];
14942b692628SMatthew Knepley     alpha6 = x[9*i+5];
14952b692628SMatthew Knepley     alpha7 = x[9*i+6];
14962b692628SMatthew Knepley     alpha8 = x[9*i+7];
14972f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
14982b692628SMatthew Knepley     while (n-->0) {
14992b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15002b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15012b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15022b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15032b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15042b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15052b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15062b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15072b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15082b692628SMatthew Knepley       idx++; v++;
15092b692628SMatthew Knepley     }
15102b692628SMatthew Knepley   }
1511dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15123649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15131ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15142b692628SMatthew Knepley   PetscFunctionReturn(0);
15152b692628SMatthew Knepley }
15162b692628SMatthew Knepley 
1517dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15182b692628SMatthew Knepley {
15192b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15202b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1521d2840e09SBarry Smith   const PetscScalar *x,*v;
1522d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1523dfbe8321SBarry Smith   PetscErrorCode    ierr;
1524d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1525b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15262b692628SMatthew Knepley 
15272b692628SMatthew Knepley   PetscFunctionBegin;
15282b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15293649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15301ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15312b692628SMatthew Knepley   idx  = a->j;
15322b692628SMatthew Knepley   v    = a->a;
15332b692628SMatthew Knepley   ii   = a->i;
15342b692628SMatthew Knepley 
15352b692628SMatthew Knepley   for (i=0; i<m; i++) {
15362b692628SMatthew Knepley     jrow = ii[i];
15372b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15382b692628SMatthew Knepley     sum1 = 0.0;
15392b692628SMatthew Knepley     sum2 = 0.0;
15402b692628SMatthew Knepley     sum3 = 0.0;
15412b692628SMatthew Knepley     sum4 = 0.0;
15422b692628SMatthew Knepley     sum5 = 0.0;
15432b692628SMatthew Knepley     sum6 = 0.0;
15442b692628SMatthew Knepley     sum7 = 0.0;
15452b692628SMatthew Knepley     sum8 = 0.0;
15462b692628SMatthew Knepley     sum9 = 0.0;
15472b692628SMatthew Knepley     for (j=0; j<n; j++) {
15482b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15492b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15502b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15512b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15522b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15532b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15542b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15552b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15562b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15572b692628SMatthew Knepley       jrow++;
15582b692628SMatthew Knepley     }
15592b692628SMatthew Knepley     y[9*i]   += sum1;
15602b692628SMatthew Knepley     y[9*i+1] += sum2;
15612b692628SMatthew Knepley     y[9*i+2] += sum3;
15622b692628SMatthew Knepley     y[9*i+3] += sum4;
15632b692628SMatthew Knepley     y[9*i+4] += sum5;
15642b692628SMatthew Knepley     y[9*i+5] += sum6;
15652b692628SMatthew Knepley     y[9*i+6] += sum7;
15662b692628SMatthew Knepley     y[9*i+7] += sum8;
15672b692628SMatthew Knepley     y[9*i+8] += sum9;
15682b692628SMatthew Knepley   }
15692b692628SMatthew Knepley 
1570efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
15713649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15721ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15732b692628SMatthew Knepley   PetscFunctionReturn(0);
15742b692628SMatthew Knepley }
15752b692628SMatthew Knepley 
1576dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15772b692628SMatthew Knepley {
15782b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15792b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1580d2840e09SBarry Smith   const PetscScalar *x,*v;
1581d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1582dfbe8321SBarry Smith   PetscErrorCode    ierr;
1583d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1584d2840e09SBarry Smith   PetscInt          n,i;
15852b692628SMatthew Knepley 
15862b692628SMatthew Knepley   PetscFunctionBegin;
15872b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15883649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15891ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15902b692628SMatthew Knepley   for (i=0; i<m; i++) {
15912b692628SMatthew Knepley     idx    = a->j + a->i[i];
15922b692628SMatthew Knepley     v      = a->a + a->i[i];
15932b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15942b692628SMatthew Knepley     alpha1 = x[9*i];
15952b692628SMatthew Knepley     alpha2 = x[9*i+1];
15962b692628SMatthew Knepley     alpha3 = x[9*i+2];
15972b692628SMatthew Knepley     alpha4 = x[9*i+3];
15982b692628SMatthew Knepley     alpha5 = x[9*i+4];
15992b692628SMatthew Knepley     alpha6 = x[9*i+5];
16002b692628SMatthew Knepley     alpha7 = x[9*i+6];
16012b692628SMatthew Knepley     alpha8 = x[9*i+7];
16022b692628SMatthew Knepley     alpha9 = x[9*i+8];
16032b692628SMatthew Knepley     while (n-->0) {
16042b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16052b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16062b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16072b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16082b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16092b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16102b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16112b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16122b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16132b692628SMatthew Knepley       idx++; v++;
16142b692628SMatthew Knepley     }
16152b692628SMatthew Knepley   }
1616dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16181ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16192b692628SMatthew Knepley   PetscFunctionReturn(0);
16202b692628SMatthew Knepley }
1621b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1622b9cda22cSBarry Smith {
1623b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1624b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1625d2840e09SBarry Smith   const PetscScalar *x,*v;
1626d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1627b9cda22cSBarry Smith   PetscErrorCode    ierr;
1628d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1629d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1630b9cda22cSBarry Smith 
1631b9cda22cSBarry Smith   PetscFunctionBegin;
16323649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1633b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1634b9cda22cSBarry Smith   idx  = a->j;
1635b9cda22cSBarry Smith   v    = a->a;
1636b9cda22cSBarry Smith   ii   = a->i;
1637b9cda22cSBarry Smith 
1638b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1639b9cda22cSBarry Smith     jrow  = ii[i];
1640b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1641b9cda22cSBarry Smith     sum1  = 0.0;
1642b9cda22cSBarry Smith     sum2  = 0.0;
1643b9cda22cSBarry Smith     sum3  = 0.0;
1644b9cda22cSBarry Smith     sum4  = 0.0;
1645b9cda22cSBarry Smith     sum5  = 0.0;
1646b9cda22cSBarry Smith     sum6  = 0.0;
1647b9cda22cSBarry Smith     sum7  = 0.0;
1648b9cda22cSBarry Smith     sum8  = 0.0;
1649b9cda22cSBarry Smith     sum9  = 0.0;
1650b9cda22cSBarry Smith     sum10 = 0.0;
165126fbe8dcSKarl Rupp 
1652b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1653b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1654b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1655b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1656b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1657b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1658b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1659b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1660b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1661b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1662b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1663b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1664b9cda22cSBarry Smith       jrow++;
1665b9cda22cSBarry Smith     }
1666b9cda22cSBarry Smith     y[10*i]   = sum1;
1667b9cda22cSBarry Smith     y[10*i+1] = sum2;
1668b9cda22cSBarry Smith     y[10*i+2] = sum3;
1669b9cda22cSBarry Smith     y[10*i+3] = sum4;
1670b9cda22cSBarry Smith     y[10*i+4] = sum5;
1671b9cda22cSBarry Smith     y[10*i+5] = sum6;
1672b9cda22cSBarry Smith     y[10*i+6] = sum7;
1673b9cda22cSBarry Smith     y[10*i+7] = sum8;
1674b9cda22cSBarry Smith     y[10*i+8] = sum9;
1675b9cda22cSBarry Smith     y[10*i+9] = sum10;
1676b9cda22cSBarry Smith   }
1677b9cda22cSBarry Smith 
1678dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
16793649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1680b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1681b9cda22cSBarry Smith   PetscFunctionReturn(0);
1682b9cda22cSBarry Smith }
1683b9cda22cSBarry Smith 
1684b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1685b9cda22cSBarry Smith {
1686b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1687b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1688d2840e09SBarry Smith   const PetscScalar *x,*v;
1689d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1690b9cda22cSBarry Smith   PetscErrorCode    ierr;
1691d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1692b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1693b9cda22cSBarry Smith 
1694b9cda22cSBarry Smith   PetscFunctionBegin;
1695b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16963649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1697b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1698b9cda22cSBarry Smith   idx  = a->j;
1699b9cda22cSBarry Smith   v    = a->a;
1700b9cda22cSBarry Smith   ii   = a->i;
1701b9cda22cSBarry Smith 
1702b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1703b9cda22cSBarry Smith     jrow  = ii[i];
1704b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1705b9cda22cSBarry Smith     sum1  = 0.0;
1706b9cda22cSBarry Smith     sum2  = 0.0;
1707b9cda22cSBarry Smith     sum3  = 0.0;
1708b9cda22cSBarry Smith     sum4  = 0.0;
1709b9cda22cSBarry Smith     sum5  = 0.0;
1710b9cda22cSBarry Smith     sum6  = 0.0;
1711b9cda22cSBarry Smith     sum7  = 0.0;
1712b9cda22cSBarry Smith     sum8  = 0.0;
1713b9cda22cSBarry Smith     sum9  = 0.0;
1714b9cda22cSBarry Smith     sum10 = 0.0;
1715b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1716b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1717b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1718b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1719b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1720b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1721b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1722b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1723b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1724b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1725b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1726b9cda22cSBarry Smith       jrow++;
1727b9cda22cSBarry Smith     }
1728b9cda22cSBarry Smith     y[10*i]   += sum1;
1729b9cda22cSBarry Smith     y[10*i+1] += sum2;
1730b9cda22cSBarry Smith     y[10*i+2] += sum3;
1731b9cda22cSBarry Smith     y[10*i+3] += sum4;
1732b9cda22cSBarry Smith     y[10*i+4] += sum5;
1733b9cda22cSBarry Smith     y[10*i+5] += sum6;
1734b9cda22cSBarry Smith     y[10*i+6] += sum7;
1735b9cda22cSBarry Smith     y[10*i+7] += sum8;
1736b9cda22cSBarry Smith     y[10*i+8] += sum9;
1737b9cda22cSBarry Smith     y[10*i+9] += sum10;
1738b9cda22cSBarry Smith   }
1739b9cda22cSBarry Smith 
1740dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17413649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1742b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1743b9cda22cSBarry Smith   PetscFunctionReturn(0);
1744b9cda22cSBarry Smith }
1745b9cda22cSBarry Smith 
1746b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1747b9cda22cSBarry Smith {
1748b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1749b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1750d2840e09SBarry Smith   const PetscScalar *x,*v;
1751d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1752b9cda22cSBarry Smith   PetscErrorCode    ierr;
1753d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1754d2840e09SBarry Smith   PetscInt          n,i;
1755b9cda22cSBarry Smith 
1756b9cda22cSBarry Smith   PetscFunctionBegin;
1757d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
17583649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1759b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1760b9cda22cSBarry Smith 
1761b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1762b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1763b9cda22cSBarry Smith     v       = a->a + a->i[i];
1764b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1765e29fdc22SBarry Smith     alpha1  = x[10*i];
1766e29fdc22SBarry Smith     alpha2  = x[10*i+1];
1767e29fdc22SBarry Smith     alpha3  = x[10*i+2];
1768e29fdc22SBarry Smith     alpha4  = x[10*i+3];
1769e29fdc22SBarry Smith     alpha5  = x[10*i+4];
1770e29fdc22SBarry Smith     alpha6  = x[10*i+5];
1771e29fdc22SBarry Smith     alpha7  = x[10*i+6];
1772e29fdc22SBarry Smith     alpha8  = x[10*i+7];
1773e29fdc22SBarry Smith     alpha9  = x[10*i+8];
1774b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1775b9cda22cSBarry Smith     while (n-->0) {
1776e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1777e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1778e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1779e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1780e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1781e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1782e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1783e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1784e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1785b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1786b9cda22cSBarry Smith       idx++; v++;
1787b9cda22cSBarry Smith     }
1788b9cda22cSBarry Smith   }
1789dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17903649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1791b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1792b9cda22cSBarry Smith   PetscFunctionReturn(0);
1793b9cda22cSBarry Smith }
1794b9cda22cSBarry Smith 
1795b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1796b9cda22cSBarry Smith {
1797b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1798b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1799d2840e09SBarry Smith   const PetscScalar *x,*v;
1800d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1801b9cda22cSBarry Smith   PetscErrorCode    ierr;
1802d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1803d2840e09SBarry Smith   PetscInt          n,i;
1804b9cda22cSBarry Smith 
1805b9cda22cSBarry Smith   PetscFunctionBegin;
1806b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18073649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1808b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1809b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1810b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1811b9cda22cSBarry Smith     v       = a->a + a->i[i];
1812b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1813b9cda22cSBarry Smith     alpha1  = x[10*i];
1814b9cda22cSBarry Smith     alpha2  = x[10*i+1];
1815b9cda22cSBarry Smith     alpha3  = x[10*i+2];
1816b9cda22cSBarry Smith     alpha4  = x[10*i+3];
1817b9cda22cSBarry Smith     alpha5  = x[10*i+4];
1818b9cda22cSBarry Smith     alpha6  = x[10*i+5];
1819b9cda22cSBarry Smith     alpha7  = x[10*i+6];
1820b9cda22cSBarry Smith     alpha8  = x[10*i+7];
1821b9cda22cSBarry Smith     alpha9  = x[10*i+8];
1822b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1823b9cda22cSBarry Smith     while (n-->0) {
1824b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1825b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1826b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1827b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1828b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1829b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1830b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1831b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1832b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1833b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1834b9cda22cSBarry Smith       idx++; v++;
1835b9cda22cSBarry Smith     }
1836b9cda22cSBarry Smith   }
1837dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18383649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1839b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1840b9cda22cSBarry Smith   PetscFunctionReturn(0);
1841b9cda22cSBarry Smith }
1842b9cda22cSBarry Smith 
18432b692628SMatthew Knepley 
18442f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
1845dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1846dbdd7285SBarry Smith {
1847dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1848dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1849d2840e09SBarry Smith   const PetscScalar *x,*v;
1850d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1851dbdd7285SBarry Smith   PetscErrorCode    ierr;
1852d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1853d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1854dbdd7285SBarry Smith 
1855dbdd7285SBarry Smith   PetscFunctionBegin;
18563649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1857dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1858dbdd7285SBarry Smith   idx  = a->j;
1859dbdd7285SBarry Smith   v    = a->a;
1860dbdd7285SBarry Smith   ii   = a->i;
1861dbdd7285SBarry Smith 
1862dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1863dbdd7285SBarry Smith     jrow  = ii[i];
1864dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1865dbdd7285SBarry Smith     sum1  = 0.0;
1866dbdd7285SBarry Smith     sum2  = 0.0;
1867dbdd7285SBarry Smith     sum3  = 0.0;
1868dbdd7285SBarry Smith     sum4  = 0.0;
1869dbdd7285SBarry Smith     sum5  = 0.0;
1870dbdd7285SBarry Smith     sum6  = 0.0;
1871dbdd7285SBarry Smith     sum7  = 0.0;
1872dbdd7285SBarry Smith     sum8  = 0.0;
1873dbdd7285SBarry Smith     sum9  = 0.0;
1874dbdd7285SBarry Smith     sum10 = 0.0;
1875dbdd7285SBarry Smith     sum11 = 0.0;
187626fbe8dcSKarl Rupp 
1877dbdd7285SBarry Smith     nonzerorow += (n>0);
1878dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1879dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1880dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1881dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1882dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1883dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1884dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1885dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1886dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1887dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1888dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1889dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1890dbdd7285SBarry Smith       jrow++;
1891dbdd7285SBarry Smith     }
1892dbdd7285SBarry Smith     y[11*i]    = sum1;
1893dbdd7285SBarry Smith     y[11*i+1]  = sum2;
1894dbdd7285SBarry Smith     y[11*i+2]  = sum3;
1895dbdd7285SBarry Smith     y[11*i+3]  = sum4;
1896dbdd7285SBarry Smith     y[11*i+4]  = sum5;
1897dbdd7285SBarry Smith     y[11*i+5]  = sum6;
1898dbdd7285SBarry Smith     y[11*i+6]  = sum7;
1899dbdd7285SBarry Smith     y[11*i+7]  = sum8;
1900dbdd7285SBarry Smith     y[11*i+8]  = sum9;
1901dbdd7285SBarry Smith     y[11*i+9]  = sum10;
1902dbdd7285SBarry Smith     y[11*i+10] = sum11;
1903dbdd7285SBarry Smith   }
1904dbdd7285SBarry Smith 
1905dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1907dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1908dbdd7285SBarry Smith   PetscFunctionReturn(0);
1909dbdd7285SBarry Smith }
1910dbdd7285SBarry Smith 
1911dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1912dbdd7285SBarry Smith {
1913dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1914dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1915d2840e09SBarry Smith   const PetscScalar *x,*v;
1916d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1917dbdd7285SBarry Smith   PetscErrorCode    ierr;
1918d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1919dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1920dbdd7285SBarry Smith 
1921dbdd7285SBarry Smith   PetscFunctionBegin;
1922dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1924dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1925dbdd7285SBarry Smith   idx  = a->j;
1926dbdd7285SBarry Smith   v    = a->a;
1927dbdd7285SBarry Smith   ii   = a->i;
1928dbdd7285SBarry Smith 
1929dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1930dbdd7285SBarry Smith     jrow  = ii[i];
1931dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1932dbdd7285SBarry Smith     sum1  = 0.0;
1933dbdd7285SBarry Smith     sum2  = 0.0;
1934dbdd7285SBarry Smith     sum3  = 0.0;
1935dbdd7285SBarry Smith     sum4  = 0.0;
1936dbdd7285SBarry Smith     sum5  = 0.0;
1937dbdd7285SBarry Smith     sum6  = 0.0;
1938dbdd7285SBarry Smith     sum7  = 0.0;
1939dbdd7285SBarry Smith     sum8  = 0.0;
1940dbdd7285SBarry Smith     sum9  = 0.0;
1941dbdd7285SBarry Smith     sum10 = 0.0;
1942dbdd7285SBarry Smith     sum11 = 0.0;
1943dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1944dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1945dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1946dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1947dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1948dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1949dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1950dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1951dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1952dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1953dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1954dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1955dbdd7285SBarry Smith       jrow++;
1956dbdd7285SBarry Smith     }
1957dbdd7285SBarry Smith     y[11*i]    += sum1;
1958dbdd7285SBarry Smith     y[11*i+1]  += sum2;
1959dbdd7285SBarry Smith     y[11*i+2]  += sum3;
1960dbdd7285SBarry Smith     y[11*i+3]  += sum4;
1961dbdd7285SBarry Smith     y[11*i+4]  += sum5;
1962dbdd7285SBarry Smith     y[11*i+5]  += sum6;
1963dbdd7285SBarry Smith     y[11*i+6]  += sum7;
1964dbdd7285SBarry Smith     y[11*i+7]  += sum8;
1965dbdd7285SBarry Smith     y[11*i+8]  += sum9;
1966dbdd7285SBarry Smith     y[11*i+9]  += sum10;
1967dbdd7285SBarry Smith     y[11*i+10] += sum11;
1968dbdd7285SBarry Smith   }
1969dbdd7285SBarry Smith 
1970dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
19713649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1972dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1973dbdd7285SBarry Smith   PetscFunctionReturn(0);
1974dbdd7285SBarry Smith }
1975dbdd7285SBarry Smith 
1976dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1977dbdd7285SBarry Smith {
1978dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1979dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1980d2840e09SBarry Smith   const PetscScalar *x,*v;
1981d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1982dbdd7285SBarry Smith   PetscErrorCode    ierr;
1983d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1984d2840e09SBarry Smith   PetscInt          n,i;
1985dbdd7285SBarry Smith 
1986dbdd7285SBarry Smith   PetscFunctionBegin;
1987d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
19883649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1989dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1990dbdd7285SBarry Smith 
1991dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1992dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1993dbdd7285SBarry Smith     v       = a->a + a->i[i];
1994dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
1995dbdd7285SBarry Smith     alpha1  = x[11*i];
1996dbdd7285SBarry Smith     alpha2  = x[11*i+1];
1997dbdd7285SBarry Smith     alpha3  = x[11*i+2];
1998dbdd7285SBarry Smith     alpha4  = x[11*i+3];
1999dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2000dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2001dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2002dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2003dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2004dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2005dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2006dbdd7285SBarry Smith     while (n-->0) {
2007dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2008dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2009dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2010dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2011dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2012dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2013dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2014dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2015dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2016dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2017dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2018dbdd7285SBarry Smith       idx++; v++;
2019dbdd7285SBarry Smith     }
2020dbdd7285SBarry Smith   }
2021dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20223649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2023dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2024dbdd7285SBarry Smith   PetscFunctionReturn(0);
2025dbdd7285SBarry Smith }
2026dbdd7285SBarry Smith 
2027dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2028dbdd7285SBarry Smith {
2029dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2030dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2031d2840e09SBarry Smith   const PetscScalar *x,*v;
2032d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2033dbdd7285SBarry Smith   PetscErrorCode    ierr;
2034d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2035d2840e09SBarry Smith   PetscInt          n,i;
2036dbdd7285SBarry Smith 
2037dbdd7285SBarry Smith   PetscFunctionBegin;
2038dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
20393649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2040dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2041dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2042dbdd7285SBarry Smith     idx     = a->j + a->i[i];
2043dbdd7285SBarry Smith     v       = a->a + a->i[i];
2044dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
2045dbdd7285SBarry Smith     alpha1  = x[11*i];
2046dbdd7285SBarry Smith     alpha2  = x[11*i+1];
2047dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2048dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2049dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2050dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2051dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2052dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2053dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2054dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2055dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2056dbdd7285SBarry Smith     while (n-->0) {
2057dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2058dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2059dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2060dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2061dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2062dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2063dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2064dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2065dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2066dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2067dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2068dbdd7285SBarry Smith       idx++; v++;
2069dbdd7285SBarry Smith     }
2070dbdd7285SBarry Smith   }
2071dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2073dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2074dbdd7285SBarry Smith   PetscFunctionReturn(0);
2075dbdd7285SBarry Smith }
2076dbdd7285SBarry Smith 
2077dbdd7285SBarry Smith 
2078dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2079dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
20802f7816d4SBarry Smith {
20812f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
20822f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2083d2840e09SBarry Smith   const PetscScalar *x,*v;
2084d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
20852f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2086dfbe8321SBarry Smith   PetscErrorCode    ierr;
2087d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2088d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
20892f7816d4SBarry Smith 
20902f7816d4SBarry Smith   PetscFunctionBegin;
20913649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
20921ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
20932f7816d4SBarry Smith   idx  = a->j;
20942f7816d4SBarry Smith   v    = a->a;
20952f7816d4SBarry Smith   ii   = a->i;
20962f7816d4SBarry Smith 
20972f7816d4SBarry Smith   for (i=0; i<m; i++) {
20982f7816d4SBarry Smith     jrow  = ii[i];
20992f7816d4SBarry Smith     n     = ii[i+1] - jrow;
21002f7816d4SBarry Smith     sum1  = 0.0;
21012f7816d4SBarry Smith     sum2  = 0.0;
21022f7816d4SBarry Smith     sum3  = 0.0;
21032f7816d4SBarry Smith     sum4  = 0.0;
21042f7816d4SBarry Smith     sum5  = 0.0;
21052f7816d4SBarry Smith     sum6  = 0.0;
21062f7816d4SBarry Smith     sum7  = 0.0;
21072f7816d4SBarry Smith     sum8  = 0.0;
21082f7816d4SBarry Smith     sum9  = 0.0;
21092f7816d4SBarry Smith     sum10 = 0.0;
21102f7816d4SBarry Smith     sum11 = 0.0;
21112f7816d4SBarry Smith     sum12 = 0.0;
21122f7816d4SBarry Smith     sum13 = 0.0;
21132f7816d4SBarry Smith     sum14 = 0.0;
21142f7816d4SBarry Smith     sum15 = 0.0;
21152f7816d4SBarry Smith     sum16 = 0.0;
211626fbe8dcSKarl Rupp 
2117b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21182f7816d4SBarry Smith     for (j=0; j<n; j++) {
21192f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21202f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21212f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21222f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21232f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
21242f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
21252f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
21262f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
21272f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
21282f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
21292f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
21302f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
21312f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
21322f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
21332f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
21342f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
21352f7816d4SBarry Smith       jrow++;
21362f7816d4SBarry Smith     }
21372f7816d4SBarry Smith     y[16*i]    = sum1;
21382f7816d4SBarry Smith     y[16*i+1]  = sum2;
21392f7816d4SBarry Smith     y[16*i+2]  = sum3;
21402f7816d4SBarry Smith     y[16*i+3]  = sum4;
21412f7816d4SBarry Smith     y[16*i+4]  = sum5;
21422f7816d4SBarry Smith     y[16*i+5]  = sum6;
21432f7816d4SBarry Smith     y[16*i+6]  = sum7;
21442f7816d4SBarry Smith     y[16*i+7]  = sum8;
21452f7816d4SBarry Smith     y[16*i+8]  = sum9;
21462f7816d4SBarry Smith     y[16*i+9]  = sum10;
21472f7816d4SBarry Smith     y[16*i+10] = sum11;
21482f7816d4SBarry Smith     y[16*i+11] = sum12;
21492f7816d4SBarry Smith     y[16*i+12] = sum13;
21502f7816d4SBarry Smith     y[16*i+13] = sum14;
21512f7816d4SBarry Smith     y[16*i+14] = sum15;
21522f7816d4SBarry Smith     y[16*i+15] = sum16;
21532f7816d4SBarry Smith   }
21542f7816d4SBarry Smith 
2155dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
21563649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
21571ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
21582f7816d4SBarry Smith   PetscFunctionReturn(0);
21592f7816d4SBarry Smith }
21602f7816d4SBarry Smith 
2161dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21622f7816d4SBarry Smith {
21632f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21642f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2165d2840e09SBarry Smith   const PetscScalar *x,*v;
2166d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
21672f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2168dfbe8321SBarry Smith   PetscErrorCode    ierr;
2169d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2170d2840e09SBarry Smith   PetscInt          n,i;
21712f7816d4SBarry Smith 
21722f7816d4SBarry Smith   PetscFunctionBegin;
2173d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
21743649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21751ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2176bfec09a0SHong Zhang 
21772f7816d4SBarry Smith   for (i=0; i<m; i++) {
2178bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2179bfec09a0SHong Zhang     v       = a->a + a->i[i];
21802f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
21812f7816d4SBarry Smith     alpha1  = x[16*i];
21822f7816d4SBarry Smith     alpha2  = x[16*i+1];
21832f7816d4SBarry Smith     alpha3  = x[16*i+2];
21842f7816d4SBarry Smith     alpha4  = x[16*i+3];
21852f7816d4SBarry Smith     alpha5  = x[16*i+4];
21862f7816d4SBarry Smith     alpha6  = x[16*i+5];
21872f7816d4SBarry Smith     alpha7  = x[16*i+6];
21882f7816d4SBarry Smith     alpha8  = x[16*i+7];
21892f7816d4SBarry Smith     alpha9  = x[16*i+8];
21902f7816d4SBarry Smith     alpha10 = x[16*i+9];
21912f7816d4SBarry Smith     alpha11 = x[16*i+10];
21922f7816d4SBarry Smith     alpha12 = x[16*i+11];
21932f7816d4SBarry Smith     alpha13 = x[16*i+12];
21942f7816d4SBarry Smith     alpha14 = x[16*i+13];
21952f7816d4SBarry Smith     alpha15 = x[16*i+14];
21962f7816d4SBarry Smith     alpha16 = x[16*i+15];
21972f7816d4SBarry Smith     while (n-->0) {
21982f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
21992f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22002f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22012f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22022f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22032f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22042f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22052f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22062f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22072f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22082f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22092f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22102f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22112f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22122f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22132f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22142f7816d4SBarry Smith       idx++; v++;
22152f7816d4SBarry Smith     }
22162f7816d4SBarry Smith   }
2217dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22183649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22191ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22202f7816d4SBarry Smith   PetscFunctionReturn(0);
22212f7816d4SBarry Smith }
22222f7816d4SBarry Smith 
2223dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
22242f7816d4SBarry Smith {
22252f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22262f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2227d2840e09SBarry Smith   const PetscScalar *x,*v;
2228d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
22292f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2230dfbe8321SBarry Smith   PetscErrorCode    ierr;
2231d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2232b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
22332f7816d4SBarry Smith 
22342f7816d4SBarry Smith   PetscFunctionBegin;
22352f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
22363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22371ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
22382f7816d4SBarry Smith   idx  = a->j;
22392f7816d4SBarry Smith   v    = a->a;
22402f7816d4SBarry Smith   ii   = a->i;
22412f7816d4SBarry Smith 
22422f7816d4SBarry Smith   for (i=0; i<m; i++) {
22432f7816d4SBarry Smith     jrow  = ii[i];
22442f7816d4SBarry Smith     n     = ii[i+1] - jrow;
22452f7816d4SBarry Smith     sum1  = 0.0;
22462f7816d4SBarry Smith     sum2  = 0.0;
22472f7816d4SBarry Smith     sum3  = 0.0;
22482f7816d4SBarry Smith     sum4  = 0.0;
22492f7816d4SBarry Smith     sum5  = 0.0;
22502f7816d4SBarry Smith     sum6  = 0.0;
22512f7816d4SBarry Smith     sum7  = 0.0;
22522f7816d4SBarry Smith     sum8  = 0.0;
22532f7816d4SBarry Smith     sum9  = 0.0;
22542f7816d4SBarry Smith     sum10 = 0.0;
22552f7816d4SBarry Smith     sum11 = 0.0;
22562f7816d4SBarry Smith     sum12 = 0.0;
22572f7816d4SBarry Smith     sum13 = 0.0;
22582f7816d4SBarry Smith     sum14 = 0.0;
22592f7816d4SBarry Smith     sum15 = 0.0;
22602f7816d4SBarry Smith     sum16 = 0.0;
22612f7816d4SBarry Smith     for (j=0; j<n; j++) {
22622f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
22632f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
22642f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
22652f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
22662f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22672f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22682f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22692f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22702f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22712f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22722f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22732f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22742f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22752f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22762f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22772f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22782f7816d4SBarry Smith       jrow++;
22792f7816d4SBarry Smith     }
22802f7816d4SBarry Smith     y[16*i]    += sum1;
22812f7816d4SBarry Smith     y[16*i+1]  += sum2;
22822f7816d4SBarry Smith     y[16*i+2]  += sum3;
22832f7816d4SBarry Smith     y[16*i+3]  += sum4;
22842f7816d4SBarry Smith     y[16*i+4]  += sum5;
22852f7816d4SBarry Smith     y[16*i+5]  += sum6;
22862f7816d4SBarry Smith     y[16*i+6]  += sum7;
22872f7816d4SBarry Smith     y[16*i+7]  += sum8;
22882f7816d4SBarry Smith     y[16*i+8]  += sum9;
22892f7816d4SBarry Smith     y[16*i+9]  += sum10;
22902f7816d4SBarry Smith     y[16*i+10] += sum11;
22912f7816d4SBarry Smith     y[16*i+11] += sum12;
22922f7816d4SBarry Smith     y[16*i+12] += sum13;
22932f7816d4SBarry Smith     y[16*i+13] += sum14;
22942f7816d4SBarry Smith     y[16*i+14] += sum15;
22952f7816d4SBarry Smith     y[16*i+15] += sum16;
22962f7816d4SBarry Smith   }
22972f7816d4SBarry Smith 
2298dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22993649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23001ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23012f7816d4SBarry Smith   PetscFunctionReturn(0);
23022f7816d4SBarry Smith }
23032f7816d4SBarry Smith 
2304dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23052f7816d4SBarry Smith {
23062f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23072f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2308d2840e09SBarry Smith   const PetscScalar *x,*v;
2309d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23102f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2311dfbe8321SBarry Smith   PetscErrorCode    ierr;
2312d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2313d2840e09SBarry Smith   PetscInt          n,i;
23142f7816d4SBarry Smith 
23152f7816d4SBarry Smith   PetscFunctionBegin;
23162f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23173649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23181ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23192f7816d4SBarry Smith   for (i=0; i<m; i++) {
2320bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2321bfec09a0SHong Zhang     v       = a->a + a->i[i];
23222f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
23232f7816d4SBarry Smith     alpha1  = x[16*i];
23242f7816d4SBarry Smith     alpha2  = x[16*i+1];
23252f7816d4SBarry Smith     alpha3  = x[16*i+2];
23262f7816d4SBarry Smith     alpha4  = x[16*i+3];
23272f7816d4SBarry Smith     alpha5  = x[16*i+4];
23282f7816d4SBarry Smith     alpha6  = x[16*i+5];
23292f7816d4SBarry Smith     alpha7  = x[16*i+6];
23302f7816d4SBarry Smith     alpha8  = x[16*i+7];
23312f7816d4SBarry Smith     alpha9  = x[16*i+8];
23322f7816d4SBarry Smith     alpha10 = x[16*i+9];
23332f7816d4SBarry Smith     alpha11 = x[16*i+10];
23342f7816d4SBarry Smith     alpha12 = x[16*i+11];
23352f7816d4SBarry Smith     alpha13 = x[16*i+12];
23362f7816d4SBarry Smith     alpha14 = x[16*i+13];
23372f7816d4SBarry Smith     alpha15 = x[16*i+14];
23382f7816d4SBarry Smith     alpha16 = x[16*i+15];
23392f7816d4SBarry Smith     while (n-->0) {
23402f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
23412f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
23422f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
23432f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
23442f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
23452f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
23462f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
23472f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
23482f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
23492f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
23502f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
23512f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
23522f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
23532f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
23542f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
23552f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
23562f7816d4SBarry Smith       idx++; v++;
23572f7816d4SBarry Smith     }
23582f7816d4SBarry Smith   }
2359dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23603649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23611ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
236266ed3db0SBarry Smith   PetscFunctionReturn(0);
236366ed3db0SBarry Smith }
236466ed3db0SBarry Smith 
2365ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2366ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2367ed1c418cSBarry Smith {
2368ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2369ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2370d2840e09SBarry Smith   const PetscScalar *x,*v;
2371d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2372ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2373ed1c418cSBarry Smith   PetscErrorCode    ierr;
2374d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2375d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2376ed1c418cSBarry Smith 
2377ed1c418cSBarry Smith   PetscFunctionBegin;
23783649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2379ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2380ed1c418cSBarry Smith   idx  = a->j;
2381ed1c418cSBarry Smith   v    = a->a;
2382ed1c418cSBarry Smith   ii   = a->i;
2383ed1c418cSBarry Smith 
2384ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2385ed1c418cSBarry Smith     jrow  = ii[i];
2386ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2387ed1c418cSBarry Smith     sum1  = 0.0;
2388ed1c418cSBarry Smith     sum2  = 0.0;
2389ed1c418cSBarry Smith     sum3  = 0.0;
2390ed1c418cSBarry Smith     sum4  = 0.0;
2391ed1c418cSBarry Smith     sum5  = 0.0;
2392ed1c418cSBarry Smith     sum6  = 0.0;
2393ed1c418cSBarry Smith     sum7  = 0.0;
2394ed1c418cSBarry Smith     sum8  = 0.0;
2395ed1c418cSBarry Smith     sum9  = 0.0;
2396ed1c418cSBarry Smith     sum10 = 0.0;
2397ed1c418cSBarry Smith     sum11 = 0.0;
2398ed1c418cSBarry Smith     sum12 = 0.0;
2399ed1c418cSBarry Smith     sum13 = 0.0;
2400ed1c418cSBarry Smith     sum14 = 0.0;
2401ed1c418cSBarry Smith     sum15 = 0.0;
2402ed1c418cSBarry Smith     sum16 = 0.0;
2403ed1c418cSBarry Smith     sum17 = 0.0;
2404ed1c418cSBarry Smith     sum18 = 0.0;
240526fbe8dcSKarl Rupp 
2406ed1c418cSBarry Smith     nonzerorow += (n>0);
2407ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2408ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2409ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2410ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2411ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2412ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2413ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2414ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2415ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2416ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2417ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2418ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2419ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2420ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2421ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2422ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2423ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2424ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2425ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2426ed1c418cSBarry Smith       jrow++;
2427ed1c418cSBarry Smith     }
2428ed1c418cSBarry Smith     y[18*i]    = sum1;
2429ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2430ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2431ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2432ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2433ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2434ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2435ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2436ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2437ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2438ed1c418cSBarry Smith     y[18*i+10] = sum11;
2439ed1c418cSBarry Smith     y[18*i+11] = sum12;
2440ed1c418cSBarry Smith     y[18*i+12] = sum13;
2441ed1c418cSBarry Smith     y[18*i+13] = sum14;
2442ed1c418cSBarry Smith     y[18*i+14] = sum15;
2443ed1c418cSBarry Smith     y[18*i+15] = sum16;
2444ed1c418cSBarry Smith     y[18*i+16] = sum17;
2445ed1c418cSBarry Smith     y[18*i+17] = sum18;
2446ed1c418cSBarry Smith   }
2447ed1c418cSBarry Smith 
2448dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
24493649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2450ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2451ed1c418cSBarry Smith   PetscFunctionReturn(0);
2452ed1c418cSBarry Smith }
2453ed1c418cSBarry Smith 
2454ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2455ed1c418cSBarry Smith {
2456ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2457ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2458d2840e09SBarry Smith   const PetscScalar *x,*v;
2459d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2460ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2461ed1c418cSBarry Smith   PetscErrorCode    ierr;
2462d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2463d2840e09SBarry Smith   PetscInt          n,i;
2464ed1c418cSBarry Smith 
2465ed1c418cSBarry Smith   PetscFunctionBegin;
2466d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
24673649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2468ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2469ed1c418cSBarry Smith 
2470ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2471ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2472ed1c418cSBarry Smith     v       = a->a + a->i[i];
2473ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2474ed1c418cSBarry Smith     alpha1  = x[18*i];
2475ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2476ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2477ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2478ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2479ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2480ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2481ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2482ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2483ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2484ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2485ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2486ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2487ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2488ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2489ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2490ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2491ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2492ed1c418cSBarry Smith     while (n-->0) {
2493ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2494ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2495ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2496ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2497ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2498ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2499ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2500ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2501ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2502ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2503ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2504ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2505ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2506ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2507ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2508ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2509ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2510ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2511ed1c418cSBarry Smith       idx++; v++;
2512ed1c418cSBarry Smith     }
2513ed1c418cSBarry Smith   }
2514dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2516ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2517ed1c418cSBarry Smith   PetscFunctionReturn(0);
2518ed1c418cSBarry Smith }
2519ed1c418cSBarry Smith 
2520ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2521ed1c418cSBarry Smith {
2522ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2523ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2524d2840e09SBarry Smith   const PetscScalar *x,*v;
2525d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2526ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2527ed1c418cSBarry Smith   PetscErrorCode    ierr;
2528d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2529ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2530ed1c418cSBarry Smith 
2531ed1c418cSBarry Smith   PetscFunctionBegin;
2532ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
25333649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2534ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2535ed1c418cSBarry Smith   idx  = a->j;
2536ed1c418cSBarry Smith   v    = a->a;
2537ed1c418cSBarry Smith   ii   = a->i;
2538ed1c418cSBarry Smith 
2539ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2540ed1c418cSBarry Smith     jrow  = ii[i];
2541ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2542ed1c418cSBarry Smith     sum1  = 0.0;
2543ed1c418cSBarry Smith     sum2  = 0.0;
2544ed1c418cSBarry Smith     sum3  = 0.0;
2545ed1c418cSBarry Smith     sum4  = 0.0;
2546ed1c418cSBarry Smith     sum5  = 0.0;
2547ed1c418cSBarry Smith     sum6  = 0.0;
2548ed1c418cSBarry Smith     sum7  = 0.0;
2549ed1c418cSBarry Smith     sum8  = 0.0;
2550ed1c418cSBarry Smith     sum9  = 0.0;
2551ed1c418cSBarry Smith     sum10 = 0.0;
2552ed1c418cSBarry Smith     sum11 = 0.0;
2553ed1c418cSBarry Smith     sum12 = 0.0;
2554ed1c418cSBarry Smith     sum13 = 0.0;
2555ed1c418cSBarry Smith     sum14 = 0.0;
2556ed1c418cSBarry Smith     sum15 = 0.0;
2557ed1c418cSBarry Smith     sum16 = 0.0;
2558ed1c418cSBarry Smith     sum17 = 0.0;
2559ed1c418cSBarry Smith     sum18 = 0.0;
2560ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2561ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2562ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2563ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2564ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2565ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2566ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2567ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2568ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2569ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2570ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2571ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2572ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2573ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2574ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2575ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2576ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2577ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2578ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2579ed1c418cSBarry Smith       jrow++;
2580ed1c418cSBarry Smith     }
2581ed1c418cSBarry Smith     y[18*i]    += sum1;
2582ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2583ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2584ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2585ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2586ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2587ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2588ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2589ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2590ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2591ed1c418cSBarry Smith     y[18*i+10] += sum11;
2592ed1c418cSBarry Smith     y[18*i+11] += sum12;
2593ed1c418cSBarry Smith     y[18*i+12] += sum13;
2594ed1c418cSBarry Smith     y[18*i+13] += sum14;
2595ed1c418cSBarry Smith     y[18*i+14] += sum15;
2596ed1c418cSBarry Smith     y[18*i+15] += sum16;
2597ed1c418cSBarry Smith     y[18*i+16] += sum17;
2598ed1c418cSBarry Smith     y[18*i+17] += sum18;
2599ed1c418cSBarry Smith   }
2600ed1c418cSBarry Smith 
2601dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2603ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2604ed1c418cSBarry Smith   PetscFunctionReturn(0);
2605ed1c418cSBarry Smith }
2606ed1c418cSBarry Smith 
2607ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2608ed1c418cSBarry Smith {
2609ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2610ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2611d2840e09SBarry Smith   const PetscScalar *x,*v;
2612d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2613ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2614ed1c418cSBarry Smith   PetscErrorCode    ierr;
2615d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2616d2840e09SBarry Smith   PetscInt          n,i;
2617ed1c418cSBarry Smith 
2618ed1c418cSBarry Smith   PetscFunctionBegin;
2619ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2621ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2622ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2623ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2624ed1c418cSBarry Smith     v       = a->a + a->i[i];
2625ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2626ed1c418cSBarry Smith     alpha1  = x[18*i];
2627ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2628ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2629ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2630ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2631ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2632ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2633ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2634ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2635ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2636ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2637ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2638ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2639ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2640ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2641ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2642ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2643ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2644ed1c418cSBarry Smith     while (n-->0) {
2645ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2646ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2647ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2648ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2649ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2650ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2651ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2652ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2653ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2654ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2655ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2656ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2657ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2658ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2659ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2660ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2661ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2662ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2663ed1c418cSBarry Smith       idx++; v++;
2664ed1c418cSBarry Smith     }
2665ed1c418cSBarry Smith   }
2666dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26673649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2668ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2669ed1c418cSBarry Smith   PetscFunctionReturn(0);
2670ed1c418cSBarry Smith }
2671ed1c418cSBarry Smith 
26726861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
26736861f193SBarry Smith {
26746861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26756861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26766861f193SBarry Smith   const PetscScalar *x,*v;
26776861f193SBarry Smith   PetscScalar       *y,*sums;
26786861f193SBarry Smith   PetscErrorCode    ierr;
26796861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
26806861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
26816861f193SBarry Smith 
26826861f193SBarry Smith   PetscFunctionBegin;
26833649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
26846861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
26856861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
26866861f193SBarry Smith   idx  = a->j;
26876861f193SBarry Smith   v    = a->a;
26886861f193SBarry Smith   ii   = a->i;
26896861f193SBarry Smith 
26906861f193SBarry Smith   for (i=0; i<m; i++) {
26916861f193SBarry Smith     jrow = ii[i];
26926861f193SBarry Smith     n    = ii[i+1] - jrow;
26936861f193SBarry Smith     sums = y + dof*i;
26946861f193SBarry Smith     for (j=0; j<n; j++) {
26956861f193SBarry Smith       for (k=0; k<dof; k++) {
26966861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
26976861f193SBarry Smith       }
26986861f193SBarry Smith       jrow++;
26996861f193SBarry Smith     }
27006861f193SBarry Smith   }
27016861f193SBarry Smith 
27026861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27046861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27056861f193SBarry Smith   PetscFunctionReturn(0);
27066861f193SBarry Smith }
27076861f193SBarry Smith 
27086861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27096861f193SBarry Smith {
27106861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27116861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27126861f193SBarry Smith   const PetscScalar *x,*v;
27136861f193SBarry Smith   PetscScalar       *y,*sums;
27146861f193SBarry Smith   PetscErrorCode    ierr;
27156861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27166861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27176861f193SBarry Smith 
27186861f193SBarry Smith   PetscFunctionBegin;
27196861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27216861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27226861f193SBarry Smith   idx  = a->j;
27236861f193SBarry Smith   v    = a->a;
27246861f193SBarry Smith   ii   = a->i;
27256861f193SBarry Smith 
27266861f193SBarry Smith   for (i=0; i<m; i++) {
27276861f193SBarry Smith     jrow = ii[i];
27286861f193SBarry Smith     n    = ii[i+1] - jrow;
27296861f193SBarry Smith     sums = y + dof*i;
27306861f193SBarry Smith     for (j=0; j<n; j++) {
27316861f193SBarry Smith       for (k=0; k<dof; k++) {
27326861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
27336861f193SBarry Smith       }
27346861f193SBarry Smith       jrow++;
27356861f193SBarry Smith     }
27366861f193SBarry Smith   }
27376861f193SBarry Smith 
27386861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27406861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
27416861f193SBarry Smith   PetscFunctionReturn(0);
27426861f193SBarry Smith }
27436861f193SBarry Smith 
27446861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27456861f193SBarry Smith {
27466861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27476861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27486861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27496861f193SBarry Smith   PetscScalar       *y;
27506861f193SBarry Smith   PetscErrorCode    ierr;
27516861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27526861f193SBarry Smith   PetscInt          n,i,k;
27536861f193SBarry Smith 
27546861f193SBarry Smith   PetscFunctionBegin;
27553649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27566861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27576861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27586861f193SBarry Smith   for (i=0; i<m; i++) {
27596861f193SBarry Smith     idx   = a->j + a->i[i];
27606861f193SBarry Smith     v     = a->a + a->i[i];
27616861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27626861f193SBarry Smith     alpha = x + dof*i;
27636861f193SBarry Smith     while (n-->0) {
27646861f193SBarry Smith       for (k=0; k<dof; k++) {
27656861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27666861f193SBarry Smith       }
276783ce7366SBarry Smith       idx++; v++;
27686861f193SBarry Smith     }
27696861f193SBarry Smith   }
27706861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27713649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27726861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27736861f193SBarry Smith   PetscFunctionReturn(0);
27746861f193SBarry Smith }
27756861f193SBarry Smith 
27766861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27776861f193SBarry Smith {
27786861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27796861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27806861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27816861f193SBarry Smith   PetscScalar       *y;
27826861f193SBarry Smith   PetscErrorCode    ierr;
27836861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27846861f193SBarry Smith   PetscInt          n,i,k;
27856861f193SBarry Smith 
27866861f193SBarry Smith   PetscFunctionBegin;
27876861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27883649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27896861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27906861f193SBarry Smith   for (i=0; i<m; i++) {
27916861f193SBarry Smith     idx   = a->j + a->i[i];
27926861f193SBarry Smith     v     = a->a + a->i[i];
27936861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27946861f193SBarry Smith     alpha = x + dof*i;
27956861f193SBarry Smith     while (n-->0) {
27966861f193SBarry Smith       for (k=0; k<dof; k++) {
27976861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27986861f193SBarry Smith       }
279983ce7366SBarry Smith       idx++; v++;
28006861f193SBarry Smith     }
28016861f193SBarry Smith   }
28026861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28046861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28056861f193SBarry Smith   PetscFunctionReturn(0);
28066861f193SBarry Smith }
28076861f193SBarry Smith 
2808f4a53059SBarry Smith /*===================================================================================*/
2809dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2810f4a53059SBarry Smith {
28114cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2812dfbe8321SBarry Smith   PetscErrorCode ierr;
2813f4a53059SBarry Smith 
2814b24ad042SBarry Smith   PetscFunctionBegin;
2815f4a53059SBarry Smith   /* start the scatter */
2816ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28174cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2818ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28194cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2820f4a53059SBarry Smith   PetscFunctionReturn(0);
2821f4a53059SBarry Smith }
2822f4a53059SBarry Smith 
2823dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
28244cb9d9b8SBarry Smith {
28254cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2826dfbe8321SBarry Smith   PetscErrorCode ierr;
2827b24ad042SBarry Smith 
28284cb9d9b8SBarry Smith   PetscFunctionBegin;
28294cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
28304cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2831ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2832ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28334cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28344cb9d9b8SBarry Smith }
28354cb9d9b8SBarry Smith 
2836dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28374cb9d9b8SBarry Smith {
28384cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2839dfbe8321SBarry Smith   PetscErrorCode ierr;
28404cb9d9b8SBarry Smith 
2841b24ad042SBarry Smith   PetscFunctionBegin;
28424cb9d9b8SBarry Smith   /* start the scatter */
2843ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2844d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2845ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2846717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
28474cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28484cb9d9b8SBarry Smith }
28494cb9d9b8SBarry Smith 
2850dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28514cb9d9b8SBarry Smith {
28524cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2853dfbe8321SBarry Smith   PetscErrorCode ierr;
2854b24ad042SBarry Smith 
28554cb9d9b8SBarry Smith   PetscFunctionBegin;
28564cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2857ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2858d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2859ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28604cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28614cb9d9b8SBarry Smith }
28624cb9d9b8SBarry Smith 
286395ddefa2SHong Zhang /* ----------------------------------------------------------------*/
28647ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
28657ba1a0bfSKris Buschelman {
28667ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
28670298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
28687ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
28697ba1a0bfSKris Buschelman   Mat                P         =pp->AIJ;
28707ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2871d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
28727ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2873d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2874d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
28757ba1a0bfSKris Buschelman   MatScalar          *ca;
2876d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
28777ba1a0bfSKris Buschelman 
28787ba1a0bfSKris Buschelman   PetscFunctionBegin;
28797ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
28807ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
28817ba1a0bfSKris Buschelman 
28827ba1a0bfSKris Buschelman   cn = pn*ppdof;
28837ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
28847ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
2885854ce69bSBarry Smith   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
28867ba1a0bfSKris Buschelman   ci[0] = 0;
28877ba1a0bfSKris Buschelman 
28887ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
2889dcca6d9dSJed Brown   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2890c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2891c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
28927ba1a0bfSKris Buschelman 
28937ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
28947ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
28957ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2896f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
28977ba1a0bfSKris Buschelman   current_space = free_space;
28987ba1a0bfSKris Buschelman 
28997ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
29007ba1a0bfSKris Buschelman   for (i=0; i<pn; i++) {
29017ba1a0bfSKris Buschelman     ptnzi = pti[i+1] - pti[i];
29027ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
29037ba1a0bfSKris Buschelman     for (dof=0; dof<ppdof; dof++) {
29047ba1a0bfSKris Buschelman       ptanzi = 0;
29057ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
29067ba1a0bfSKris Buschelman       for (j=0; j<ptnzi; j++) {
29077ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
29087ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
29097ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
29107ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
29117ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
29127ba1a0bfSKris Buschelman         for (k=0; k<anzj; k++) {
29137ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
29147ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
29157ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
29167ba1a0bfSKris Buschelman           }
29177ba1a0bfSKris Buschelman         }
29187ba1a0bfSKris Buschelman       }
29197ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
29207ba1a0bfSKris Buschelman       ptaj = ptasparserow;
29217ba1a0bfSKris Buschelman       cnzi = 0;
29227ba1a0bfSKris Buschelman       for (j=0; j<ptanzi; j++) {
29237ba1a0bfSKris Buschelman         /* Get offset within block of P */
29247ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
29257ba1a0bfSKris Buschelman         /* Get block row of P */
29267ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
29277ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
29287ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
29297ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
29307ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
29317ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
29327ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
29337ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
29347ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
29357ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
29367ba1a0bfSKris Buschelman           }
29377ba1a0bfSKris Buschelman         }
29387ba1a0bfSKris Buschelman       }
29397ba1a0bfSKris Buschelman 
29407ba1a0bfSKris Buschelman       /* sort sparserow */
29417ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
29427ba1a0bfSKris Buschelman 
29437ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
29447ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
29457ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
2946f91af8c7SBarry Smith         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
29477ba1a0bfSKris Buschelman       }
29487ba1a0bfSKris Buschelman 
29497ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
29507ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
295126fbe8dcSKarl Rupp 
29527ba1a0bfSKris Buschelman       current_space->array           += cnzi;
29537ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
29547ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
29557ba1a0bfSKris Buschelman 
295626fbe8dcSKarl Rupp       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
295726fbe8dcSKarl Rupp       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
295826fbe8dcSKarl Rupp 
29597ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
29607ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
29617ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
29627ba1a0bfSKris Buschelman     }
29637ba1a0bfSKris Buschelman   }
29647ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
29657ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
29667ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
2967854ce69bSBarry Smith   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
2968a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
296974ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
29707ba1a0bfSKris Buschelman 
29717ba1a0bfSKris Buschelman   /* Allocate space for ca */
2972854ce69bSBarry Smith   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
29737ba1a0bfSKris Buschelman 
29747ba1a0bfSKris Buschelman   /* put together the new matrix */
2975ce94432eSBarry Smith   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2976f27682edSJed Brown   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
29777ba1a0bfSKris Buschelman 
29787ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29797ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
29807ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ*)((*C)->data);
2981e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
2982e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
29837ba1a0bfSKris Buschelman   c->nonew   = 0;
298426fbe8dcSKarl Rupp 
2985d76021d8SHong Zhang   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
29867ba1a0bfSKris Buschelman 
29877ba1a0bfSKris Buschelman   /* Clean up. */
29887ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29897ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
29907ba1a0bfSKris Buschelman }
29917ba1a0bfSKris Buschelman 
29927ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
29937ba1a0bfSKris Buschelman {
29947ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
29957ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
29967ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
29977ba1a0bfSKris Buschelman   Mat             P  =pp->AIJ;
29987ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
29997ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
30007ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3001a2ea699eSBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3002d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3003d2840e09SBarry Smith   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3004d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3005a2ea699eSBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3006d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
30077ba1a0bfSKris Buschelman 
30087ba1a0bfSKris Buschelman   PetscFunctionBegin;
30097ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
30101795a4d1SJed Brown   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
30117ba1a0bfSKris Buschelman 
30127ba1a0bfSKris Buschelman   /* Clear old values in C */
30137ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
30147ba1a0bfSKris Buschelman 
30157ba1a0bfSKris Buschelman   for (i=0; i<am; i++) {
30167ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
30177ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
30187ba1a0bfSKris Buschelman     apnzj = 0;
30197ba1a0bfSKris Buschelman     for (j=0; j<anzi; j++) {
30207ba1a0bfSKris Buschelman       /* Get offset within block of P */
30217ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
30227ba1a0bfSKris Buschelman       /* Get block row of P */
30237ba1a0bfSKris Buschelman       prow = *aj++/ppdof;   /* integer division */
30247ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
30257ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
30267ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
30277ba1a0bfSKris Buschelman       for (k=0; k<pnzj; k++) {
30287ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
30297ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
30307ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
30317ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
30327ba1a0bfSKris Buschelman         }
30337ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
30347ba1a0bfSKris Buschelman       }
3035dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
30367ba1a0bfSKris Buschelman       aa++;
30377ba1a0bfSKris Buschelman     }
30387ba1a0bfSKris Buschelman 
30397ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
30407ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
30417ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
30427ba1a0bfSKris Buschelman 
30437ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
30447ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
30457ba1a0bfSKris Buschelman     pshift  = i%ppdof;
30467ba1a0bfSKris Buschelman     poffset = pi[prow];
30477ba1a0bfSKris Buschelman     pnzi    = pi[prow+1] - poffset;
30487ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
30497ba1a0bfSKris Buschelman     pJ = pj+poffset;
30507ba1a0bfSKris Buschelman     pA = pa+poffset;
30517ba1a0bfSKris Buschelman     for (j=0; j<pnzi; j++) {
30527ba1a0bfSKris Buschelman       crow = (*pJ)*ppdof+pshift;
30537ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
30547ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
30557ba1a0bfSKris Buschelman       pJ++;
30567ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
30577ba1a0bfSKris Buschelman       for (k=0,nextap=0; nextap<apnzj; k++) {
305826fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
30597ba1a0bfSKris Buschelman       }
3060dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
30617ba1a0bfSKris Buschelman       pA++;
30627ba1a0bfSKris Buschelman     }
30637ba1a0bfSKris Buschelman 
30647ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
30657ba1a0bfSKris Buschelman     for (j=0; j<apnzj; j++) {
30667ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
30677ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
30687ba1a0bfSKris Buschelman     }
30697ba1a0bfSKris Buschelman   }
30707ba1a0bfSKris Buschelman 
30717ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
30727ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
30737ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
307474ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
307595ddefa2SHong Zhang   PetscFunctionReturn(0);
307695ddefa2SHong Zhang }
30777ba1a0bfSKris Buschelman 
3078150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
30792121bac1SHong Zhang {
30802121bac1SHong Zhang   PetscErrorCode ierr;
30812121bac1SHong Zhang 
30822121bac1SHong Zhang   PetscFunctionBegin;
30832121bac1SHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
30843ff4c91cSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
30852121bac1SHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
30863ff4c91cSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
30872121bac1SHong Zhang   }
30883ff4c91cSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
30892121bac1SHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
30903ff4c91cSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
30912121bac1SHong Zhang   PetscFunctionReturn(0);
30922121bac1SHong Zhang }
30932121bac1SHong Zhang 
3094f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
309595ddefa2SHong Zhang {
309695ddefa2SHong Zhang   PetscErrorCode ierr;
309795ddefa2SHong Zhang 
309895ddefa2SHong Zhang   PetscFunctionBegin;
309995ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3100511c6705SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr);
310195ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
310295ddefa2SHong Zhang   PetscFunctionReturn(0);
310395ddefa2SHong Zhang }
310495ddefa2SHong Zhang 
3105f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
310695ddefa2SHong Zhang {
310795ddefa2SHong Zhang   PetscFunctionBegin;
3108e32f2f54SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
31097ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
31107ba1a0bfSKris Buschelman }
31117ba1a0bfSKris Buschelman 
3112150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
31139e03dfbbSHong Zhang {
31149e03dfbbSHong Zhang   PetscErrorCode ierr;
31159e03dfbbSHong Zhang 
31169e03dfbbSHong Zhang   PetscFunctionBegin;
31179e03dfbbSHong Zhang   if (scall == MAT_INITIAL_MATRIX) {
31183ff4c91cSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
31199e03dfbbSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
31203ff4c91cSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
31219e03dfbbSHong Zhang   }
31223ff4c91cSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
31239e03dfbbSHong Zhang   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
31243ff4c91cSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
31259e03dfbbSHong Zhang   PetscFunctionReturn(0);
31269e03dfbbSHong Zhang }
31279e03dfbbSHong Zhang 
3128cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
31299c4fc4c7SBarry Smith {
31309c4fc4c7SBarry Smith   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3131ceb03754SKris Buschelman   Mat            a    = b->AIJ,B;
31329c4fc4c7SBarry Smith   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
31339c4fc4c7SBarry Smith   PetscErrorCode ierr;
31340fd73130SBarry Smith   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3135ba8c8a56SBarry Smith   PetscInt       *cols;
3136ba8c8a56SBarry Smith   PetscScalar    *vals;
31379c4fc4c7SBarry Smith 
31389c4fc4c7SBarry Smith   PetscFunctionBegin;
31399c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3140785e854fSJed Brown   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
31419c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
31429c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
314326fbe8dcSKarl Rupp     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
31449c4fc4c7SBarry Smith   }
3145ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
31469c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
3147785e854fSJed Brown   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
31489c4fc4c7SBarry Smith   ii   = 0;
31499c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3150ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
31510fd73130SBarry Smith     for (j=0; j<dof; j++) {
315226fbe8dcSKarl Rupp       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3153ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
31549c4fc4c7SBarry Smith       ii++;
31559c4fc4c7SBarry Smith     }
3156ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
31579c4fc4c7SBarry Smith   }
31589c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3159ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161ceb03754SKris Buschelman 
3162511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
316328be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3164c3d102feSKris Buschelman   } else {
3165ceb03754SKris Buschelman     *newmat = B;
3166c3d102feSKris Buschelman   }
31679c4fc4c7SBarry Smith   PetscFunctionReturn(0);
31689c4fc4c7SBarry Smith }
31699c4fc4c7SBarry Smith 
3170c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3171be1d678aSKris Buschelman 
3172cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
31730fd73130SBarry Smith {
31740fd73130SBarry Smith   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3175ceb03754SKris Buschelman   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
31760fd73130SBarry Smith   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
31770fd73130SBarry Smith   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
31780fd73130SBarry Smith   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
31790fd73130SBarry Smith   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
31800298fd71SBarry Smith   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
31810298fd71SBarry Smith   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
31820fd73130SBarry Smith   PetscInt       rstart,cstart,*garray,ii,k;
31830fd73130SBarry Smith   PetscErrorCode ierr;
31840fd73130SBarry Smith   PetscScalar    *vals,*ovals;
31850fd73130SBarry Smith 
31860fd73130SBarry Smith   PetscFunctionBegin;
3187dcca6d9dSJed Brown   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3188d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
31890fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
31900fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
31910fd73130SBarry Smith     for (j=0; j<dof; j++) {
31920fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
31930fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
31940fd73130SBarry Smith     }
31950fd73130SBarry Smith   }
3196ce94432eSBarry Smith   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3197f27682edSJed Brown   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
31980fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
31990fd73130SBarry Smith 
3200dcca6d9dSJed Brown   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3201d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3202d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
32030fd73130SBarry Smith   garray = mpiaij->garray;
32040fd73130SBarry Smith 
32050fd73130SBarry Smith   ii = rstart;
3206d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32070fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32080fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
32090fd73130SBarry Smith     for (j=0; j<dof; j++) {
32100fd73130SBarry Smith       for (k=0; k<ncols; k++) {
32110fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
32120fd73130SBarry Smith       }
32130fd73130SBarry Smith       for (k=0; k<oncols; k++) {
32140fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
32150fd73130SBarry Smith       }
3216ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3217ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
32180fd73130SBarry Smith       ii++;
32190fd73130SBarry Smith     }
32200fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32210fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
32220fd73130SBarry Smith   }
32230fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
32240fd73130SBarry Smith 
3225ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3226ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3227ceb03754SKris Buschelman 
3228511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
32297adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
32307adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
323126fbe8dcSKarl Rupp 
323228be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
323326fbe8dcSKarl Rupp 
32347adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3235c3d102feSKris Buschelman   } else {
3236ceb03754SKris Buschelman     *newmat = B;
3237c3d102feSKris Buschelman   }
32380fd73130SBarry Smith   PetscFunctionReturn(0);
32390fd73130SBarry Smith }
32400fd73130SBarry Smith 
32417dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
32429e516c8fSBarry Smith {
32439e516c8fSBarry Smith   PetscErrorCode ierr;
32449e516c8fSBarry Smith   Mat            A;
32459e516c8fSBarry Smith 
32469e516c8fSBarry Smith   PetscFunctionBegin;
32479e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
32487dae84e0SHong Zhang   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
32499e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
32509e516c8fSBarry Smith   PetscFunctionReturn(0);
32519e516c8fSBarry Smith }
32520fd73130SBarry Smith 
3253bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3254480dffcfSJed Brown /*@
32550bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
32560bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
32570bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
32580bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
32590bad9183SKris Buschelman 
3260ff585edeSJed Brown   Collective
3261ff585edeSJed Brown 
3262ff585edeSJed Brown   Input Parameters:
3263ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3264ff585edeSJed Brown - dof - the block size (number of components per node)
3265ff585edeSJed Brown 
3266ff585edeSJed Brown   Output Parameter:
3267ff585edeSJed Brown . maij - the new MAIJ matrix
3268ff585edeSJed Brown 
32690bad9183SKris Buschelman   Operations provided:
32700fd73130SBarry Smith + MatMult
32710bad9183SKris Buschelman . MatMultTranspose
32720bad9183SKris Buschelman . MatMultAdd
32730bad9183SKris Buschelman . MatMultTransposeAdd
32740fd73130SBarry Smith - MatView
32750bad9183SKris Buschelman 
32760bad9183SKris Buschelman   Level: advanced
32770bad9183SKris Buschelman 
3278b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3279ff585edeSJed Brown @*/
32807087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
328182b1193eSBarry Smith {
3282dfbe8321SBarry Smith   PetscErrorCode ierr;
3283b24ad042SBarry Smith   PetscMPIInt    size;
3284b24ad042SBarry Smith   PetscInt       n;
328582b1193eSBarry Smith   Mat            B;
328682b1193eSBarry Smith 
328782b1193eSBarry Smith   PetscFunctionBegin;
3288d72c5c08SBarry Smith   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3289d72c5c08SBarry Smith 
329026fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
329126fbe8dcSKarl Rupp   else {
3292ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3293d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3294bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3295bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3296bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3297bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
329826fbe8dcSKarl Rupp 
3299cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3300d72c5c08SBarry Smith 
3301ce94432eSBarry Smith     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3302f4a53059SBarry Smith     if (size == 1) {
3303feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3304feb9fe23SJed Brown 
3305b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
330626fbe8dcSKarl Rupp 
33070298fd71SBarry Smith       B->ops->setup   = NULL;
33084cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
33090fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3310feb9fe23SJed Brown       b               = (Mat_SeqMAIJ*)B->data;
3311b9b97703SBarry Smith       b->dof          = dof;
33124cb9d9b8SBarry Smith       b->AIJ          = A;
331326fbe8dcSKarl Rupp 
3314d72c5c08SBarry Smith       if (dof == 2) {
3315cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3316cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3317cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3318cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3319bcc973b7SBarry Smith       } else if (dof == 3) {
3320bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3321bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3322bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3323bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3324bcc973b7SBarry Smith       } else if (dof == 4) {
3325bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3326bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3327bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3328bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3329f9fae5adSBarry Smith       } else if (dof == 5) {
3330f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3331f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3332f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3333f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
33346cd98798SBarry Smith       } else if (dof == 6) {
33356cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
33366cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
33376cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
33386cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3339ed8eea03SBarry Smith       } else if (dof == 7) {
3340ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3341ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3342ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3343ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
334466ed3db0SBarry Smith       } else if (dof == 8) {
334566ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
334666ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
334766ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
334866ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
33492b692628SMatthew Knepley       } else if (dof == 9) {
33502b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
33512b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
33522b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
33532b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3354b9cda22cSBarry Smith       } else if (dof == 10) {
3355b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3356b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3357b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3358b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3359dbdd7285SBarry Smith       } else if (dof == 11) {
3360dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3361dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3362dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3363dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
33642f7816d4SBarry Smith       } else if (dof == 16) {
33652f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
33662f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
33672f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
33682f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3369ed1c418cSBarry Smith       } else if (dof == 18) {
3370ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3371ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3372ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3373ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
337482b1193eSBarry Smith       } else {
33756861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
33766861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
33776861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
33786861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
337982b1193eSBarry Smith       }
3380bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3381bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3382*59e29146SRichard Tran Mills       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3383279e4bd4SRichard Tran Mills       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3384f4a53059SBarry Smith     } else {
3385f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3386feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3387f4a53059SBarry Smith       IS          from,to;
3388f4a53059SBarry Smith       Vec         gvec;
3389f4a53059SBarry Smith 
3390b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
339126fbe8dcSKarl Rupp 
33920298fd71SBarry Smith       B->ops->setup   = NULL;
3393d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
33940fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
339526fbe8dcSKarl Rupp 
3396b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3397b9b97703SBarry Smith       b->dof = dof;
3398b9b97703SBarry Smith       b->A   = A;
339926fbe8dcSKarl Rupp 
34004cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
34014cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
34024cb9d9b8SBarry Smith 
3403f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3404a34bdc0bSBarry Smith       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3405a34bdc0bSBarry Smith       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
340613288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3407a34bdc0bSBarry Smith       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3408f4a53059SBarry Smith 
3409f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3410ce94432eSBarry Smith       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3411f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3412f4a53059SBarry Smith 
3413f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3414ce94432eSBarry Smith       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3415f4a53059SBarry Smith 
3416f4a53059SBarry Smith       /* generate the scatter context */
3417f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3418f4a53059SBarry Smith 
34196bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
34206bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
34216bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3422f4a53059SBarry Smith 
3423f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
34244cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
34254cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
34264cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
342726fbe8dcSKarl Rupp 
3428bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3429bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3430f4a53059SBarry Smith     }
34317dae84e0SHong Zhang     B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
34324994cf47SJed Brown     ierr  = MatSetUp(B);CHKERRQ(ierr);
3433cd3bbe55SBarry Smith     *maij = B;
3434146574abSBarry Smith     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3435d72c5c08SBarry Smith   }
343682b1193eSBarry Smith   PetscFunctionReturn(0);
343782b1193eSBarry Smith }
3438