xref: /petsc/src/mat/impls/maij/maij.c (revision c248e2fdb97266ed9d2a39cacf953cbeab8a7e95)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
2182b1193eSBarry Smith 
22b350b9acSSatish Balay /*@
23ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24ff585edeSJed Brown 
25ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26ff585edeSJed Brown 
27ff585edeSJed Brown    Input Parameter:
28ff585edeSJed Brown .  A - the MAIJ matrix
29ff585edeSJed Brown 
30ff585edeSJed Brown    Output Parameter:
31ff585edeSJed Brown .  B - the AIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Level: advanced
34ff585edeSJed Brown 
3595452b02SPatrick Sanan    Notes:
3695452b02SPatrick Sanan     The reference count on the AIJ matrix is not increased so you should not destroy it.
37ff585edeSJed Brown 
38ff585edeSJed Brown .seealso: MatCreateMAIJ()
39ff585edeSJed Brown @*/
407087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41b9b97703SBarry Smith {
42dfbe8321SBarry Smith   PetscErrorCode ierr;
43ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
44b9b97703SBarry Smith 
45b9b97703SBarry Smith   PetscFunctionBegin;
46251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
47251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
48b9b97703SBarry Smith   if (ismpimaij) {
49b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50b9b97703SBarry Smith 
51b9b97703SBarry Smith     *B = b->A;
52b9b97703SBarry Smith   } else if (isseqmaij) {
53b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54b9b97703SBarry Smith 
55b9b97703SBarry Smith     *B = b->AIJ;
56b9b97703SBarry Smith   } else {
57b9b97703SBarry Smith     *B = A;
58b9b97703SBarry Smith   }
59b9b97703SBarry Smith   PetscFunctionReturn(0);
60b9b97703SBarry Smith }
61b9b97703SBarry Smith 
62480dffcfSJed Brown /*@
63ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64ff585edeSJed Brown 
653f9fe445SBarry Smith    Logically Collective
66ff585edeSJed Brown 
67ff585edeSJed Brown    Input Parameter:
68ff585edeSJed Brown +  A - the MAIJ matrix
69ff585edeSJed Brown -  dof - the block size for the new matrix
70ff585edeSJed Brown 
71ff585edeSJed Brown    Output Parameter:
72ff585edeSJed Brown .  B - the new MAIJ matrix
73ff585edeSJed Brown 
74ff585edeSJed Brown    Level: advanced
75ff585edeSJed Brown 
76ff585edeSJed Brown .seealso: MatCreateMAIJ()
77ff585edeSJed Brown @*/
787087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79b9b97703SBarry Smith {
80dfbe8321SBarry Smith   PetscErrorCode ierr;
810298fd71SBarry Smith   Mat            Aij = NULL;
82b9b97703SBarry Smith 
83b9b97703SBarry Smith   PetscFunctionBegin;
84c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
85b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
86b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
87b9b97703SBarry Smith   PetscFunctionReturn(0);
88b9b97703SBarry Smith }
89b9b97703SBarry Smith 
90dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9182b1193eSBarry Smith {
92dfbe8321SBarry Smith   PetscErrorCode ierr;
934cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9482b1193eSBarry Smith 
9582b1193eSBarry Smith   PetscFunctionBegin;
966bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
97bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
98bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
994222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
1004cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1014cb9d9b8SBarry Smith }
1024cb9d9b8SBarry Smith 
103356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
104356c569eSBarry Smith {
105356c569eSBarry Smith   PetscFunctionBegin;
106ce94432eSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107356c569eSBarry Smith   PetscFunctionReturn(0);
108356c569eSBarry Smith }
109356c569eSBarry Smith 
1100fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1110fd73130SBarry Smith {
1120fd73130SBarry Smith   PetscErrorCode ierr;
1130fd73130SBarry Smith   Mat            B;
1140fd73130SBarry Smith 
1150fd73130SBarry Smith   PetscFunctionBegin;
116a2ea699eSBarry Smith   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1170fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1186bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1190fd73130SBarry Smith   PetscFunctionReturn(0);
1200fd73130SBarry Smith }
1210fd73130SBarry Smith 
1220fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1230fd73130SBarry Smith {
1240fd73130SBarry Smith   PetscErrorCode ierr;
1250fd73130SBarry Smith   Mat            B;
1260fd73130SBarry Smith 
1270fd73130SBarry Smith   PetscFunctionBegin;
128a2ea699eSBarry Smith   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1290fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1306bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1310fd73130SBarry Smith   PetscFunctionReturn(0);
1320fd73130SBarry Smith }
1330fd73130SBarry Smith 
134dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1354cb9d9b8SBarry Smith {
136dfbe8321SBarry Smith   PetscErrorCode ierr;
1374cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1384cb9d9b8SBarry Smith 
1394cb9d9b8SBarry Smith   PetscFunctionBegin;
1406bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1416bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1426bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1436bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1446bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
145bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
146bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
1474222ddf1SHong Zhang   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
148dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
149b9b97703SBarry Smith   PetscFunctionReturn(0);
150b9b97703SBarry Smith }
151b9b97703SBarry Smith 
1520bad9183SKris Buschelman /*MC
153fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1540bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1550bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1560bad9183SKris Buschelman 
1570bad9183SKris Buschelman   Operations provided:
158a2b725a8SWilliam Gropp + MatMult
1590bad9183SKris Buschelman . MatMultTranspose
1600bad9183SKris Buschelman . MatMultAdd
161a2b725a8SWilliam Gropp - MatMultTransposeAdd
1620bad9183SKris Buschelman 
1630bad9183SKris Buschelman   Level: advanced
1640bad9183SKris Buschelman 
165b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1660bad9183SKris Buschelman M*/
1670bad9183SKris Buschelman 
1688cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
16982b1193eSBarry Smith {
170dfbe8321SBarry Smith   PetscErrorCode ierr;
1714cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
17264b52464SHong Zhang   PetscMPIInt    size;
17382b1193eSBarry Smith 
17482b1193eSBarry Smith   PetscFunctionBegin;
175b00a9115SJed Brown   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
176b0a32e0cSBarry Smith   A->data  = (void*)b;
17726fbe8dcSKarl Rupp 
178cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
17926fbe8dcSKarl Rupp 
180356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
181f4a53059SBarry Smith 
182cd3bbe55SBarry Smith   b->AIJ  = 0;
183cd3bbe55SBarry Smith   b->dof  = 0;
184f4a53059SBarry Smith   b->OAIJ = 0;
185f4a53059SBarry Smith   b->ctx  = 0;
186f4a53059SBarry Smith   b->w    = 0;
187ce94432eSBarry Smith   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
18864b52464SHong Zhang   if (size == 1) {
18964b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
19064b52464SHong Zhang   } else {
19164b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
19264b52464SHong Zhang   }
19332e7c8b0SBarry Smith   A->preallocated  = PETSC_TRUE;
19432e7c8b0SBarry Smith   A->assembled     = PETSC_TRUE;
19582b1193eSBarry Smith   PetscFunctionReturn(0);
19682b1193eSBarry Smith }
19782b1193eSBarry Smith 
198cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
199dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
20082b1193eSBarry Smith {
2014cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
202bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
203d2840e09SBarry Smith   const PetscScalar *x,*v;
204d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
205dfbe8321SBarry Smith   PetscErrorCode    ierr;
206d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
207d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
20882b1193eSBarry Smith 
209bcc973b7SBarry Smith   PetscFunctionBegin;
2103649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2111ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
212bcc973b7SBarry Smith   idx  = a->j;
213bcc973b7SBarry Smith   v    = a->a;
214bcc973b7SBarry Smith   ii   = a->i;
215bcc973b7SBarry Smith 
216bcc973b7SBarry Smith   for (i=0; i<m; i++) {
217bcc973b7SBarry Smith     jrow  = ii[i];
218bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
219bcc973b7SBarry Smith     sum1  = 0.0;
220bcc973b7SBarry Smith     sum2  = 0.0;
22126fbe8dcSKarl Rupp 
222b3c51c66SMatthew Knepley     nonzerorow += (n>0);
223bcc973b7SBarry Smith     for (j=0; j<n; j++) {
224bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
225bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
226bcc973b7SBarry Smith       jrow++;
227bcc973b7SBarry Smith     }
228bcc973b7SBarry Smith     y[2*i]   = sum1;
229bcc973b7SBarry Smith     y[2*i+1] = sum2;
230bcc973b7SBarry Smith   }
231bcc973b7SBarry Smith 
232dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2333649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2341ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23582b1193eSBarry Smith   PetscFunctionReturn(0);
23682b1193eSBarry Smith }
237bcc973b7SBarry Smith 
238dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
23982b1193eSBarry Smith {
2404cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
241bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
242d2840e09SBarry Smith   const PetscScalar *x,*v;
243d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
244dfbe8321SBarry Smith   PetscErrorCode    ierr;
245d2840e09SBarry Smith   PetscInt          n,i;
246d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
24782b1193eSBarry Smith 
248bcc973b7SBarry Smith   PetscFunctionBegin;
249d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2503649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2511ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
252bfec09a0SHong Zhang 
253bcc973b7SBarry Smith   for (i=0; i<m; i++) {
254bfec09a0SHong Zhang     idx    = a->j + a->i[i];
255bfec09a0SHong Zhang     v      = a->a + a->i[i];
256bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
257bcc973b7SBarry Smith     alpha1 = x[2*i];
258bcc973b7SBarry Smith     alpha2 = x[2*i+1];
25926fbe8dcSKarl Rupp     while (n-->0) {
26026fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
26126fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
26226fbe8dcSKarl Rupp       idx++; v++;
26326fbe8dcSKarl Rupp     }
264bcc973b7SBarry Smith   }
265dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2663649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2671ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
26882b1193eSBarry Smith   PetscFunctionReturn(0);
26982b1193eSBarry Smith }
270bcc973b7SBarry Smith 
271dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
27282b1193eSBarry Smith {
2734cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
274bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
275d2840e09SBarry Smith   const PetscScalar *x,*v;
276d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
277dfbe8321SBarry Smith   PetscErrorCode    ierr;
278b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
279d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28082b1193eSBarry Smith 
281bcc973b7SBarry Smith   PetscFunctionBegin;
282f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2833649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2841ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
285bcc973b7SBarry Smith   idx  = a->j;
286bcc973b7SBarry Smith   v    = a->a;
287bcc973b7SBarry Smith   ii   = a->i;
288bcc973b7SBarry Smith 
289bcc973b7SBarry Smith   for (i=0; i<m; i++) {
290bcc973b7SBarry Smith     jrow = ii[i];
291bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
292bcc973b7SBarry Smith     sum1 = 0.0;
293bcc973b7SBarry Smith     sum2 = 0.0;
294bcc973b7SBarry Smith     for (j=0; j<n; j++) {
295bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
296bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
297bcc973b7SBarry Smith       jrow++;
298bcc973b7SBarry Smith     }
299bcc973b7SBarry Smith     y[2*i]   += sum1;
300bcc973b7SBarry Smith     y[2*i+1] += sum2;
301bcc973b7SBarry Smith   }
302bcc973b7SBarry Smith 
303dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3043649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3051ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
306bcc973b7SBarry Smith   PetscFunctionReturn(0);
30782b1193eSBarry Smith }
308dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
30982b1193eSBarry Smith {
3104cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
311bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
312d2840e09SBarry Smith   const PetscScalar *x,*v;
313d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
314dfbe8321SBarry Smith   PetscErrorCode    ierr;
315d2840e09SBarry Smith   PetscInt          n,i;
316d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
31782b1193eSBarry Smith 
318bcc973b7SBarry Smith   PetscFunctionBegin;
319f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3211ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
322bfec09a0SHong Zhang 
323bcc973b7SBarry Smith   for (i=0; i<m; i++) {
324bfec09a0SHong Zhang     idx    = a->j + a->i[i];
325bfec09a0SHong Zhang     v      = a->a + a->i[i];
326bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
327bcc973b7SBarry Smith     alpha1 = x[2*i];
328bcc973b7SBarry Smith     alpha2 = x[2*i+1];
32926fbe8dcSKarl Rupp     while (n-->0) {
33026fbe8dcSKarl Rupp       y[2*(*idx)]   += alpha1*(*v);
33126fbe8dcSKarl Rupp       y[2*(*idx)+1] += alpha2*(*v);
33226fbe8dcSKarl Rupp       idx++; v++;
33326fbe8dcSKarl Rupp     }
334bcc973b7SBarry Smith   }
335dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3363649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3371ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
338bcc973b7SBarry Smith   PetscFunctionReturn(0);
33982b1193eSBarry Smith }
340cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
341dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
342bcc973b7SBarry Smith {
3434cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
344bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
345d2840e09SBarry Smith   const PetscScalar *x,*v;
346d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
347dfbe8321SBarry Smith   PetscErrorCode    ierr;
348d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
349d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
35082b1193eSBarry Smith 
351bcc973b7SBarry Smith   PetscFunctionBegin;
3523649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3531ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
354bcc973b7SBarry Smith   idx  = a->j;
355bcc973b7SBarry Smith   v    = a->a;
356bcc973b7SBarry Smith   ii   = a->i;
357bcc973b7SBarry Smith 
358bcc973b7SBarry Smith   for (i=0; i<m; i++) {
359bcc973b7SBarry Smith     jrow  = ii[i];
360bcc973b7SBarry Smith     n     = ii[i+1] - jrow;
361bcc973b7SBarry Smith     sum1  = 0.0;
362bcc973b7SBarry Smith     sum2  = 0.0;
363bcc973b7SBarry Smith     sum3  = 0.0;
36426fbe8dcSKarl Rupp 
365b3c51c66SMatthew Knepley     nonzerorow += (n>0);
366bcc973b7SBarry Smith     for (j=0; j<n; j++) {
367bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
368bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
369bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
370bcc973b7SBarry Smith       jrow++;
371bcc973b7SBarry Smith      }
372bcc973b7SBarry Smith     y[3*i]   = sum1;
373bcc973b7SBarry Smith     y[3*i+1] = sum2;
374bcc973b7SBarry Smith     y[3*i+2] = sum3;
375bcc973b7SBarry Smith   }
376bcc973b7SBarry Smith 
377dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3783649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3791ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
380bcc973b7SBarry Smith   PetscFunctionReturn(0);
381bcc973b7SBarry Smith }
382bcc973b7SBarry Smith 
383dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
384bcc973b7SBarry Smith {
3854cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
386bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
387d2840e09SBarry Smith   const PetscScalar *x,*v;
388d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
389dfbe8321SBarry Smith   PetscErrorCode    ierr;
390d2840e09SBarry Smith   PetscInt          n,i;
391d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
392bcc973b7SBarry Smith 
393bcc973b7SBarry Smith   PetscFunctionBegin;
394d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
3953649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3961ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
397bfec09a0SHong Zhang 
398bcc973b7SBarry Smith   for (i=0; i<m; i++) {
399bfec09a0SHong Zhang     idx    = a->j + a->i[i];
400bfec09a0SHong Zhang     v      = a->a + a->i[i];
401bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
402bcc973b7SBarry Smith     alpha1 = x[3*i];
403bcc973b7SBarry Smith     alpha2 = x[3*i+1];
404bcc973b7SBarry Smith     alpha3 = x[3*i+2];
405bcc973b7SBarry Smith     while (n-->0) {
406bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
407bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
408bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
409bcc973b7SBarry Smith       idx++; v++;
410bcc973b7SBarry Smith     }
411bcc973b7SBarry Smith   }
412dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4133649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4141ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
415bcc973b7SBarry Smith   PetscFunctionReturn(0);
416bcc973b7SBarry Smith }
417bcc973b7SBarry Smith 
418dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
419bcc973b7SBarry Smith {
4204cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
421bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
422d2840e09SBarry Smith   const PetscScalar *x,*v;
423d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
424dfbe8321SBarry Smith   PetscErrorCode    ierr;
425b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
426d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
427bcc973b7SBarry Smith 
428bcc973b7SBarry Smith   PetscFunctionBegin;
429f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4303649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4311ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
432bcc973b7SBarry Smith   idx  = a->j;
433bcc973b7SBarry Smith   v    = a->a;
434bcc973b7SBarry Smith   ii   = a->i;
435bcc973b7SBarry Smith 
436bcc973b7SBarry Smith   for (i=0; i<m; i++) {
437bcc973b7SBarry Smith     jrow = ii[i];
438bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
439bcc973b7SBarry Smith     sum1 = 0.0;
440bcc973b7SBarry Smith     sum2 = 0.0;
441bcc973b7SBarry Smith     sum3 = 0.0;
442bcc973b7SBarry Smith     for (j=0; j<n; j++) {
443bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
444bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
445bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
446bcc973b7SBarry Smith       jrow++;
447bcc973b7SBarry Smith     }
448bcc973b7SBarry Smith     y[3*i]   += sum1;
449bcc973b7SBarry Smith     y[3*i+1] += sum2;
450bcc973b7SBarry Smith     y[3*i+2] += sum3;
451bcc973b7SBarry Smith   }
452bcc973b7SBarry Smith 
453dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4543649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4551ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
456bcc973b7SBarry Smith   PetscFunctionReturn(0);
457bcc973b7SBarry Smith }
458dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
459bcc973b7SBarry Smith {
4604cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
461bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
462d2840e09SBarry Smith   const PetscScalar *x,*v;
463d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
464dfbe8321SBarry Smith   PetscErrorCode    ierr;
465d2840e09SBarry Smith   PetscInt          n,i;
466d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
467bcc973b7SBarry Smith 
468bcc973b7SBarry Smith   PetscFunctionBegin;
469f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4703649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4711ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
472bcc973b7SBarry Smith   for (i=0; i<m; i++) {
473bfec09a0SHong Zhang     idx    = a->j + a->i[i];
474bfec09a0SHong Zhang     v      = a->a + a->i[i];
475bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
476bcc973b7SBarry Smith     alpha1 = x[3*i];
477bcc973b7SBarry Smith     alpha2 = x[3*i+1];
478bcc973b7SBarry Smith     alpha3 = x[3*i+2];
479bcc973b7SBarry Smith     while (n-->0) {
480bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
481bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
482bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
483bcc973b7SBarry Smith       idx++; v++;
484bcc973b7SBarry Smith     }
485bcc973b7SBarry Smith   }
486dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4873649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4881ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
489bcc973b7SBarry Smith   PetscFunctionReturn(0);
490bcc973b7SBarry Smith }
491bcc973b7SBarry Smith 
492bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
493dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
494bcc973b7SBarry Smith {
4954cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
496bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
497d2840e09SBarry Smith   const PetscScalar *x,*v;
498d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
499dfbe8321SBarry Smith   PetscErrorCode    ierr;
500d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
501d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
502bcc973b7SBarry Smith 
503bcc973b7SBarry Smith   PetscFunctionBegin;
5043649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5051ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
506bcc973b7SBarry Smith   idx  = a->j;
507bcc973b7SBarry Smith   v    = a->a;
508bcc973b7SBarry Smith   ii   = a->i;
509bcc973b7SBarry Smith 
510bcc973b7SBarry Smith   for (i=0; i<m; i++) {
511bcc973b7SBarry Smith     jrow        = ii[i];
512bcc973b7SBarry Smith     n           = ii[i+1] - jrow;
513bcc973b7SBarry Smith     sum1        = 0.0;
514bcc973b7SBarry Smith     sum2        = 0.0;
515bcc973b7SBarry Smith     sum3        = 0.0;
516bcc973b7SBarry Smith     sum4        = 0.0;
517b3c51c66SMatthew Knepley     nonzerorow += (n>0);
518bcc973b7SBarry Smith     for (j=0; j<n; j++) {
519bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
520bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
521bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
522bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
523bcc973b7SBarry Smith       jrow++;
524bcc973b7SBarry Smith     }
525bcc973b7SBarry Smith     y[4*i]   = sum1;
526bcc973b7SBarry Smith     y[4*i+1] = sum2;
527bcc973b7SBarry Smith     y[4*i+2] = sum3;
528bcc973b7SBarry Smith     y[4*i+3] = sum4;
529bcc973b7SBarry Smith   }
530bcc973b7SBarry Smith 
531dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5323649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5331ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
534bcc973b7SBarry Smith   PetscFunctionReturn(0);
535bcc973b7SBarry Smith }
536bcc973b7SBarry Smith 
537dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
538bcc973b7SBarry Smith {
5394cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
540bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
541d2840e09SBarry Smith   const PetscScalar *x,*v;
542d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
543dfbe8321SBarry Smith   PetscErrorCode    ierr;
544d2840e09SBarry Smith   PetscInt          n,i;
545d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
546bcc973b7SBarry Smith 
547bcc973b7SBarry Smith   PetscFunctionBegin;
548d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5493649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5501ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
551bcc973b7SBarry Smith   for (i=0; i<m; i++) {
552bfec09a0SHong Zhang     idx    = a->j + a->i[i];
553bfec09a0SHong Zhang     v      = a->a + a->i[i];
554bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
555bcc973b7SBarry Smith     alpha1 = x[4*i];
556bcc973b7SBarry Smith     alpha2 = x[4*i+1];
557bcc973b7SBarry Smith     alpha3 = x[4*i+2];
558bcc973b7SBarry Smith     alpha4 = x[4*i+3];
559bcc973b7SBarry Smith     while (n-->0) {
560bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
561bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
562bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
563bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
564bcc973b7SBarry Smith       idx++; v++;
565bcc973b7SBarry Smith     }
566bcc973b7SBarry Smith   }
567dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5683649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5691ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
570bcc973b7SBarry Smith   PetscFunctionReturn(0);
571bcc973b7SBarry Smith }
572bcc973b7SBarry Smith 
573dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
574bcc973b7SBarry Smith {
5754cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
576f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
577d2840e09SBarry Smith   const PetscScalar *x,*v;
578d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
579dfbe8321SBarry Smith   PetscErrorCode    ierr;
580b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
581d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
582f9fae5adSBarry Smith 
583f9fae5adSBarry Smith   PetscFunctionBegin;
584f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5853649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5861ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
587f9fae5adSBarry Smith   idx  = a->j;
588f9fae5adSBarry Smith   v    = a->a;
589f9fae5adSBarry Smith   ii   = a->i;
590f9fae5adSBarry Smith 
591f9fae5adSBarry Smith   for (i=0; i<m; i++) {
592f9fae5adSBarry Smith     jrow = ii[i];
593f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
594f9fae5adSBarry Smith     sum1 = 0.0;
595f9fae5adSBarry Smith     sum2 = 0.0;
596f9fae5adSBarry Smith     sum3 = 0.0;
597f9fae5adSBarry Smith     sum4 = 0.0;
598f9fae5adSBarry Smith     for (j=0; j<n; j++) {
599f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
600f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
601f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
602f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
603f9fae5adSBarry Smith       jrow++;
604f9fae5adSBarry Smith     }
605f9fae5adSBarry Smith     y[4*i]   += sum1;
606f9fae5adSBarry Smith     y[4*i+1] += sum2;
607f9fae5adSBarry Smith     y[4*i+2] += sum3;
608f9fae5adSBarry Smith     y[4*i+3] += sum4;
609f9fae5adSBarry Smith   }
610f9fae5adSBarry Smith 
611dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6123649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6131ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
614f9fae5adSBarry Smith   PetscFunctionReturn(0);
615bcc973b7SBarry Smith }
616dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
617bcc973b7SBarry Smith {
6184cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
619f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
620d2840e09SBarry Smith   const PetscScalar *x,*v;
621d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
622dfbe8321SBarry Smith   PetscErrorCode    ierr;
623d2840e09SBarry Smith   PetscInt          n,i;
624d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
625f9fae5adSBarry Smith 
626f9fae5adSBarry Smith   PetscFunctionBegin;
627f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6283649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6291ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
630bfec09a0SHong Zhang 
631f9fae5adSBarry Smith   for (i=0; i<m; i++) {
632bfec09a0SHong Zhang     idx    = a->j + a->i[i];
633bfec09a0SHong Zhang     v      = a->a + a->i[i];
634f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
635f9fae5adSBarry Smith     alpha1 = x[4*i];
636f9fae5adSBarry Smith     alpha2 = x[4*i+1];
637f9fae5adSBarry Smith     alpha3 = x[4*i+2];
638f9fae5adSBarry Smith     alpha4 = x[4*i+3];
639f9fae5adSBarry Smith     while (n-->0) {
640f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
641f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
642f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
643f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
644f9fae5adSBarry Smith       idx++; v++;
645f9fae5adSBarry Smith     }
646f9fae5adSBarry Smith   }
647dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6483649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6491ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
650f9fae5adSBarry Smith   PetscFunctionReturn(0);
651f9fae5adSBarry Smith }
652f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6536cd98798SBarry Smith 
654dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
655f9fae5adSBarry Smith {
6564cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
657f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
658d2840e09SBarry Smith   const PetscScalar *x,*v;
659d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
660dfbe8321SBarry Smith   PetscErrorCode    ierr;
661d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
662d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
663f9fae5adSBarry Smith 
664f9fae5adSBarry Smith   PetscFunctionBegin;
6653649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6661ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
667f9fae5adSBarry Smith   idx  = a->j;
668f9fae5adSBarry Smith   v    = a->a;
669f9fae5adSBarry Smith   ii   = a->i;
670f9fae5adSBarry Smith 
671f9fae5adSBarry Smith   for (i=0; i<m; i++) {
672f9fae5adSBarry Smith     jrow  = ii[i];
673f9fae5adSBarry Smith     n     = ii[i+1] - jrow;
674f9fae5adSBarry Smith     sum1  = 0.0;
675f9fae5adSBarry Smith     sum2  = 0.0;
676f9fae5adSBarry Smith     sum3  = 0.0;
677f9fae5adSBarry Smith     sum4  = 0.0;
678f9fae5adSBarry Smith     sum5  = 0.0;
67926fbe8dcSKarl Rupp 
680b3c51c66SMatthew Knepley     nonzerorow += (n>0);
681f9fae5adSBarry Smith     for (j=0; j<n; j++) {
682f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
683f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
684f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
685f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
686f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
687f9fae5adSBarry Smith       jrow++;
688f9fae5adSBarry Smith     }
689f9fae5adSBarry Smith     y[5*i]   = sum1;
690f9fae5adSBarry Smith     y[5*i+1] = sum2;
691f9fae5adSBarry Smith     y[5*i+2] = sum3;
692f9fae5adSBarry Smith     y[5*i+3] = sum4;
693f9fae5adSBarry Smith     y[5*i+4] = sum5;
694f9fae5adSBarry Smith   }
695f9fae5adSBarry Smith 
696dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
6973649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6981ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
699f9fae5adSBarry Smith   PetscFunctionReturn(0);
700f9fae5adSBarry Smith }
701f9fae5adSBarry Smith 
702dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
703f9fae5adSBarry Smith {
7044cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
705f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
706d2840e09SBarry Smith   const PetscScalar *x,*v;
707d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
708dfbe8321SBarry Smith   PetscErrorCode    ierr;
709d2840e09SBarry Smith   PetscInt          n,i;
710d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
711f9fae5adSBarry Smith 
712f9fae5adSBarry Smith   PetscFunctionBegin;
713d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7143649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7151ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
716bfec09a0SHong Zhang 
717f9fae5adSBarry Smith   for (i=0; i<m; i++) {
718bfec09a0SHong Zhang     idx    = a->j + a->i[i];
719bfec09a0SHong Zhang     v      = a->a + a->i[i];
720f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
721f9fae5adSBarry Smith     alpha1 = x[5*i];
722f9fae5adSBarry Smith     alpha2 = x[5*i+1];
723f9fae5adSBarry Smith     alpha3 = x[5*i+2];
724f9fae5adSBarry Smith     alpha4 = x[5*i+3];
725f9fae5adSBarry Smith     alpha5 = x[5*i+4];
726f9fae5adSBarry Smith     while (n-->0) {
727f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
728f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
729f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
730f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
731f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
732f9fae5adSBarry Smith       idx++; v++;
733f9fae5adSBarry Smith     }
734f9fae5adSBarry Smith   }
735dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7363649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7371ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
738f9fae5adSBarry Smith   PetscFunctionReturn(0);
739f9fae5adSBarry Smith }
740f9fae5adSBarry Smith 
741dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
742f9fae5adSBarry Smith {
7434cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
744f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
745d2840e09SBarry Smith   const PetscScalar *x,*v;
746d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
747dfbe8321SBarry Smith   PetscErrorCode    ierr;
748b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
749d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
750f9fae5adSBarry Smith 
751f9fae5adSBarry Smith   PetscFunctionBegin;
752f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7533649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7541ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
755f9fae5adSBarry Smith   idx  = a->j;
756f9fae5adSBarry Smith   v    = a->a;
757f9fae5adSBarry Smith   ii   = a->i;
758f9fae5adSBarry Smith 
759f9fae5adSBarry Smith   for (i=0; i<m; i++) {
760f9fae5adSBarry Smith     jrow = ii[i];
761f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
762f9fae5adSBarry Smith     sum1 = 0.0;
763f9fae5adSBarry Smith     sum2 = 0.0;
764f9fae5adSBarry Smith     sum3 = 0.0;
765f9fae5adSBarry Smith     sum4 = 0.0;
766f9fae5adSBarry Smith     sum5 = 0.0;
767f9fae5adSBarry Smith     for (j=0; j<n; j++) {
768f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
769f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
770f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
771f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
772f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
773f9fae5adSBarry Smith       jrow++;
774f9fae5adSBarry Smith     }
775f9fae5adSBarry Smith     y[5*i]   += sum1;
776f9fae5adSBarry Smith     y[5*i+1] += sum2;
777f9fae5adSBarry Smith     y[5*i+2] += sum3;
778f9fae5adSBarry Smith     y[5*i+3] += sum4;
779f9fae5adSBarry Smith     y[5*i+4] += sum5;
780f9fae5adSBarry Smith   }
781f9fae5adSBarry Smith 
782dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7833649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7841ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
785f9fae5adSBarry Smith   PetscFunctionReturn(0);
786f9fae5adSBarry Smith }
787f9fae5adSBarry Smith 
788dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
789f9fae5adSBarry Smith {
7904cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
791f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
792d2840e09SBarry Smith   const PetscScalar *x,*v;
793d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
794dfbe8321SBarry Smith   PetscErrorCode    ierr;
795d2840e09SBarry Smith   PetscInt          n,i;
796d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
797f9fae5adSBarry Smith 
798f9fae5adSBarry Smith   PetscFunctionBegin;
799f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8003649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8011ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
802bfec09a0SHong Zhang 
803f9fae5adSBarry Smith   for (i=0; i<m; i++) {
804bfec09a0SHong Zhang     idx    = a->j + a->i[i];
805bfec09a0SHong Zhang     v      = a->a + a->i[i];
806f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
807f9fae5adSBarry Smith     alpha1 = x[5*i];
808f9fae5adSBarry Smith     alpha2 = x[5*i+1];
809f9fae5adSBarry Smith     alpha3 = x[5*i+2];
810f9fae5adSBarry Smith     alpha4 = x[5*i+3];
811f9fae5adSBarry Smith     alpha5 = x[5*i+4];
812f9fae5adSBarry Smith     while (n-->0) {
813f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
814f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
815f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
816f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
817f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
818f9fae5adSBarry Smith       idx++; v++;
819f9fae5adSBarry Smith     }
820f9fae5adSBarry Smith   }
821dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8223649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8231ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
824f9fae5adSBarry Smith   PetscFunctionReturn(0);
825bcc973b7SBarry Smith }
826bcc973b7SBarry Smith 
8276cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
828dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8296cd98798SBarry Smith {
8306cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8316cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
832d2840e09SBarry Smith   const PetscScalar *x,*v;
833d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
834dfbe8321SBarry Smith   PetscErrorCode    ierr;
835d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
836d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
8376cd98798SBarry Smith 
8386cd98798SBarry Smith   PetscFunctionBegin;
8393649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8401ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8416cd98798SBarry Smith   idx  = a->j;
8426cd98798SBarry Smith   v    = a->a;
8436cd98798SBarry Smith   ii   = a->i;
8446cd98798SBarry Smith 
8456cd98798SBarry Smith   for (i=0; i<m; i++) {
8466cd98798SBarry Smith     jrow  = ii[i];
8476cd98798SBarry Smith     n     = ii[i+1] - jrow;
8486cd98798SBarry Smith     sum1  = 0.0;
8496cd98798SBarry Smith     sum2  = 0.0;
8506cd98798SBarry Smith     sum3  = 0.0;
8516cd98798SBarry Smith     sum4  = 0.0;
8526cd98798SBarry Smith     sum5  = 0.0;
8536cd98798SBarry Smith     sum6  = 0.0;
85426fbe8dcSKarl Rupp 
855b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8566cd98798SBarry Smith     for (j=0; j<n; j++) {
8576cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8586cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8596cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8606cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8616cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8626cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8636cd98798SBarry Smith       jrow++;
8646cd98798SBarry Smith     }
8656cd98798SBarry Smith     y[6*i]   = sum1;
8666cd98798SBarry Smith     y[6*i+1] = sum2;
8676cd98798SBarry Smith     y[6*i+2] = sum3;
8686cd98798SBarry Smith     y[6*i+3] = sum4;
8696cd98798SBarry Smith     y[6*i+4] = sum5;
8706cd98798SBarry Smith     y[6*i+5] = sum6;
8716cd98798SBarry Smith   }
8726cd98798SBarry Smith 
873dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
8743649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8751ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8766cd98798SBarry Smith   PetscFunctionReturn(0);
8776cd98798SBarry Smith }
8786cd98798SBarry Smith 
879dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8806cd98798SBarry Smith {
8816cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8826cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
883d2840e09SBarry Smith   const PetscScalar *x,*v;
884d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
885dfbe8321SBarry Smith   PetscErrorCode    ierr;
886d2840e09SBarry Smith   PetscInt          n,i;
887d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
8886cd98798SBarry Smith 
8896cd98798SBarry Smith   PetscFunctionBegin;
890d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
8913649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8921ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
893bfec09a0SHong Zhang 
8946cd98798SBarry Smith   for (i=0; i<m; i++) {
895bfec09a0SHong Zhang     idx    = a->j + a->i[i];
896bfec09a0SHong Zhang     v      = a->a + a->i[i];
8976cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8986cd98798SBarry Smith     alpha1 = x[6*i];
8996cd98798SBarry Smith     alpha2 = x[6*i+1];
9006cd98798SBarry Smith     alpha3 = x[6*i+2];
9016cd98798SBarry Smith     alpha4 = x[6*i+3];
9026cd98798SBarry Smith     alpha5 = x[6*i+4];
9036cd98798SBarry Smith     alpha6 = x[6*i+5];
9046cd98798SBarry Smith     while (n-->0) {
9056cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9066cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9076cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9086cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9096cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9106cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9116cd98798SBarry Smith       idx++; v++;
9126cd98798SBarry Smith     }
9136cd98798SBarry Smith   }
914dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9161ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9176cd98798SBarry Smith   PetscFunctionReturn(0);
9186cd98798SBarry Smith }
9196cd98798SBarry Smith 
920dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9216cd98798SBarry Smith {
9226cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9236cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
924d2840e09SBarry Smith   const PetscScalar *x,*v;
925d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
926dfbe8321SBarry Smith   PetscErrorCode    ierr;
927b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
928d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9296cd98798SBarry Smith 
9306cd98798SBarry Smith   PetscFunctionBegin;
9316cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9323649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9331ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9346cd98798SBarry Smith   idx  = a->j;
9356cd98798SBarry Smith   v    = a->a;
9366cd98798SBarry Smith   ii   = a->i;
9376cd98798SBarry Smith 
9386cd98798SBarry Smith   for (i=0; i<m; i++) {
9396cd98798SBarry Smith     jrow = ii[i];
9406cd98798SBarry Smith     n    = ii[i+1] - jrow;
9416cd98798SBarry Smith     sum1 = 0.0;
9426cd98798SBarry Smith     sum2 = 0.0;
9436cd98798SBarry Smith     sum3 = 0.0;
9446cd98798SBarry Smith     sum4 = 0.0;
9456cd98798SBarry Smith     sum5 = 0.0;
9466cd98798SBarry Smith     sum6 = 0.0;
9476cd98798SBarry Smith     for (j=0; j<n; j++) {
9486cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9496cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9506cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9516cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9526cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9536cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9546cd98798SBarry Smith       jrow++;
9556cd98798SBarry Smith     }
9566cd98798SBarry Smith     y[6*i]   += sum1;
9576cd98798SBarry Smith     y[6*i+1] += sum2;
9586cd98798SBarry Smith     y[6*i+2] += sum3;
9596cd98798SBarry Smith     y[6*i+3] += sum4;
9606cd98798SBarry Smith     y[6*i+4] += sum5;
9616cd98798SBarry Smith     y[6*i+5] += sum6;
9626cd98798SBarry Smith   }
9636cd98798SBarry Smith 
964dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9653649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9661ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9676cd98798SBarry Smith   PetscFunctionReturn(0);
9686cd98798SBarry Smith }
9696cd98798SBarry Smith 
970dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9716cd98798SBarry Smith {
9726cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9736cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
974d2840e09SBarry Smith   const PetscScalar *x,*v;
975d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
976dfbe8321SBarry Smith   PetscErrorCode    ierr;
977d2840e09SBarry Smith   PetscInt          n,i;
978d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9796cd98798SBarry Smith 
9806cd98798SBarry Smith   PetscFunctionBegin;
9816cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9831ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
984bfec09a0SHong Zhang 
9856cd98798SBarry Smith   for (i=0; i<m; i++) {
986bfec09a0SHong Zhang     idx    = a->j + a->i[i];
987bfec09a0SHong Zhang     v      = a->a + a->i[i];
9886cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9896cd98798SBarry Smith     alpha1 = x[6*i];
9906cd98798SBarry Smith     alpha2 = x[6*i+1];
9916cd98798SBarry Smith     alpha3 = x[6*i+2];
9926cd98798SBarry Smith     alpha4 = x[6*i+3];
9936cd98798SBarry Smith     alpha5 = x[6*i+4];
9946cd98798SBarry Smith     alpha6 = x[6*i+5];
9956cd98798SBarry Smith     while (n-->0) {
9966cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9976cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9986cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9996cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10006cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10016cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10026cd98798SBarry Smith       idx++; v++;
10036cd98798SBarry Smith     }
10046cd98798SBarry Smith   }
1005dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10071ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10086cd98798SBarry Smith   PetscFunctionReturn(0);
10096cd98798SBarry Smith }
10106cd98798SBarry Smith 
101166ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
1012ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1013ed8eea03SBarry Smith {
1014ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1015ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1016d2840e09SBarry Smith   const PetscScalar *x,*v;
1017d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1018ed8eea03SBarry Smith   PetscErrorCode    ierr;
1019d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1020d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1021ed8eea03SBarry Smith 
1022ed8eea03SBarry Smith   PetscFunctionBegin;
10233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1024ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1025ed8eea03SBarry Smith   idx  = a->j;
1026ed8eea03SBarry Smith   v    = a->a;
1027ed8eea03SBarry Smith   ii   = a->i;
1028ed8eea03SBarry Smith 
1029ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1030ed8eea03SBarry Smith     jrow  = ii[i];
1031ed8eea03SBarry Smith     n     = ii[i+1] - jrow;
1032ed8eea03SBarry Smith     sum1  = 0.0;
1033ed8eea03SBarry Smith     sum2  = 0.0;
1034ed8eea03SBarry Smith     sum3  = 0.0;
1035ed8eea03SBarry Smith     sum4  = 0.0;
1036ed8eea03SBarry Smith     sum5  = 0.0;
1037ed8eea03SBarry Smith     sum6  = 0.0;
1038ed8eea03SBarry Smith     sum7  = 0.0;
103926fbe8dcSKarl Rupp 
1040b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1041ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1042ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1043ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1044ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1045ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1046ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1047ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1048ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1049ed8eea03SBarry Smith       jrow++;
1050ed8eea03SBarry Smith     }
1051ed8eea03SBarry Smith     y[7*i]   = sum1;
1052ed8eea03SBarry Smith     y[7*i+1] = sum2;
1053ed8eea03SBarry Smith     y[7*i+2] = sum3;
1054ed8eea03SBarry Smith     y[7*i+3] = sum4;
1055ed8eea03SBarry Smith     y[7*i+4] = sum5;
1056ed8eea03SBarry Smith     y[7*i+5] = sum6;
1057ed8eea03SBarry Smith     y[7*i+6] = sum7;
1058ed8eea03SBarry Smith   }
1059ed8eea03SBarry Smith 
1060dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
10613649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1062ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1063ed8eea03SBarry Smith   PetscFunctionReturn(0);
1064ed8eea03SBarry Smith }
1065ed8eea03SBarry Smith 
1066ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1067ed8eea03SBarry Smith {
1068ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1069ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1070d2840e09SBarry Smith   const PetscScalar *x,*v;
1071d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1072ed8eea03SBarry Smith   PetscErrorCode    ierr;
1073d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1074d2840e09SBarry Smith   PetscInt          n,i;
1075ed8eea03SBarry Smith 
1076ed8eea03SBarry Smith   PetscFunctionBegin;
1077d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
10783649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1079ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1080ed8eea03SBarry Smith 
1081ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1082ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1083ed8eea03SBarry Smith     v      = a->a + a->i[i];
1084ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1085ed8eea03SBarry Smith     alpha1 = x[7*i];
1086ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1087ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1088ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1089ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1090ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1091ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1092ed8eea03SBarry Smith     while (n-->0) {
1093ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1094ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1095ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1096ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1097ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1098ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1099ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1100ed8eea03SBarry Smith       idx++; v++;
1101ed8eea03SBarry Smith     }
1102ed8eea03SBarry Smith   }
1103dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11043649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1105ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1106ed8eea03SBarry Smith   PetscFunctionReturn(0);
1107ed8eea03SBarry Smith }
1108ed8eea03SBarry Smith 
1109ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1110ed8eea03SBarry Smith {
1111ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1112ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1113d2840e09SBarry Smith   const PetscScalar *x,*v;
1114d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1115ed8eea03SBarry Smith   PetscErrorCode    ierr;
1116d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1117ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1118ed8eea03SBarry Smith 
1119ed8eea03SBarry Smith   PetscFunctionBegin;
1120ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1122ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1123ed8eea03SBarry Smith   idx  = a->j;
1124ed8eea03SBarry Smith   v    = a->a;
1125ed8eea03SBarry Smith   ii   = a->i;
1126ed8eea03SBarry Smith 
1127ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1128ed8eea03SBarry Smith     jrow = ii[i];
1129ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1130ed8eea03SBarry Smith     sum1 = 0.0;
1131ed8eea03SBarry Smith     sum2 = 0.0;
1132ed8eea03SBarry Smith     sum3 = 0.0;
1133ed8eea03SBarry Smith     sum4 = 0.0;
1134ed8eea03SBarry Smith     sum5 = 0.0;
1135ed8eea03SBarry Smith     sum6 = 0.0;
1136ed8eea03SBarry Smith     sum7 = 0.0;
1137ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1138ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1139ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1140ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1141ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1142ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1143ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1144ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1145ed8eea03SBarry Smith       jrow++;
1146ed8eea03SBarry Smith     }
1147ed8eea03SBarry Smith     y[7*i]   += sum1;
1148ed8eea03SBarry Smith     y[7*i+1] += sum2;
1149ed8eea03SBarry Smith     y[7*i+2] += sum3;
1150ed8eea03SBarry Smith     y[7*i+3] += sum4;
1151ed8eea03SBarry Smith     y[7*i+4] += sum5;
1152ed8eea03SBarry Smith     y[7*i+5] += sum6;
1153ed8eea03SBarry Smith     y[7*i+6] += sum7;
1154ed8eea03SBarry Smith   }
1155ed8eea03SBarry Smith 
1156dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11573649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1158ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1159ed8eea03SBarry Smith   PetscFunctionReturn(0);
1160ed8eea03SBarry Smith }
1161ed8eea03SBarry Smith 
1162ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1163ed8eea03SBarry Smith {
1164ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1165ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1166d2840e09SBarry Smith   const PetscScalar *x,*v;
1167d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1168ed8eea03SBarry Smith   PetscErrorCode    ierr;
1169d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1170d2840e09SBarry Smith   PetscInt          n,i;
1171ed8eea03SBarry Smith 
1172ed8eea03SBarry Smith   PetscFunctionBegin;
1173ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11743649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1175ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1176ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1177ed8eea03SBarry Smith     idx    = a->j + a->i[i];
1178ed8eea03SBarry Smith     v      = a->a + a->i[i];
1179ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1180ed8eea03SBarry Smith     alpha1 = x[7*i];
1181ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1182ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1183ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1184ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1185ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1186ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1187ed8eea03SBarry Smith     while (n-->0) {
1188ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1189ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1190ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1191ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1192ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1193ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1194ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1195ed8eea03SBarry Smith       idx++; v++;
1196ed8eea03SBarry Smith     }
1197ed8eea03SBarry Smith   }
1198dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11993649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1200ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1201ed8eea03SBarry Smith   PetscFunctionReturn(0);
1202ed8eea03SBarry Smith }
1203ed8eea03SBarry Smith 
1204dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
120566ed3db0SBarry Smith {
120666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
120766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1208d2840e09SBarry Smith   const PetscScalar *x,*v;
1209d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1210dfbe8321SBarry Smith   PetscErrorCode    ierr;
1211d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1212d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
121366ed3db0SBarry Smith 
121466ed3db0SBarry Smith   PetscFunctionBegin;
12153649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12161ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
121766ed3db0SBarry Smith   idx  = a->j;
121866ed3db0SBarry Smith   v    = a->a;
121966ed3db0SBarry Smith   ii   = a->i;
122066ed3db0SBarry Smith 
122166ed3db0SBarry Smith   for (i=0; i<m; i++) {
122266ed3db0SBarry Smith     jrow  = ii[i];
122366ed3db0SBarry Smith     n     = ii[i+1] - jrow;
122466ed3db0SBarry Smith     sum1  = 0.0;
122566ed3db0SBarry Smith     sum2  = 0.0;
122666ed3db0SBarry Smith     sum3  = 0.0;
122766ed3db0SBarry Smith     sum4  = 0.0;
122866ed3db0SBarry Smith     sum5  = 0.0;
122966ed3db0SBarry Smith     sum6  = 0.0;
123066ed3db0SBarry Smith     sum7  = 0.0;
123166ed3db0SBarry Smith     sum8  = 0.0;
123226fbe8dcSKarl Rupp 
1233b3c51c66SMatthew Knepley     nonzerorow += (n>0);
123466ed3db0SBarry Smith     for (j=0; j<n; j++) {
123566ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
123666ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
123766ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
123866ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
123966ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
124066ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
124166ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
124266ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
124366ed3db0SBarry Smith       jrow++;
124466ed3db0SBarry Smith     }
124566ed3db0SBarry Smith     y[8*i]   = sum1;
124666ed3db0SBarry Smith     y[8*i+1] = sum2;
124766ed3db0SBarry Smith     y[8*i+2] = sum3;
124866ed3db0SBarry Smith     y[8*i+3] = sum4;
124966ed3db0SBarry Smith     y[8*i+4] = sum5;
125066ed3db0SBarry Smith     y[8*i+5] = sum6;
125166ed3db0SBarry Smith     y[8*i+6] = sum7;
125266ed3db0SBarry Smith     y[8*i+7] = sum8;
125366ed3db0SBarry Smith   }
125466ed3db0SBarry Smith 
1255dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
12563649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
12571ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
125866ed3db0SBarry Smith   PetscFunctionReturn(0);
125966ed3db0SBarry Smith }
126066ed3db0SBarry Smith 
1261dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
126266ed3db0SBarry Smith {
126366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
126466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1265d2840e09SBarry Smith   const PetscScalar *x,*v;
1266d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1267dfbe8321SBarry Smith   PetscErrorCode    ierr;
1268d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1269d2840e09SBarry Smith   PetscInt          n,i;
127066ed3db0SBarry Smith 
127166ed3db0SBarry Smith   PetscFunctionBegin;
1272d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
12733649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12741ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1275bfec09a0SHong Zhang 
127666ed3db0SBarry Smith   for (i=0; i<m; i++) {
1277bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1278bfec09a0SHong Zhang     v      = a->a + a->i[i];
127966ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
128066ed3db0SBarry Smith     alpha1 = x[8*i];
128166ed3db0SBarry Smith     alpha2 = x[8*i+1];
128266ed3db0SBarry Smith     alpha3 = x[8*i+2];
128366ed3db0SBarry Smith     alpha4 = x[8*i+3];
128466ed3db0SBarry Smith     alpha5 = x[8*i+4];
128566ed3db0SBarry Smith     alpha6 = x[8*i+5];
128666ed3db0SBarry Smith     alpha7 = x[8*i+6];
128766ed3db0SBarry Smith     alpha8 = x[8*i+7];
128866ed3db0SBarry Smith     while (n-->0) {
128966ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
129066ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
129166ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
129266ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
129366ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
129466ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
129566ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
129666ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
129766ed3db0SBarry Smith       idx++; v++;
129866ed3db0SBarry Smith     }
129966ed3db0SBarry Smith   }
1300dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13013649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13021ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130366ed3db0SBarry Smith   PetscFunctionReturn(0);
130466ed3db0SBarry Smith }
130566ed3db0SBarry Smith 
1306dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
130766ed3db0SBarry Smith {
130866ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
130966ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1310d2840e09SBarry Smith   const PetscScalar *x,*v;
1311d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1312dfbe8321SBarry Smith   PetscErrorCode    ierr;
1313d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1314b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
131566ed3db0SBarry Smith 
131666ed3db0SBarry Smith   PetscFunctionBegin;
131766ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13183649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13191ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
132066ed3db0SBarry Smith   idx  = a->j;
132166ed3db0SBarry Smith   v    = a->a;
132266ed3db0SBarry Smith   ii   = a->i;
132366ed3db0SBarry Smith 
132466ed3db0SBarry Smith   for (i=0; i<m; i++) {
132566ed3db0SBarry Smith     jrow = ii[i];
132666ed3db0SBarry Smith     n    = ii[i+1] - jrow;
132766ed3db0SBarry Smith     sum1 = 0.0;
132866ed3db0SBarry Smith     sum2 = 0.0;
132966ed3db0SBarry Smith     sum3 = 0.0;
133066ed3db0SBarry Smith     sum4 = 0.0;
133166ed3db0SBarry Smith     sum5 = 0.0;
133266ed3db0SBarry Smith     sum6 = 0.0;
133366ed3db0SBarry Smith     sum7 = 0.0;
133466ed3db0SBarry Smith     sum8 = 0.0;
133566ed3db0SBarry Smith     for (j=0; j<n; j++) {
133666ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
133766ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
133866ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
133966ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
134066ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
134166ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
134266ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
134366ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
134466ed3db0SBarry Smith       jrow++;
134566ed3db0SBarry Smith     }
134666ed3db0SBarry Smith     y[8*i]   += sum1;
134766ed3db0SBarry Smith     y[8*i+1] += sum2;
134866ed3db0SBarry Smith     y[8*i+2] += sum3;
134966ed3db0SBarry Smith     y[8*i+3] += sum4;
135066ed3db0SBarry Smith     y[8*i+4] += sum5;
135166ed3db0SBarry Smith     y[8*i+5] += sum6;
135266ed3db0SBarry Smith     y[8*i+6] += sum7;
135366ed3db0SBarry Smith     y[8*i+7] += sum8;
135466ed3db0SBarry Smith   }
135566ed3db0SBarry Smith 
1356dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13573649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13581ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
135966ed3db0SBarry Smith   PetscFunctionReturn(0);
136066ed3db0SBarry Smith }
136166ed3db0SBarry Smith 
1362dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
136366ed3db0SBarry Smith {
136466ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
136566ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1366d2840e09SBarry Smith   const PetscScalar *x,*v;
1367d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1368dfbe8321SBarry Smith   PetscErrorCode    ierr;
1369d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1370d2840e09SBarry Smith   PetscInt          n,i;
137166ed3db0SBarry Smith 
137266ed3db0SBarry Smith   PetscFunctionBegin;
137366ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13743649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13751ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
137666ed3db0SBarry Smith   for (i=0; i<m; i++) {
1377bfec09a0SHong Zhang     idx    = a->j + a->i[i];
1378bfec09a0SHong Zhang     v      = a->a + a->i[i];
137966ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
138066ed3db0SBarry Smith     alpha1 = x[8*i];
138166ed3db0SBarry Smith     alpha2 = x[8*i+1];
138266ed3db0SBarry Smith     alpha3 = x[8*i+2];
138366ed3db0SBarry Smith     alpha4 = x[8*i+3];
138466ed3db0SBarry Smith     alpha5 = x[8*i+4];
138566ed3db0SBarry Smith     alpha6 = x[8*i+5];
138666ed3db0SBarry Smith     alpha7 = x[8*i+6];
138766ed3db0SBarry Smith     alpha8 = x[8*i+7];
138866ed3db0SBarry Smith     while (n-->0) {
138966ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
139066ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
139166ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
139266ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
139366ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
139466ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
139566ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
139666ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
139766ed3db0SBarry Smith       idx++; v++;
139866ed3db0SBarry Smith     }
139966ed3db0SBarry Smith   }
1400dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14013649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14021ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14032f7816d4SBarry Smith   PetscFunctionReturn(0);
14042f7816d4SBarry Smith }
14052f7816d4SBarry Smith 
14062b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
1407dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14082b692628SMatthew Knepley {
14092b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14102b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1411d2840e09SBarry Smith   const PetscScalar *x,*v;
1412d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1413dfbe8321SBarry Smith   PetscErrorCode    ierr;
1414d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1415d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14162b692628SMatthew Knepley 
14172b692628SMatthew Knepley   PetscFunctionBegin;
14183649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14191ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14202b692628SMatthew Knepley   idx  = a->j;
14212b692628SMatthew Knepley   v    = a->a;
14222b692628SMatthew Knepley   ii   = a->i;
14232b692628SMatthew Knepley 
14242b692628SMatthew Knepley   for (i=0; i<m; i++) {
14252b692628SMatthew Knepley     jrow  = ii[i];
14262b692628SMatthew Knepley     n     = ii[i+1] - jrow;
14272b692628SMatthew Knepley     sum1  = 0.0;
14282b692628SMatthew Knepley     sum2  = 0.0;
14292b692628SMatthew Knepley     sum3  = 0.0;
14302b692628SMatthew Knepley     sum4  = 0.0;
14312b692628SMatthew Knepley     sum5  = 0.0;
14322b692628SMatthew Knepley     sum6  = 0.0;
14332b692628SMatthew Knepley     sum7  = 0.0;
14342b692628SMatthew Knepley     sum8  = 0.0;
14352b692628SMatthew Knepley     sum9  = 0.0;
143626fbe8dcSKarl Rupp 
1437b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14382b692628SMatthew Knepley     for (j=0; j<n; j++) {
14392b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14402b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14412b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14422b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14432b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14442b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14452b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14462b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14472b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14482b692628SMatthew Knepley       jrow++;
14492b692628SMatthew Knepley     }
14502b692628SMatthew Knepley     y[9*i]   = sum1;
14512b692628SMatthew Knepley     y[9*i+1] = sum2;
14522b692628SMatthew Knepley     y[9*i+2] = sum3;
14532b692628SMatthew Knepley     y[9*i+3] = sum4;
14542b692628SMatthew Knepley     y[9*i+4] = sum5;
14552b692628SMatthew Knepley     y[9*i+5] = sum6;
14562b692628SMatthew Knepley     y[9*i+6] = sum7;
14572b692628SMatthew Knepley     y[9*i+7] = sum8;
14582b692628SMatthew Knepley     y[9*i+8] = sum9;
14592b692628SMatthew Knepley   }
14602b692628SMatthew Knepley 
1461dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
14623649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14631ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14642b692628SMatthew Knepley   PetscFunctionReturn(0);
14652b692628SMatthew Knepley }
14662b692628SMatthew Knepley 
1467b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1468b9cda22cSBarry Smith 
1469dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14702b692628SMatthew Knepley {
14712b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14722b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1473d2840e09SBarry Smith   const PetscScalar *x,*v;
1474d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1475dfbe8321SBarry Smith   PetscErrorCode    ierr;
1476d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1477d2840e09SBarry Smith   PetscInt          n,i;
14782b692628SMatthew Knepley 
14792b692628SMatthew Knepley   PetscFunctionBegin;
1480d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
14813649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14821ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14832b692628SMatthew Knepley 
14842b692628SMatthew Knepley   for (i=0; i<m; i++) {
14852b692628SMatthew Knepley     idx    = a->j + a->i[i];
14862b692628SMatthew Knepley     v      = a->a + a->i[i];
14872b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14882b692628SMatthew Knepley     alpha1 = x[9*i];
14892b692628SMatthew Knepley     alpha2 = x[9*i+1];
14902b692628SMatthew Knepley     alpha3 = x[9*i+2];
14912b692628SMatthew Knepley     alpha4 = x[9*i+3];
14922b692628SMatthew Knepley     alpha5 = x[9*i+4];
14932b692628SMatthew Knepley     alpha6 = x[9*i+5];
14942b692628SMatthew Knepley     alpha7 = x[9*i+6];
14952b692628SMatthew Knepley     alpha8 = x[9*i+7];
14962f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
14972b692628SMatthew Knepley     while (n-->0) {
14982b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
14992b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15002b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15012b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15022b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15032b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15042b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15052b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15062b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15072b692628SMatthew Knepley       idx++; v++;
15082b692628SMatthew Knepley     }
15092b692628SMatthew Knepley   }
1510dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15113649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15121ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15132b692628SMatthew Knepley   PetscFunctionReturn(0);
15142b692628SMatthew Knepley }
15152b692628SMatthew Knepley 
1516dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15172b692628SMatthew Knepley {
15182b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15192b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1520d2840e09SBarry Smith   const PetscScalar *x,*v;
1521d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1522dfbe8321SBarry Smith   PetscErrorCode    ierr;
1523d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1524b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15252b692628SMatthew Knepley 
15262b692628SMatthew Knepley   PetscFunctionBegin;
15272b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15283649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15291ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15302b692628SMatthew Knepley   idx  = a->j;
15312b692628SMatthew Knepley   v    = a->a;
15322b692628SMatthew Knepley   ii   = a->i;
15332b692628SMatthew Knepley 
15342b692628SMatthew Knepley   for (i=0; i<m; i++) {
15352b692628SMatthew Knepley     jrow = ii[i];
15362b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15372b692628SMatthew Knepley     sum1 = 0.0;
15382b692628SMatthew Knepley     sum2 = 0.0;
15392b692628SMatthew Knepley     sum3 = 0.0;
15402b692628SMatthew Knepley     sum4 = 0.0;
15412b692628SMatthew Knepley     sum5 = 0.0;
15422b692628SMatthew Knepley     sum6 = 0.0;
15432b692628SMatthew Knepley     sum7 = 0.0;
15442b692628SMatthew Knepley     sum8 = 0.0;
15452b692628SMatthew Knepley     sum9 = 0.0;
15462b692628SMatthew Knepley     for (j=0; j<n; j++) {
15472b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15482b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15492b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15502b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15512b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15522b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15532b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15542b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15552b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15562b692628SMatthew Knepley       jrow++;
15572b692628SMatthew Knepley     }
15582b692628SMatthew Knepley     y[9*i]   += sum1;
15592b692628SMatthew Knepley     y[9*i+1] += sum2;
15602b692628SMatthew Knepley     y[9*i+2] += sum3;
15612b692628SMatthew Knepley     y[9*i+3] += sum4;
15622b692628SMatthew Knepley     y[9*i+4] += sum5;
15632b692628SMatthew Knepley     y[9*i+5] += sum6;
15642b692628SMatthew Knepley     y[9*i+6] += sum7;
15652b692628SMatthew Knepley     y[9*i+7] += sum8;
15662b692628SMatthew Knepley     y[9*i+8] += sum9;
15672b692628SMatthew Knepley   }
15682b692628SMatthew Knepley 
1569efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
15703649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15711ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15722b692628SMatthew Knepley   PetscFunctionReturn(0);
15732b692628SMatthew Knepley }
15742b692628SMatthew Knepley 
1575dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15762b692628SMatthew Knepley {
15772b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15782b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1579d2840e09SBarry Smith   const PetscScalar *x,*v;
1580d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1581dfbe8321SBarry Smith   PetscErrorCode    ierr;
1582d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1583d2840e09SBarry Smith   PetscInt          n,i;
15842b692628SMatthew Knepley 
15852b692628SMatthew Knepley   PetscFunctionBegin;
15862b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15873649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15881ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15892b692628SMatthew Knepley   for (i=0; i<m; i++) {
15902b692628SMatthew Knepley     idx    = a->j + a->i[i];
15912b692628SMatthew Knepley     v      = a->a + a->i[i];
15922b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15932b692628SMatthew Knepley     alpha1 = x[9*i];
15942b692628SMatthew Knepley     alpha2 = x[9*i+1];
15952b692628SMatthew Knepley     alpha3 = x[9*i+2];
15962b692628SMatthew Knepley     alpha4 = x[9*i+3];
15972b692628SMatthew Knepley     alpha5 = x[9*i+4];
15982b692628SMatthew Knepley     alpha6 = x[9*i+5];
15992b692628SMatthew Knepley     alpha7 = x[9*i+6];
16002b692628SMatthew Knepley     alpha8 = x[9*i+7];
16012b692628SMatthew Knepley     alpha9 = x[9*i+8];
16022b692628SMatthew Knepley     while (n-->0) {
16032b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16042b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16052b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16062b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16072b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16082b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16092b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16102b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16112b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16122b692628SMatthew Knepley       idx++; v++;
16132b692628SMatthew Knepley     }
16142b692628SMatthew Knepley   }
1615dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16163649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16171ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16182b692628SMatthew Knepley   PetscFunctionReturn(0);
16192b692628SMatthew Knepley }
1620b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1621b9cda22cSBarry Smith {
1622b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1623b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1624d2840e09SBarry Smith   const PetscScalar *x,*v;
1625d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1626b9cda22cSBarry Smith   PetscErrorCode    ierr;
1627d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1628d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1629b9cda22cSBarry Smith 
1630b9cda22cSBarry Smith   PetscFunctionBegin;
16313649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1632b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1633b9cda22cSBarry Smith   idx  = a->j;
1634b9cda22cSBarry Smith   v    = a->a;
1635b9cda22cSBarry Smith   ii   = a->i;
1636b9cda22cSBarry Smith 
1637b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1638b9cda22cSBarry Smith     jrow  = ii[i];
1639b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1640b9cda22cSBarry Smith     sum1  = 0.0;
1641b9cda22cSBarry Smith     sum2  = 0.0;
1642b9cda22cSBarry Smith     sum3  = 0.0;
1643b9cda22cSBarry Smith     sum4  = 0.0;
1644b9cda22cSBarry Smith     sum5  = 0.0;
1645b9cda22cSBarry Smith     sum6  = 0.0;
1646b9cda22cSBarry Smith     sum7  = 0.0;
1647b9cda22cSBarry Smith     sum8  = 0.0;
1648b9cda22cSBarry Smith     sum9  = 0.0;
1649b9cda22cSBarry Smith     sum10 = 0.0;
165026fbe8dcSKarl Rupp 
1651b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1652b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1653b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1654b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1655b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1656b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1657b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1658b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1659b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1660b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1661b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1662b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1663b9cda22cSBarry Smith       jrow++;
1664b9cda22cSBarry Smith     }
1665b9cda22cSBarry Smith     y[10*i]   = sum1;
1666b9cda22cSBarry Smith     y[10*i+1] = sum2;
1667b9cda22cSBarry Smith     y[10*i+2] = sum3;
1668b9cda22cSBarry Smith     y[10*i+3] = sum4;
1669b9cda22cSBarry Smith     y[10*i+4] = sum5;
1670b9cda22cSBarry Smith     y[10*i+5] = sum6;
1671b9cda22cSBarry Smith     y[10*i+6] = sum7;
1672b9cda22cSBarry Smith     y[10*i+7] = sum8;
1673b9cda22cSBarry Smith     y[10*i+8] = sum9;
1674b9cda22cSBarry Smith     y[10*i+9] = sum10;
1675b9cda22cSBarry Smith   }
1676b9cda22cSBarry Smith 
1677dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
16783649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1679b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1680b9cda22cSBarry Smith   PetscFunctionReturn(0);
1681b9cda22cSBarry Smith }
1682b9cda22cSBarry Smith 
1683b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1684b9cda22cSBarry Smith {
1685b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1686b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1687d2840e09SBarry Smith   const PetscScalar *x,*v;
1688d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1689b9cda22cSBarry Smith   PetscErrorCode    ierr;
1690d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1691b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1692b9cda22cSBarry Smith 
1693b9cda22cSBarry Smith   PetscFunctionBegin;
1694b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16953649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1696b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1697b9cda22cSBarry Smith   idx  = a->j;
1698b9cda22cSBarry Smith   v    = a->a;
1699b9cda22cSBarry Smith   ii   = a->i;
1700b9cda22cSBarry Smith 
1701b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1702b9cda22cSBarry Smith     jrow  = ii[i];
1703b9cda22cSBarry Smith     n     = ii[i+1] - jrow;
1704b9cda22cSBarry Smith     sum1  = 0.0;
1705b9cda22cSBarry Smith     sum2  = 0.0;
1706b9cda22cSBarry Smith     sum3  = 0.0;
1707b9cda22cSBarry Smith     sum4  = 0.0;
1708b9cda22cSBarry Smith     sum5  = 0.0;
1709b9cda22cSBarry Smith     sum6  = 0.0;
1710b9cda22cSBarry Smith     sum7  = 0.0;
1711b9cda22cSBarry Smith     sum8  = 0.0;
1712b9cda22cSBarry Smith     sum9  = 0.0;
1713b9cda22cSBarry Smith     sum10 = 0.0;
1714b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1715b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1716b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1717b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1718b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1719b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1720b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1721b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1722b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1723b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1724b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1725b9cda22cSBarry Smith       jrow++;
1726b9cda22cSBarry Smith     }
1727b9cda22cSBarry Smith     y[10*i]   += sum1;
1728b9cda22cSBarry Smith     y[10*i+1] += sum2;
1729b9cda22cSBarry Smith     y[10*i+2] += sum3;
1730b9cda22cSBarry Smith     y[10*i+3] += sum4;
1731b9cda22cSBarry Smith     y[10*i+4] += sum5;
1732b9cda22cSBarry Smith     y[10*i+5] += sum6;
1733b9cda22cSBarry Smith     y[10*i+6] += sum7;
1734b9cda22cSBarry Smith     y[10*i+7] += sum8;
1735b9cda22cSBarry Smith     y[10*i+8] += sum9;
1736b9cda22cSBarry Smith     y[10*i+9] += sum10;
1737b9cda22cSBarry Smith   }
1738b9cda22cSBarry Smith 
1739dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17403649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1741b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1742b9cda22cSBarry Smith   PetscFunctionReturn(0);
1743b9cda22cSBarry Smith }
1744b9cda22cSBarry Smith 
1745b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1746b9cda22cSBarry Smith {
1747b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1748b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1749d2840e09SBarry Smith   const PetscScalar *x,*v;
1750d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1751b9cda22cSBarry Smith   PetscErrorCode    ierr;
1752d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1753d2840e09SBarry Smith   PetscInt          n,i;
1754b9cda22cSBarry Smith 
1755b9cda22cSBarry Smith   PetscFunctionBegin;
1756d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
17573649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1758b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1759b9cda22cSBarry Smith 
1760b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1761b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1762b9cda22cSBarry Smith     v       = a->a + a->i[i];
1763b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1764e29fdc22SBarry Smith     alpha1  = x[10*i];
1765e29fdc22SBarry Smith     alpha2  = x[10*i+1];
1766e29fdc22SBarry Smith     alpha3  = x[10*i+2];
1767e29fdc22SBarry Smith     alpha4  = x[10*i+3];
1768e29fdc22SBarry Smith     alpha5  = x[10*i+4];
1769e29fdc22SBarry Smith     alpha6  = x[10*i+5];
1770e29fdc22SBarry Smith     alpha7  = x[10*i+6];
1771e29fdc22SBarry Smith     alpha8  = x[10*i+7];
1772e29fdc22SBarry Smith     alpha9  = x[10*i+8];
1773b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1774b9cda22cSBarry Smith     while (n-->0) {
1775e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1776e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1777e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1778e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1779e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1780e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1781e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1782e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1783e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1784b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1785b9cda22cSBarry Smith       idx++; v++;
1786b9cda22cSBarry Smith     }
1787b9cda22cSBarry Smith   }
1788dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
17893649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1790b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1791b9cda22cSBarry Smith   PetscFunctionReturn(0);
1792b9cda22cSBarry Smith }
1793b9cda22cSBarry Smith 
1794b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1795b9cda22cSBarry Smith {
1796b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1797b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1798d2840e09SBarry Smith   const PetscScalar *x,*v;
1799d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1800b9cda22cSBarry Smith   PetscErrorCode    ierr;
1801d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1802d2840e09SBarry Smith   PetscInt          n,i;
1803b9cda22cSBarry Smith 
1804b9cda22cSBarry Smith   PetscFunctionBegin;
1805b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18063649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1807b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1808b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1809b9cda22cSBarry Smith     idx     = a->j + a->i[i];
1810b9cda22cSBarry Smith     v       = a->a + a->i[i];
1811b9cda22cSBarry Smith     n       = a->i[i+1] - a->i[i];
1812b9cda22cSBarry Smith     alpha1  = x[10*i];
1813b9cda22cSBarry Smith     alpha2  = x[10*i+1];
1814b9cda22cSBarry Smith     alpha3  = x[10*i+2];
1815b9cda22cSBarry Smith     alpha4  = x[10*i+3];
1816b9cda22cSBarry Smith     alpha5  = x[10*i+4];
1817b9cda22cSBarry Smith     alpha6  = x[10*i+5];
1818b9cda22cSBarry Smith     alpha7  = x[10*i+6];
1819b9cda22cSBarry Smith     alpha8  = x[10*i+7];
1820b9cda22cSBarry Smith     alpha9  = x[10*i+8];
1821b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1822b9cda22cSBarry Smith     while (n-->0) {
1823b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1824b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1825b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1826b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1827b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1828b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1829b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1830b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1831b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1832b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1833b9cda22cSBarry Smith       idx++; v++;
1834b9cda22cSBarry Smith     }
1835b9cda22cSBarry Smith   }
1836dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18373649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1838b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1839b9cda22cSBarry Smith   PetscFunctionReturn(0);
1840b9cda22cSBarry Smith }
1841b9cda22cSBarry Smith 
18422b692628SMatthew Knepley 
18432f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
1844dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1845dbdd7285SBarry Smith {
1846dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1847dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1848d2840e09SBarry Smith   const PetscScalar *x,*v;
1849d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1850dbdd7285SBarry Smith   PetscErrorCode    ierr;
1851d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1852d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1853dbdd7285SBarry Smith 
1854dbdd7285SBarry Smith   PetscFunctionBegin;
18553649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1856dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1857dbdd7285SBarry Smith   idx  = a->j;
1858dbdd7285SBarry Smith   v    = a->a;
1859dbdd7285SBarry Smith   ii   = a->i;
1860dbdd7285SBarry Smith 
1861dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1862dbdd7285SBarry Smith     jrow  = ii[i];
1863dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1864dbdd7285SBarry Smith     sum1  = 0.0;
1865dbdd7285SBarry Smith     sum2  = 0.0;
1866dbdd7285SBarry Smith     sum3  = 0.0;
1867dbdd7285SBarry Smith     sum4  = 0.0;
1868dbdd7285SBarry Smith     sum5  = 0.0;
1869dbdd7285SBarry Smith     sum6  = 0.0;
1870dbdd7285SBarry Smith     sum7  = 0.0;
1871dbdd7285SBarry Smith     sum8  = 0.0;
1872dbdd7285SBarry Smith     sum9  = 0.0;
1873dbdd7285SBarry Smith     sum10 = 0.0;
1874dbdd7285SBarry Smith     sum11 = 0.0;
187526fbe8dcSKarl Rupp 
1876dbdd7285SBarry Smith     nonzerorow += (n>0);
1877dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1878dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1879dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1880dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1881dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1882dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1883dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1884dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1885dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1886dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1887dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1888dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1889dbdd7285SBarry Smith       jrow++;
1890dbdd7285SBarry Smith     }
1891dbdd7285SBarry Smith     y[11*i]    = sum1;
1892dbdd7285SBarry Smith     y[11*i+1]  = sum2;
1893dbdd7285SBarry Smith     y[11*i+2]  = sum3;
1894dbdd7285SBarry Smith     y[11*i+3]  = sum4;
1895dbdd7285SBarry Smith     y[11*i+4]  = sum5;
1896dbdd7285SBarry Smith     y[11*i+5]  = sum6;
1897dbdd7285SBarry Smith     y[11*i+6]  = sum7;
1898dbdd7285SBarry Smith     y[11*i+7]  = sum8;
1899dbdd7285SBarry Smith     y[11*i+8]  = sum9;
1900dbdd7285SBarry Smith     y[11*i+9]  = sum10;
1901dbdd7285SBarry Smith     y[11*i+10] = sum11;
1902dbdd7285SBarry Smith   }
1903dbdd7285SBarry Smith 
1904dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19053649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1906dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1907dbdd7285SBarry Smith   PetscFunctionReturn(0);
1908dbdd7285SBarry Smith }
1909dbdd7285SBarry Smith 
1910dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1911dbdd7285SBarry Smith {
1912dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1913dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1914d2840e09SBarry Smith   const PetscScalar *x,*v;
1915d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1916dbdd7285SBarry Smith   PetscErrorCode    ierr;
1917d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1918dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1919dbdd7285SBarry Smith 
1920dbdd7285SBarry Smith   PetscFunctionBegin;
1921dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1923dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1924dbdd7285SBarry Smith   idx  = a->j;
1925dbdd7285SBarry Smith   v    = a->a;
1926dbdd7285SBarry Smith   ii   = a->i;
1927dbdd7285SBarry Smith 
1928dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1929dbdd7285SBarry Smith     jrow  = ii[i];
1930dbdd7285SBarry Smith     n     = ii[i+1] - jrow;
1931dbdd7285SBarry Smith     sum1  = 0.0;
1932dbdd7285SBarry Smith     sum2  = 0.0;
1933dbdd7285SBarry Smith     sum3  = 0.0;
1934dbdd7285SBarry Smith     sum4  = 0.0;
1935dbdd7285SBarry Smith     sum5  = 0.0;
1936dbdd7285SBarry Smith     sum6  = 0.0;
1937dbdd7285SBarry Smith     sum7  = 0.0;
1938dbdd7285SBarry Smith     sum8  = 0.0;
1939dbdd7285SBarry Smith     sum9  = 0.0;
1940dbdd7285SBarry Smith     sum10 = 0.0;
1941dbdd7285SBarry Smith     sum11 = 0.0;
1942dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1943dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1944dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1945dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1946dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1947dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1948dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1949dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1950dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1951dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1952dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1953dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1954dbdd7285SBarry Smith       jrow++;
1955dbdd7285SBarry Smith     }
1956dbdd7285SBarry Smith     y[11*i]    += sum1;
1957dbdd7285SBarry Smith     y[11*i+1]  += sum2;
1958dbdd7285SBarry Smith     y[11*i+2]  += sum3;
1959dbdd7285SBarry Smith     y[11*i+3]  += sum4;
1960dbdd7285SBarry Smith     y[11*i+4]  += sum5;
1961dbdd7285SBarry Smith     y[11*i+5]  += sum6;
1962dbdd7285SBarry Smith     y[11*i+6]  += sum7;
1963dbdd7285SBarry Smith     y[11*i+7]  += sum8;
1964dbdd7285SBarry Smith     y[11*i+8]  += sum9;
1965dbdd7285SBarry Smith     y[11*i+9]  += sum10;
1966dbdd7285SBarry Smith     y[11*i+10] += sum11;
1967dbdd7285SBarry Smith   }
1968dbdd7285SBarry Smith 
1969dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
19703649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1971dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1972dbdd7285SBarry Smith   PetscFunctionReturn(0);
1973dbdd7285SBarry Smith }
1974dbdd7285SBarry Smith 
1975dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1976dbdd7285SBarry Smith {
1977dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1978dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1979d2840e09SBarry Smith   const PetscScalar *x,*v;
1980d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1981dbdd7285SBarry Smith   PetscErrorCode    ierr;
1982d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1983d2840e09SBarry Smith   PetscInt          n,i;
1984dbdd7285SBarry Smith 
1985dbdd7285SBarry Smith   PetscFunctionBegin;
1986d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
19873649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1988dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1989dbdd7285SBarry Smith 
1990dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1991dbdd7285SBarry Smith     idx     = a->j + a->i[i];
1992dbdd7285SBarry Smith     v       = a->a + a->i[i];
1993dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
1994dbdd7285SBarry Smith     alpha1  = x[11*i];
1995dbdd7285SBarry Smith     alpha2  = x[11*i+1];
1996dbdd7285SBarry Smith     alpha3  = x[11*i+2];
1997dbdd7285SBarry Smith     alpha4  = x[11*i+3];
1998dbdd7285SBarry Smith     alpha5  = x[11*i+4];
1999dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2000dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2001dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2002dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2003dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2004dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2005dbdd7285SBarry Smith     while (n-->0) {
2006dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2007dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2008dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2009dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2010dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2011dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2012dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2013dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2014dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2015dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2016dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2017dbdd7285SBarry Smith       idx++; v++;
2018dbdd7285SBarry Smith     }
2019dbdd7285SBarry Smith   }
2020dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20213649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2022dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2023dbdd7285SBarry Smith   PetscFunctionReturn(0);
2024dbdd7285SBarry Smith }
2025dbdd7285SBarry Smith 
2026dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2027dbdd7285SBarry Smith {
2028dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2029dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2030d2840e09SBarry Smith   const PetscScalar *x,*v;
2031d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2032dbdd7285SBarry Smith   PetscErrorCode    ierr;
2033d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2034d2840e09SBarry Smith   PetscInt          n,i;
2035dbdd7285SBarry Smith 
2036dbdd7285SBarry Smith   PetscFunctionBegin;
2037dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
20383649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2039dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2040dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2041dbdd7285SBarry Smith     idx     = a->j + a->i[i];
2042dbdd7285SBarry Smith     v       = a->a + a->i[i];
2043dbdd7285SBarry Smith     n       = a->i[i+1] - a->i[i];
2044dbdd7285SBarry Smith     alpha1  = x[11*i];
2045dbdd7285SBarry Smith     alpha2  = x[11*i+1];
2046dbdd7285SBarry Smith     alpha3  = x[11*i+2];
2047dbdd7285SBarry Smith     alpha4  = x[11*i+3];
2048dbdd7285SBarry Smith     alpha5  = x[11*i+4];
2049dbdd7285SBarry Smith     alpha6  = x[11*i+5];
2050dbdd7285SBarry Smith     alpha7  = x[11*i+6];
2051dbdd7285SBarry Smith     alpha8  = x[11*i+7];
2052dbdd7285SBarry Smith     alpha9  = x[11*i+8];
2053dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2054dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2055dbdd7285SBarry Smith     while (n-->0) {
2056dbdd7285SBarry Smith       y[11*(*idx)]    += alpha1*(*v);
2057dbdd7285SBarry Smith       y[11*(*idx)+1]  += alpha2*(*v);
2058dbdd7285SBarry Smith       y[11*(*idx)+2]  += alpha3*(*v);
2059dbdd7285SBarry Smith       y[11*(*idx)+3]  += alpha4*(*v);
2060dbdd7285SBarry Smith       y[11*(*idx)+4]  += alpha5*(*v);
2061dbdd7285SBarry Smith       y[11*(*idx)+5]  += alpha6*(*v);
2062dbdd7285SBarry Smith       y[11*(*idx)+6]  += alpha7*(*v);
2063dbdd7285SBarry Smith       y[11*(*idx)+7]  += alpha8*(*v);
2064dbdd7285SBarry Smith       y[11*(*idx)+8]  += alpha9*(*v);
2065dbdd7285SBarry Smith       y[11*(*idx)+9]  += alpha10*(*v);
2066dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2067dbdd7285SBarry Smith       idx++; v++;
2068dbdd7285SBarry Smith     }
2069dbdd7285SBarry Smith   }
2070dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20713649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2072dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2073dbdd7285SBarry Smith   PetscFunctionReturn(0);
2074dbdd7285SBarry Smith }
2075dbdd7285SBarry Smith 
2076dbdd7285SBarry Smith 
2077dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2078dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
20792f7816d4SBarry Smith {
20802f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
20812f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2082d2840e09SBarry Smith   const PetscScalar *x,*v;
2083d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
20842f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2085dfbe8321SBarry Smith   PetscErrorCode    ierr;
2086d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2087d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
20882f7816d4SBarry Smith 
20892f7816d4SBarry Smith   PetscFunctionBegin;
20903649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
20911ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
20922f7816d4SBarry Smith   idx  = a->j;
20932f7816d4SBarry Smith   v    = a->a;
20942f7816d4SBarry Smith   ii   = a->i;
20952f7816d4SBarry Smith 
20962f7816d4SBarry Smith   for (i=0; i<m; i++) {
20972f7816d4SBarry Smith     jrow  = ii[i];
20982f7816d4SBarry Smith     n     = ii[i+1] - jrow;
20992f7816d4SBarry Smith     sum1  = 0.0;
21002f7816d4SBarry Smith     sum2  = 0.0;
21012f7816d4SBarry Smith     sum3  = 0.0;
21022f7816d4SBarry Smith     sum4  = 0.0;
21032f7816d4SBarry Smith     sum5  = 0.0;
21042f7816d4SBarry Smith     sum6  = 0.0;
21052f7816d4SBarry Smith     sum7  = 0.0;
21062f7816d4SBarry Smith     sum8  = 0.0;
21072f7816d4SBarry Smith     sum9  = 0.0;
21082f7816d4SBarry Smith     sum10 = 0.0;
21092f7816d4SBarry Smith     sum11 = 0.0;
21102f7816d4SBarry Smith     sum12 = 0.0;
21112f7816d4SBarry Smith     sum13 = 0.0;
21122f7816d4SBarry Smith     sum14 = 0.0;
21132f7816d4SBarry Smith     sum15 = 0.0;
21142f7816d4SBarry Smith     sum16 = 0.0;
211526fbe8dcSKarl Rupp 
2116b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21172f7816d4SBarry Smith     for (j=0; j<n; j++) {
21182f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21192f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21202f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21212f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21222f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
21232f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
21242f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
21252f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
21262f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
21272f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
21282f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
21292f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
21302f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
21312f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
21322f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
21332f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
21342f7816d4SBarry Smith       jrow++;
21352f7816d4SBarry Smith     }
21362f7816d4SBarry Smith     y[16*i]    = sum1;
21372f7816d4SBarry Smith     y[16*i+1]  = sum2;
21382f7816d4SBarry Smith     y[16*i+2]  = sum3;
21392f7816d4SBarry Smith     y[16*i+3]  = sum4;
21402f7816d4SBarry Smith     y[16*i+4]  = sum5;
21412f7816d4SBarry Smith     y[16*i+5]  = sum6;
21422f7816d4SBarry Smith     y[16*i+6]  = sum7;
21432f7816d4SBarry Smith     y[16*i+7]  = sum8;
21442f7816d4SBarry Smith     y[16*i+8]  = sum9;
21452f7816d4SBarry Smith     y[16*i+9]  = sum10;
21462f7816d4SBarry Smith     y[16*i+10] = sum11;
21472f7816d4SBarry Smith     y[16*i+11] = sum12;
21482f7816d4SBarry Smith     y[16*i+12] = sum13;
21492f7816d4SBarry Smith     y[16*i+13] = sum14;
21502f7816d4SBarry Smith     y[16*i+14] = sum15;
21512f7816d4SBarry Smith     y[16*i+15] = sum16;
21522f7816d4SBarry Smith   }
21532f7816d4SBarry Smith 
2154dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
21553649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
21561ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
21572f7816d4SBarry Smith   PetscFunctionReturn(0);
21582f7816d4SBarry Smith }
21592f7816d4SBarry Smith 
2160dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21612f7816d4SBarry Smith {
21622f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21632f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2164d2840e09SBarry Smith   const PetscScalar *x,*v;
2165d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
21662f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2167dfbe8321SBarry Smith   PetscErrorCode    ierr;
2168d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2169d2840e09SBarry Smith   PetscInt          n,i;
21702f7816d4SBarry Smith 
21712f7816d4SBarry Smith   PetscFunctionBegin;
2172d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
21733649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21741ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2175bfec09a0SHong Zhang 
21762f7816d4SBarry Smith   for (i=0; i<m; i++) {
2177bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2178bfec09a0SHong Zhang     v       = a->a + a->i[i];
21792f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
21802f7816d4SBarry Smith     alpha1  = x[16*i];
21812f7816d4SBarry Smith     alpha2  = x[16*i+1];
21822f7816d4SBarry Smith     alpha3  = x[16*i+2];
21832f7816d4SBarry Smith     alpha4  = x[16*i+3];
21842f7816d4SBarry Smith     alpha5  = x[16*i+4];
21852f7816d4SBarry Smith     alpha6  = x[16*i+5];
21862f7816d4SBarry Smith     alpha7  = x[16*i+6];
21872f7816d4SBarry Smith     alpha8  = x[16*i+7];
21882f7816d4SBarry Smith     alpha9  = x[16*i+8];
21892f7816d4SBarry Smith     alpha10 = x[16*i+9];
21902f7816d4SBarry Smith     alpha11 = x[16*i+10];
21912f7816d4SBarry Smith     alpha12 = x[16*i+11];
21922f7816d4SBarry Smith     alpha13 = x[16*i+12];
21932f7816d4SBarry Smith     alpha14 = x[16*i+13];
21942f7816d4SBarry Smith     alpha15 = x[16*i+14];
21952f7816d4SBarry Smith     alpha16 = x[16*i+15];
21962f7816d4SBarry Smith     while (n-->0) {
21972f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
21982f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
21992f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22002f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22012f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22022f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22032f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22042f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22052f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22062f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22072f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22082f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22092f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22102f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22112f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22122f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22132f7816d4SBarry Smith       idx++; v++;
22142f7816d4SBarry Smith     }
22152f7816d4SBarry Smith   }
2216dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22173649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22181ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22192f7816d4SBarry Smith   PetscFunctionReturn(0);
22202f7816d4SBarry Smith }
22212f7816d4SBarry Smith 
2222dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
22232f7816d4SBarry Smith {
22242f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22252f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2226d2840e09SBarry Smith   const PetscScalar *x,*v;
2227d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
22282f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2229dfbe8321SBarry Smith   PetscErrorCode    ierr;
2230d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2231b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
22322f7816d4SBarry Smith 
22332f7816d4SBarry Smith   PetscFunctionBegin;
22342f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
22353649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22361ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
22372f7816d4SBarry Smith   idx  = a->j;
22382f7816d4SBarry Smith   v    = a->a;
22392f7816d4SBarry Smith   ii   = a->i;
22402f7816d4SBarry Smith 
22412f7816d4SBarry Smith   for (i=0; i<m; i++) {
22422f7816d4SBarry Smith     jrow  = ii[i];
22432f7816d4SBarry Smith     n     = ii[i+1] - jrow;
22442f7816d4SBarry Smith     sum1  = 0.0;
22452f7816d4SBarry Smith     sum2  = 0.0;
22462f7816d4SBarry Smith     sum3  = 0.0;
22472f7816d4SBarry Smith     sum4  = 0.0;
22482f7816d4SBarry Smith     sum5  = 0.0;
22492f7816d4SBarry Smith     sum6  = 0.0;
22502f7816d4SBarry Smith     sum7  = 0.0;
22512f7816d4SBarry Smith     sum8  = 0.0;
22522f7816d4SBarry Smith     sum9  = 0.0;
22532f7816d4SBarry Smith     sum10 = 0.0;
22542f7816d4SBarry Smith     sum11 = 0.0;
22552f7816d4SBarry Smith     sum12 = 0.0;
22562f7816d4SBarry Smith     sum13 = 0.0;
22572f7816d4SBarry Smith     sum14 = 0.0;
22582f7816d4SBarry Smith     sum15 = 0.0;
22592f7816d4SBarry Smith     sum16 = 0.0;
22602f7816d4SBarry Smith     for (j=0; j<n; j++) {
22612f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
22622f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
22632f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
22642f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
22652f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
22662f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
22672f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
22682f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22692f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22702f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22712f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22722f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22732f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22742f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22752f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22762f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22772f7816d4SBarry Smith       jrow++;
22782f7816d4SBarry Smith     }
22792f7816d4SBarry Smith     y[16*i]    += sum1;
22802f7816d4SBarry Smith     y[16*i+1]  += sum2;
22812f7816d4SBarry Smith     y[16*i+2]  += sum3;
22822f7816d4SBarry Smith     y[16*i+3]  += sum4;
22832f7816d4SBarry Smith     y[16*i+4]  += sum5;
22842f7816d4SBarry Smith     y[16*i+5]  += sum6;
22852f7816d4SBarry Smith     y[16*i+6]  += sum7;
22862f7816d4SBarry Smith     y[16*i+7]  += sum8;
22872f7816d4SBarry Smith     y[16*i+8]  += sum9;
22882f7816d4SBarry Smith     y[16*i+9]  += sum10;
22892f7816d4SBarry Smith     y[16*i+10] += sum11;
22902f7816d4SBarry Smith     y[16*i+11] += sum12;
22912f7816d4SBarry Smith     y[16*i+12] += sum13;
22922f7816d4SBarry Smith     y[16*i+13] += sum14;
22932f7816d4SBarry Smith     y[16*i+14] += sum15;
22942f7816d4SBarry Smith     y[16*i+15] += sum16;
22952f7816d4SBarry Smith   }
22962f7816d4SBarry Smith 
2297dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22983649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22991ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23002f7816d4SBarry Smith   PetscFunctionReturn(0);
23012f7816d4SBarry Smith }
23022f7816d4SBarry Smith 
2303dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23042f7816d4SBarry Smith {
23052f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23062f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2307d2840e09SBarry Smith   const PetscScalar *x,*v;
2308d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23092f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2310dfbe8321SBarry Smith   PetscErrorCode    ierr;
2311d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2312d2840e09SBarry Smith   PetscInt          n,i;
23132f7816d4SBarry Smith 
23142f7816d4SBarry Smith   PetscFunctionBegin;
23152f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23163649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23171ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23182f7816d4SBarry Smith   for (i=0; i<m; i++) {
2319bfec09a0SHong Zhang     idx     = a->j + a->i[i];
2320bfec09a0SHong Zhang     v       = a->a + a->i[i];
23212f7816d4SBarry Smith     n       = a->i[i+1] - a->i[i];
23222f7816d4SBarry Smith     alpha1  = x[16*i];
23232f7816d4SBarry Smith     alpha2  = x[16*i+1];
23242f7816d4SBarry Smith     alpha3  = x[16*i+2];
23252f7816d4SBarry Smith     alpha4  = x[16*i+3];
23262f7816d4SBarry Smith     alpha5  = x[16*i+4];
23272f7816d4SBarry Smith     alpha6  = x[16*i+5];
23282f7816d4SBarry Smith     alpha7  = x[16*i+6];
23292f7816d4SBarry Smith     alpha8  = x[16*i+7];
23302f7816d4SBarry Smith     alpha9  = x[16*i+8];
23312f7816d4SBarry Smith     alpha10 = x[16*i+9];
23322f7816d4SBarry Smith     alpha11 = x[16*i+10];
23332f7816d4SBarry Smith     alpha12 = x[16*i+11];
23342f7816d4SBarry Smith     alpha13 = x[16*i+12];
23352f7816d4SBarry Smith     alpha14 = x[16*i+13];
23362f7816d4SBarry Smith     alpha15 = x[16*i+14];
23372f7816d4SBarry Smith     alpha16 = x[16*i+15];
23382f7816d4SBarry Smith     while (n-->0) {
23392f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
23402f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
23412f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
23422f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
23432f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
23442f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
23452f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
23462f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
23472f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
23482f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
23492f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
23502f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
23512f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
23522f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
23532f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
23542f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
23552f7816d4SBarry Smith       idx++; v++;
23562f7816d4SBarry Smith     }
23572f7816d4SBarry Smith   }
2358dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23593649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23601ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
236166ed3db0SBarry Smith   PetscFunctionReturn(0);
236266ed3db0SBarry Smith }
236366ed3db0SBarry Smith 
2364ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2365ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2366ed1c418cSBarry Smith {
2367ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2368ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2369d2840e09SBarry Smith   const PetscScalar *x,*v;
2370d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2371ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2372ed1c418cSBarry Smith   PetscErrorCode    ierr;
2373d2840e09SBarry Smith   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2374d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2375ed1c418cSBarry Smith 
2376ed1c418cSBarry Smith   PetscFunctionBegin;
23773649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2378ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2379ed1c418cSBarry Smith   idx  = a->j;
2380ed1c418cSBarry Smith   v    = a->a;
2381ed1c418cSBarry Smith   ii   = a->i;
2382ed1c418cSBarry Smith 
2383ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2384ed1c418cSBarry Smith     jrow  = ii[i];
2385ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2386ed1c418cSBarry Smith     sum1  = 0.0;
2387ed1c418cSBarry Smith     sum2  = 0.0;
2388ed1c418cSBarry Smith     sum3  = 0.0;
2389ed1c418cSBarry Smith     sum4  = 0.0;
2390ed1c418cSBarry Smith     sum5  = 0.0;
2391ed1c418cSBarry Smith     sum6  = 0.0;
2392ed1c418cSBarry Smith     sum7  = 0.0;
2393ed1c418cSBarry Smith     sum8  = 0.0;
2394ed1c418cSBarry Smith     sum9  = 0.0;
2395ed1c418cSBarry Smith     sum10 = 0.0;
2396ed1c418cSBarry Smith     sum11 = 0.0;
2397ed1c418cSBarry Smith     sum12 = 0.0;
2398ed1c418cSBarry Smith     sum13 = 0.0;
2399ed1c418cSBarry Smith     sum14 = 0.0;
2400ed1c418cSBarry Smith     sum15 = 0.0;
2401ed1c418cSBarry Smith     sum16 = 0.0;
2402ed1c418cSBarry Smith     sum17 = 0.0;
2403ed1c418cSBarry Smith     sum18 = 0.0;
240426fbe8dcSKarl Rupp 
2405ed1c418cSBarry Smith     nonzerorow += (n>0);
2406ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2407ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2408ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2409ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2410ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2411ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2412ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2413ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2414ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2415ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2416ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2417ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2418ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2419ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2420ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2421ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2422ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2423ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2424ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2425ed1c418cSBarry Smith       jrow++;
2426ed1c418cSBarry Smith     }
2427ed1c418cSBarry Smith     y[18*i]    = sum1;
2428ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2429ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2430ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2431ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2432ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2433ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2434ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2435ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2436ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2437ed1c418cSBarry Smith     y[18*i+10] = sum11;
2438ed1c418cSBarry Smith     y[18*i+11] = sum12;
2439ed1c418cSBarry Smith     y[18*i+12] = sum13;
2440ed1c418cSBarry Smith     y[18*i+13] = sum14;
2441ed1c418cSBarry Smith     y[18*i+14] = sum15;
2442ed1c418cSBarry Smith     y[18*i+15] = sum16;
2443ed1c418cSBarry Smith     y[18*i+16] = sum17;
2444ed1c418cSBarry Smith     y[18*i+17] = sum18;
2445ed1c418cSBarry Smith   }
2446ed1c418cSBarry Smith 
2447dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
24483649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2449ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2450ed1c418cSBarry Smith   PetscFunctionReturn(0);
2451ed1c418cSBarry Smith }
2452ed1c418cSBarry Smith 
2453ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2454ed1c418cSBarry Smith {
2455ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2456ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2457d2840e09SBarry Smith   const PetscScalar *x,*v;
2458d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2459ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2460ed1c418cSBarry Smith   PetscErrorCode    ierr;
2461d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2462d2840e09SBarry Smith   PetscInt          n,i;
2463ed1c418cSBarry Smith 
2464ed1c418cSBarry Smith   PetscFunctionBegin;
2465d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
24663649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2467ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2468ed1c418cSBarry Smith 
2469ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2470ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2471ed1c418cSBarry Smith     v       = a->a + a->i[i];
2472ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2473ed1c418cSBarry Smith     alpha1  = x[18*i];
2474ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2475ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2476ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2477ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2478ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2479ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2480ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2481ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2482ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2483ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2484ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2485ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2486ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2487ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2488ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2489ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2490ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2491ed1c418cSBarry Smith     while (n-->0) {
2492ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2493ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2494ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2495ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2496ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2497ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2498ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2499ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2500ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2501ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2502ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2503ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2504ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2505ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2506ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2507ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2508ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2509ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2510ed1c418cSBarry Smith       idx++; v++;
2511ed1c418cSBarry Smith     }
2512ed1c418cSBarry Smith   }
2513dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25143649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2515ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2516ed1c418cSBarry Smith   PetscFunctionReturn(0);
2517ed1c418cSBarry Smith }
2518ed1c418cSBarry Smith 
2519ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2520ed1c418cSBarry Smith {
2521ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2522ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2523d2840e09SBarry Smith   const PetscScalar *x,*v;
2524d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2525ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2526ed1c418cSBarry Smith   PetscErrorCode    ierr;
2527d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2528ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2529ed1c418cSBarry Smith 
2530ed1c418cSBarry Smith   PetscFunctionBegin;
2531ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
25323649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2533ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2534ed1c418cSBarry Smith   idx  = a->j;
2535ed1c418cSBarry Smith   v    = a->a;
2536ed1c418cSBarry Smith   ii   = a->i;
2537ed1c418cSBarry Smith 
2538ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2539ed1c418cSBarry Smith     jrow  = ii[i];
2540ed1c418cSBarry Smith     n     = ii[i+1] - jrow;
2541ed1c418cSBarry Smith     sum1  = 0.0;
2542ed1c418cSBarry Smith     sum2  = 0.0;
2543ed1c418cSBarry Smith     sum3  = 0.0;
2544ed1c418cSBarry Smith     sum4  = 0.0;
2545ed1c418cSBarry Smith     sum5  = 0.0;
2546ed1c418cSBarry Smith     sum6  = 0.0;
2547ed1c418cSBarry Smith     sum7  = 0.0;
2548ed1c418cSBarry Smith     sum8  = 0.0;
2549ed1c418cSBarry Smith     sum9  = 0.0;
2550ed1c418cSBarry Smith     sum10 = 0.0;
2551ed1c418cSBarry Smith     sum11 = 0.0;
2552ed1c418cSBarry Smith     sum12 = 0.0;
2553ed1c418cSBarry Smith     sum13 = 0.0;
2554ed1c418cSBarry Smith     sum14 = 0.0;
2555ed1c418cSBarry Smith     sum15 = 0.0;
2556ed1c418cSBarry Smith     sum16 = 0.0;
2557ed1c418cSBarry Smith     sum17 = 0.0;
2558ed1c418cSBarry Smith     sum18 = 0.0;
2559ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2560ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2561ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2562ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2563ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2564ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2565ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2566ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2567ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2568ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2569ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2570ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2571ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2572ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2573ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2574ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2575ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2576ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2577ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2578ed1c418cSBarry Smith       jrow++;
2579ed1c418cSBarry Smith     }
2580ed1c418cSBarry Smith     y[18*i]    += sum1;
2581ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2582ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2583ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2584ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2585ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2586ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2587ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2588ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2589ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2590ed1c418cSBarry Smith     y[18*i+10] += sum11;
2591ed1c418cSBarry Smith     y[18*i+11] += sum12;
2592ed1c418cSBarry Smith     y[18*i+12] += sum13;
2593ed1c418cSBarry Smith     y[18*i+13] += sum14;
2594ed1c418cSBarry Smith     y[18*i+14] += sum15;
2595ed1c418cSBarry Smith     y[18*i+15] += sum16;
2596ed1c418cSBarry Smith     y[18*i+16] += sum17;
2597ed1c418cSBarry Smith     y[18*i+17] += sum18;
2598ed1c418cSBarry Smith   }
2599ed1c418cSBarry Smith 
2600dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26013649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2602ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2603ed1c418cSBarry Smith   PetscFunctionReturn(0);
2604ed1c418cSBarry Smith }
2605ed1c418cSBarry Smith 
2606ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2607ed1c418cSBarry Smith {
2608ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2609ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2610d2840e09SBarry Smith   const PetscScalar *x,*v;
2611d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2612ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2613ed1c418cSBarry Smith   PetscErrorCode    ierr;
2614d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2615d2840e09SBarry Smith   PetscInt          n,i;
2616ed1c418cSBarry Smith 
2617ed1c418cSBarry Smith   PetscFunctionBegin;
2618ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2620ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2621ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2622ed1c418cSBarry Smith     idx     = a->j + a->i[i];
2623ed1c418cSBarry Smith     v       = a->a + a->i[i];
2624ed1c418cSBarry Smith     n       = a->i[i+1] - a->i[i];
2625ed1c418cSBarry Smith     alpha1  = x[18*i];
2626ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2627ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2628ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2629ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2630ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2631ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2632ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2633ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2634ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2635ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2636ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2637ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2638ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2639ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2640ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2641ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2642ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2643ed1c418cSBarry Smith     while (n-->0) {
2644ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2645ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2646ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2647ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2648ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2649ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2650ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2651ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2652ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2653ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2654ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2655ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2656ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2657ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2658ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2659ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2660ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2661ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2662ed1c418cSBarry Smith       idx++; v++;
2663ed1c418cSBarry Smith     }
2664ed1c418cSBarry Smith   }
2665dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26663649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2667ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2668ed1c418cSBarry Smith   PetscFunctionReturn(0);
2669ed1c418cSBarry Smith }
2670ed1c418cSBarry Smith 
26716861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
26726861f193SBarry Smith {
26736861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
26746861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
26756861f193SBarry Smith   const PetscScalar *x,*v;
26766861f193SBarry Smith   PetscScalar       *y,*sums;
26776861f193SBarry Smith   PetscErrorCode    ierr;
26786861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
26796861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
26806861f193SBarry Smith 
26816861f193SBarry Smith   PetscFunctionBegin;
26823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
26836861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
26846861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
26856861f193SBarry Smith   idx  = a->j;
26866861f193SBarry Smith   v    = a->a;
26876861f193SBarry Smith   ii   = a->i;
26886861f193SBarry Smith 
26896861f193SBarry Smith   for (i=0; i<m; i++) {
26906861f193SBarry Smith     jrow = ii[i];
26916861f193SBarry Smith     n    = ii[i+1] - jrow;
26926861f193SBarry Smith     sums = y + dof*i;
26936861f193SBarry Smith     for (j=0; j<n; j++) {
26946861f193SBarry Smith       for (k=0; k<dof; k++) {
26956861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
26966861f193SBarry Smith       }
26976861f193SBarry Smith       jrow++;
26986861f193SBarry Smith     }
26996861f193SBarry Smith   }
27006861f193SBarry Smith 
27016861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27036861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27046861f193SBarry Smith   PetscFunctionReturn(0);
27056861f193SBarry Smith }
27066861f193SBarry Smith 
27076861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27086861f193SBarry Smith {
27096861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27106861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27116861f193SBarry Smith   const PetscScalar *x,*v;
27126861f193SBarry Smith   PetscScalar       *y,*sums;
27136861f193SBarry Smith   PetscErrorCode    ierr;
27146861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27156861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27166861f193SBarry Smith 
27176861f193SBarry Smith   PetscFunctionBegin;
27186861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27206861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27216861f193SBarry Smith   idx  = a->j;
27226861f193SBarry Smith   v    = a->a;
27236861f193SBarry Smith   ii   = a->i;
27246861f193SBarry Smith 
27256861f193SBarry Smith   for (i=0; i<m; i++) {
27266861f193SBarry Smith     jrow = ii[i];
27276861f193SBarry Smith     n    = ii[i+1] - jrow;
27286861f193SBarry Smith     sums = y + dof*i;
27296861f193SBarry Smith     for (j=0; j<n; j++) {
27306861f193SBarry Smith       for (k=0; k<dof; k++) {
27316861f193SBarry Smith         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
27326861f193SBarry Smith       }
27336861f193SBarry Smith       jrow++;
27346861f193SBarry Smith     }
27356861f193SBarry Smith   }
27366861f193SBarry Smith 
27376861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27383649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27396861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
27406861f193SBarry Smith   PetscFunctionReturn(0);
27416861f193SBarry Smith }
27426861f193SBarry Smith 
27436861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27446861f193SBarry Smith {
27456861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27466861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27476861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27486861f193SBarry Smith   PetscScalar       *y;
27496861f193SBarry Smith   PetscErrorCode    ierr;
27506861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27516861f193SBarry Smith   PetscInt          n,i,k;
27526861f193SBarry Smith 
27536861f193SBarry Smith   PetscFunctionBegin;
27543649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27556861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27566861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27576861f193SBarry Smith   for (i=0; i<m; i++) {
27586861f193SBarry Smith     idx   = a->j + a->i[i];
27596861f193SBarry Smith     v     = a->a + a->i[i];
27606861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27616861f193SBarry Smith     alpha = x + dof*i;
27626861f193SBarry Smith     while (n-->0) {
27636861f193SBarry Smith       for (k=0; k<dof; k++) {
27646861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27656861f193SBarry Smith       }
276683ce7366SBarry Smith       idx++; v++;
27676861f193SBarry Smith     }
27686861f193SBarry Smith   }
27696861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27703649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27716861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27726861f193SBarry Smith   PetscFunctionReturn(0);
27736861f193SBarry Smith }
27746861f193SBarry Smith 
27756861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27766861f193SBarry Smith {
27776861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27786861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27796861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
27806861f193SBarry Smith   PetscScalar       *y;
27816861f193SBarry Smith   PetscErrorCode    ierr;
27826861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
27836861f193SBarry Smith   PetscInt          n,i,k;
27846861f193SBarry Smith 
27856861f193SBarry Smith   PetscFunctionBegin;
27866861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27873649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27886861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
27896861f193SBarry Smith   for (i=0; i<m; i++) {
27906861f193SBarry Smith     idx   = a->j + a->i[i];
27916861f193SBarry Smith     v     = a->a + a->i[i];
27926861f193SBarry Smith     n     = a->i[i+1] - a->i[i];
27936861f193SBarry Smith     alpha = x + dof*i;
27946861f193SBarry Smith     while (n-->0) {
27956861f193SBarry Smith       for (k=0; k<dof; k++) {
27966861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
27976861f193SBarry Smith       }
279883ce7366SBarry Smith       idx++; v++;
27996861f193SBarry Smith     }
28006861f193SBarry Smith   }
28016861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28036861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28046861f193SBarry Smith   PetscFunctionReturn(0);
28056861f193SBarry Smith }
28066861f193SBarry Smith 
2807f4a53059SBarry Smith /*===================================================================================*/
2808dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2809f4a53059SBarry Smith {
28104cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2811dfbe8321SBarry Smith   PetscErrorCode ierr;
2812f4a53059SBarry Smith 
2813b24ad042SBarry Smith   PetscFunctionBegin;
2814f4a53059SBarry Smith   /* start the scatter */
2815ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28164cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2817ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
28184cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2819f4a53059SBarry Smith   PetscFunctionReturn(0);
2820f4a53059SBarry Smith }
2821f4a53059SBarry Smith 
2822dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
28234cb9d9b8SBarry Smith {
28244cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2825dfbe8321SBarry Smith   PetscErrorCode ierr;
2826b24ad042SBarry Smith 
28274cb9d9b8SBarry Smith   PetscFunctionBegin;
28284cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
28294cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2830ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2831ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28324cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28334cb9d9b8SBarry Smith }
28344cb9d9b8SBarry Smith 
2835dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28364cb9d9b8SBarry Smith {
28374cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2838dfbe8321SBarry Smith   PetscErrorCode ierr;
28394cb9d9b8SBarry Smith 
2840b24ad042SBarry Smith   PetscFunctionBegin;
28414cb9d9b8SBarry Smith   /* start the scatter */
2842ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2843d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2844ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2845717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
28464cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28474cb9d9b8SBarry Smith }
28484cb9d9b8SBarry Smith 
2849dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
28504cb9d9b8SBarry Smith {
28514cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2852dfbe8321SBarry Smith   PetscErrorCode ierr;
2853b24ad042SBarry Smith 
28544cb9d9b8SBarry Smith   PetscFunctionBegin;
28554cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2856d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2857e4a140f6SJunchao Zhang   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2858ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
28594cb9d9b8SBarry Smith   PetscFunctionReturn(0);
28604cb9d9b8SBarry Smith }
28614cb9d9b8SBarry Smith 
286295ddefa2SHong Zhang /* ----------------------------------------------------------------*/
28634222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
28644222ddf1SHong Zhang {
28654222ddf1SHong Zhang   Mat_Product    *product = C->product;
28664222ddf1SHong Zhang 
28674222ddf1SHong Zhang   PetscFunctionBegin;
28684222ddf1SHong Zhang   if (product->type == MATPRODUCT_PtAP) {
28694222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2870544a5e07SHong Zhang   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
28714222ddf1SHong Zhang   PetscFunctionReturn(0);
28724222ddf1SHong Zhang }
28734222ddf1SHong Zhang 
28744222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
28754222ddf1SHong Zhang {
28764222ddf1SHong Zhang   PetscErrorCode ierr;
28774222ddf1SHong Zhang   Mat_Product    *product = C->product;
28784222ddf1SHong Zhang   PetscBool      flg = PETSC_FALSE;
28794222ddf1SHong Zhang   Mat            A=product->A,P=product->B;
28804222ddf1SHong Zhang   PetscInt       alg=1; /* set default algorithm */
28814222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE)
28824222ddf1SHong Zhang   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
28834222ddf1SHong Zhang   PetscInt       nalg=4;
28844222ddf1SHong Zhang #else
28854222ddf1SHong Zhang   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
28864222ddf1SHong Zhang   PetscInt       nalg=5;
28874222ddf1SHong Zhang #endif
28884222ddf1SHong Zhang 
28894222ddf1SHong Zhang   PetscFunctionBegin;
2890544a5e07SHong Zhang   if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
28914222ddf1SHong Zhang 
28924222ddf1SHong Zhang   /* PtAP */
28934222ddf1SHong Zhang   /* Check matrix local sizes */
28944222ddf1SHong Zhang   if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
28954222ddf1SHong Zhang   if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
28964222ddf1SHong Zhang 
28974222ddf1SHong Zhang   /* Set the default algorithm */
28984222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr);
28994222ddf1SHong Zhang   if (flg) {
29004222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
29014222ddf1SHong Zhang   }
29024222ddf1SHong Zhang 
29034222ddf1SHong Zhang   /* Get runtime option */
29044222ddf1SHong Zhang   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
29054222ddf1SHong Zhang   ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
29064222ddf1SHong Zhang   if (flg) {
29074222ddf1SHong Zhang     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
29084222ddf1SHong Zhang   }
29094222ddf1SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
29104222ddf1SHong Zhang 
29114222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr);
29124222ddf1SHong Zhang   if (flg) {
29134222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
29144222ddf1SHong Zhang     PetscFunctionReturn(0);
29154222ddf1SHong Zhang   }
29164222ddf1SHong Zhang 
29174222ddf1SHong Zhang   ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
29184222ddf1SHong Zhang   if (flg) {
29194222ddf1SHong Zhang     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
29204222ddf1SHong Zhang     PetscFunctionReturn(0);
29214222ddf1SHong Zhang   }
29224222ddf1SHong Zhang 
29234222ddf1SHong Zhang   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
29244222ddf1SHong Zhang   ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ");
29254222ddf1SHong Zhang   ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr);
29264222ddf1SHong Zhang   ierr = (C->ops->productsetfromoptions)(C);CHKERRQ(ierr);
29274222ddf1SHong Zhang   PetscFunctionReturn(0);
29284222ddf1SHong Zhang }
29294222ddf1SHong Zhang 
29304222ddf1SHong Zhang /* ----------------------------------------------------------------*/
29314222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
29327ba1a0bfSKris Buschelman {
29337ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
29340298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
29357ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
29367ba1a0bfSKris Buschelman   Mat                P         =pp->AIJ;
29377ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2938d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
29397ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2940d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2941d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
29427ba1a0bfSKris Buschelman   MatScalar          *ca;
2943d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
29447ba1a0bfSKris Buschelman 
29457ba1a0bfSKris Buschelman   PetscFunctionBegin;
29467ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
29477ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29487ba1a0bfSKris Buschelman 
29497ba1a0bfSKris Buschelman   cn = pn*ppdof;
29507ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
29517ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
2952854ce69bSBarry Smith   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
29537ba1a0bfSKris Buschelman   ci[0] = 0;
29547ba1a0bfSKris Buschelman 
29557ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
2956dcca6d9dSJed Brown   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2957580bdb30SBarry Smith   ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr);
2958580bdb30SBarry Smith   ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr);
29597ba1a0bfSKris Buschelman 
29607ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
29617ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
29627ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2963f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
29647ba1a0bfSKris Buschelman   current_space = free_space;
29657ba1a0bfSKris Buschelman 
29667ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
29677ba1a0bfSKris Buschelman   for (i=0; i<pn; i++) {
29687ba1a0bfSKris Buschelman     ptnzi = pti[i+1] - pti[i];
29697ba1a0bfSKris Buschelman     ptJ   = ptj + pti[i];
29707ba1a0bfSKris Buschelman     for (dof=0; dof<ppdof; dof++) {
29717ba1a0bfSKris Buschelman       ptanzi = 0;
29727ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
29737ba1a0bfSKris Buschelman       for (j=0; j<ptnzi; j++) {
29747ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
29757ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
29767ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
29777ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
29787ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
29797ba1a0bfSKris Buschelman         for (k=0; k<anzj; k++) {
29807ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
29817ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
29827ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
29837ba1a0bfSKris Buschelman           }
29847ba1a0bfSKris Buschelman         }
29857ba1a0bfSKris Buschelman       }
29867ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
29877ba1a0bfSKris Buschelman       ptaj = ptasparserow;
29887ba1a0bfSKris Buschelman       cnzi = 0;
29897ba1a0bfSKris Buschelman       for (j=0; j<ptanzi; j++) {
29907ba1a0bfSKris Buschelman         /* Get offset within block of P */
29917ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
29927ba1a0bfSKris Buschelman         /* Get block row of P */
29937ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
29947ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
29957ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
29967ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
29977ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
29987ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
29997ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
30007ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
30017ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
30027ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
30037ba1a0bfSKris Buschelman           }
30047ba1a0bfSKris Buschelman         }
30057ba1a0bfSKris Buschelman       }
30067ba1a0bfSKris Buschelman 
30077ba1a0bfSKris Buschelman       /* sort sparserow */
30087ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
30097ba1a0bfSKris Buschelman 
30107ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
30117ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
30127ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
3013f91af8c7SBarry Smith         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
30147ba1a0bfSKris Buschelman       }
30157ba1a0bfSKris Buschelman 
30167ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
3017580bdb30SBarry Smith       ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr);
301826fbe8dcSKarl Rupp 
30197ba1a0bfSKris Buschelman       current_space->array           += cnzi;
30207ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
30217ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
30227ba1a0bfSKris Buschelman 
302326fbe8dcSKarl Rupp       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
302426fbe8dcSKarl Rupp       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
302526fbe8dcSKarl Rupp 
30267ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
30277ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
30287ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
30297ba1a0bfSKris Buschelman     }
30307ba1a0bfSKris Buschelman   }
30317ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
30327ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
30337ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
3034854ce69bSBarry Smith   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
3035a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
303674ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
30377ba1a0bfSKris Buschelman 
30387ba1a0bfSKris Buschelman   /* Allocate space for ca */
3039854ce69bSBarry Smith   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
30407ba1a0bfSKris Buschelman 
30417ba1a0bfSKris Buschelman   /* put together the new matrix */
3042e4e71118SStefano Zampini   ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);CHKERRQ(ierr);
30434222ddf1SHong Zhang   ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr);
30447ba1a0bfSKris Buschelman 
30457ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
30467ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
30474222ddf1SHong Zhang   c          = (Mat_SeqAIJ*)(C->data);
3048e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
3049e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
30507ba1a0bfSKris Buschelman   c->nonew   = 0;
305126fbe8dcSKarl Rupp 
30524222ddf1SHong Zhang   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
30534222ddf1SHong Zhang   C->ops->productnumeric = MatProductNumeric_PtAP;
30547ba1a0bfSKris Buschelman 
30557ba1a0bfSKris Buschelman   /* Clean up. */
30567ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
30577ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
30587ba1a0bfSKris Buschelman }
30597ba1a0bfSKris Buschelman 
30607ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
30617ba1a0bfSKris Buschelman {
30627ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
30637ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
30647ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
30657ba1a0bfSKris Buschelman   Mat             P  =pp->AIJ;
30667ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
30677ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
30687ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3069a2ea699eSBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3070d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3071d2840e09SBarry Smith   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3072d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3073a2ea699eSBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3074d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
30757ba1a0bfSKris Buschelman 
30767ba1a0bfSKris Buschelman   PetscFunctionBegin;
30777ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
30781795a4d1SJed Brown   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
30797ba1a0bfSKris Buschelman 
30807ba1a0bfSKris Buschelman   /* Clear old values in C */
3081580bdb30SBarry Smith   ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr);
30827ba1a0bfSKris Buschelman 
30837ba1a0bfSKris Buschelman   for (i=0; i<am; i++) {
30847ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
30857ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
30867ba1a0bfSKris Buschelman     apnzj = 0;
30877ba1a0bfSKris Buschelman     for (j=0; j<anzi; j++) {
30887ba1a0bfSKris Buschelman       /* Get offset within block of P */
30897ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
30907ba1a0bfSKris Buschelman       /* Get block row of P */
30917ba1a0bfSKris Buschelman       prow = *aj++/ppdof;   /* integer division */
30927ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
30937ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
30947ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
30957ba1a0bfSKris Buschelman       for (k=0; k<pnzj; k++) {
30967ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
30977ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
30987ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
30997ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
31007ba1a0bfSKris Buschelman         }
31017ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
31027ba1a0bfSKris Buschelman       }
3103dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
31047ba1a0bfSKris Buschelman       aa++;
31057ba1a0bfSKris Buschelman     }
31067ba1a0bfSKris Buschelman 
31077ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
31087ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
31097ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
31107ba1a0bfSKris Buschelman 
31117ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
31127ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
31137ba1a0bfSKris Buschelman     pshift  = i%ppdof;
31147ba1a0bfSKris Buschelman     poffset = pi[prow];
31157ba1a0bfSKris Buschelman     pnzi    = pi[prow+1] - poffset;
31167ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
31177ba1a0bfSKris Buschelman     pJ = pj+poffset;
31187ba1a0bfSKris Buschelman     pA = pa+poffset;
31197ba1a0bfSKris Buschelman     for (j=0; j<pnzi; j++) {
31207ba1a0bfSKris Buschelman       crow = (*pJ)*ppdof+pshift;
31217ba1a0bfSKris Buschelman       cjj  = cj + ci[crow];
31227ba1a0bfSKris Buschelman       caj  = ca + ci[crow];
31237ba1a0bfSKris Buschelman       pJ++;
31247ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
31257ba1a0bfSKris Buschelman       for (k=0,nextap=0; nextap<apnzj; k++) {
312626fbe8dcSKarl Rupp         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
31277ba1a0bfSKris Buschelman       }
3128dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
31297ba1a0bfSKris Buschelman       pA++;
31307ba1a0bfSKris Buschelman     }
31317ba1a0bfSKris Buschelman 
31327ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
31337ba1a0bfSKris Buschelman     for (j=0; j<apnzj; j++) {
31347ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
31357ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
31367ba1a0bfSKris Buschelman     }
31377ba1a0bfSKris Buschelman   }
31387ba1a0bfSKris Buschelman 
31397ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
31407ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31417ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
314274ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
314395ddefa2SHong Zhang   PetscFunctionReturn(0);
314495ddefa2SHong Zhang }
31457ba1a0bfSKris Buschelman 
31464222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
31472121bac1SHong Zhang {
31482121bac1SHong Zhang   PetscErrorCode      ierr;
31494222ddf1SHong Zhang   Mat_Product         *product = C->product;
31504222ddf1SHong Zhang   Mat                 A=product->A,P=product->B;
31512121bac1SHong Zhang 
31522121bac1SHong Zhang   PetscFunctionBegin;
31534222ddf1SHong Zhang   ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr);
31542121bac1SHong Zhang   PetscFunctionReturn(0);
31552121bac1SHong Zhang }
31562121bac1SHong Zhang 
3157f7a08781SBarry Smith PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
315895ddefa2SHong Zhang {
315995ddefa2SHong Zhang   PetscFunctionBegin;
31603e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
316195ddefa2SHong Zhang   PetscFunctionReturn(0);
316295ddefa2SHong Zhang }
316395ddefa2SHong Zhang 
3164f7a08781SBarry Smith PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
316595ddefa2SHong Zhang {
316695ddefa2SHong Zhang   PetscFunctionBegin;
31673e0c911fSBarry Smith   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
31687ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
31697ba1a0bfSKris Buschelman }
31705c65b9ecSFande Kong 
3171bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3172bc8e477aSFande Kong 
3173bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3174bc8e477aSFande Kong {
3175bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3176bc8e477aSFande Kong   PetscErrorCode  ierr;
3177bc8e477aSFande Kong 
3178bc8e477aSFande Kong   PetscFunctionBegin;
317934bcad68SFande Kong 
3180bc8e477aSFande Kong   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3181bc8e477aSFande Kong   PetscFunctionReturn(0);
3182bc8e477aSFande Kong }
3183bc8e477aSFande Kong 
31844222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3185bc8e477aSFande Kong 
31864222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3187bc8e477aSFande Kong {
3188bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3189bc8e477aSFande Kong   PetscErrorCode  ierr;
3190bc8e477aSFande Kong 
3191bc8e477aSFande Kong   PetscFunctionBegin;
3192bc8e477aSFande Kong   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
31934222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3194bc8e477aSFande Kong   PetscFunctionReturn(0);
3195bc8e477aSFande Kong }
3196bc8e477aSFande Kong 
3197bc8e477aSFande Kong PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3198bc8e477aSFande Kong 
3199bc8e477aSFande Kong PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3200bc8e477aSFande Kong {
3201bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3202bc8e477aSFande Kong   PetscErrorCode  ierr;
3203bc8e477aSFande Kong 
3204bc8e477aSFande Kong   PetscFunctionBegin;
320534bcad68SFande Kong 
3206bc8e477aSFande Kong   ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr);
3207bc8e477aSFande Kong   PetscFunctionReturn(0);
3208bc8e477aSFande Kong }
3209bc8e477aSFande Kong 
32104222ddf1SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3211bc8e477aSFande Kong 
32124222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3213bc8e477aSFande Kong {
3214bc8e477aSFande Kong   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3215bc8e477aSFande Kong   PetscErrorCode  ierr;
3216bc8e477aSFande Kong 
3217bc8e477aSFande Kong   PetscFunctionBegin;
321834bcad68SFande Kong 
3219bc8e477aSFande Kong   ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr);
32204222ddf1SHong Zhang   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3221bc8e477aSFande Kong   PetscFunctionReturn(0);
3222bc8e477aSFande Kong }
3223bc8e477aSFande Kong 
32244222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3225bc8e477aSFande Kong {
3226bc8e477aSFande Kong   PetscErrorCode      ierr;
32274222ddf1SHong Zhang   Mat_Product         *product = C->product;
32284222ddf1SHong Zhang   Mat                 A=product->A,P=product->B;
32294222ddf1SHong Zhang   PetscBool           flg;
3230bc8e477aSFande Kong 
3231bc8e477aSFande Kong   PetscFunctionBegin;
32324222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr);
32334222ddf1SHong Zhang   if (flg) {
32344222ddf1SHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr);
32354222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
32364222ddf1SHong Zhang     PetscFunctionReturn(0);
3237bc8e477aSFande Kong   }
323834bcad68SFande Kong 
32394222ddf1SHong Zhang   ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr);
32404222ddf1SHong Zhang   if (flg) {
32414222ddf1SHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr);
32424222ddf1SHong Zhang     C->ops->productnumeric = MatProductNumeric_PtAP;
32434222ddf1SHong Zhang     PetscFunctionReturn(0);
32444222ddf1SHong Zhang   }
324534bcad68SFande Kong 
32464222ddf1SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3247bc8e477aSFande Kong   PetscFunctionReturn(0);
3248bc8e477aSFande Kong }
3249bc8e477aSFande Kong 
3250cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32519c4fc4c7SBarry Smith {
32529c4fc4c7SBarry Smith   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3253ceb03754SKris Buschelman   Mat            a    = b->AIJ,B;
32549c4fc4c7SBarry Smith   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
32559c4fc4c7SBarry Smith   PetscErrorCode ierr;
32560fd73130SBarry Smith   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3257ba8c8a56SBarry Smith   PetscInt       *cols;
3258ba8c8a56SBarry Smith   PetscScalar    *vals;
32599c4fc4c7SBarry Smith 
32609c4fc4c7SBarry Smith   PetscFunctionBegin;
32619c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3262785e854fSJed Brown   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
32639c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32649c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
326526fbe8dcSKarl Rupp     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
32669c4fc4c7SBarry Smith   }
3267fdc842d1SBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3268fdc842d1SBarry Smith   ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr);
3269fdc842d1SBarry Smith   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3270fdc842d1SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr);
32719c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
3272785e854fSJed Brown   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
32739c4fc4c7SBarry Smith   ii   = 0;
32749c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3275ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32760fd73130SBarry Smith     for (j=0; j<dof; j++) {
327726fbe8dcSKarl Rupp       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3278ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
32799c4fc4c7SBarry Smith       ii++;
32809c4fc4c7SBarry Smith     }
3281ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32829c4fc4c7SBarry Smith   }
32839c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3284ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286ceb03754SKris Buschelman 
3287511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
328828be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3289c3d102feSKris Buschelman   } else {
3290ceb03754SKris Buschelman     *newmat = B;
3291c3d102feSKris Buschelman   }
32929c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32939c4fc4c7SBarry Smith }
32949c4fc4c7SBarry Smith 
3295c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3296be1d678aSKris Buschelman 
3297cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32980fd73130SBarry Smith {
32990fd73130SBarry Smith   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3300ceb03754SKris Buschelman   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
33010fd73130SBarry Smith   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
33020fd73130SBarry Smith   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
33030fd73130SBarry Smith   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
33040fd73130SBarry Smith   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
33050298fd71SBarry Smith   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
33060298fd71SBarry Smith   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
33070fd73130SBarry Smith   PetscInt       rstart,cstart,*garray,ii,k;
33080fd73130SBarry Smith   PetscErrorCode ierr;
33090fd73130SBarry Smith   PetscScalar    *vals,*ovals;
33100fd73130SBarry Smith 
33110fd73130SBarry Smith   PetscFunctionBegin;
3312dcca6d9dSJed Brown   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3313d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33140fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
33150fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
33160fd73130SBarry Smith     for (j=0; j<dof; j++) {
33170fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
33180fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
33190fd73130SBarry Smith     }
33200fd73130SBarry Smith   }
3321fdc842d1SBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3322fdc842d1SBarry Smith   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3323fdc842d1SBarry Smith   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3324fdc842d1SBarry Smith   ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr);
3325f27682edSJed Brown   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
33260fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
33270fd73130SBarry Smith 
3328dcca6d9dSJed Brown   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3329d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3330d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
33310fd73130SBarry Smith   garray = mpiaij->garray;
33320fd73130SBarry Smith 
33330fd73130SBarry Smith   ii = rstart;
3334d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33350fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33360fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33370fd73130SBarry Smith     for (j=0; j<dof; j++) {
33380fd73130SBarry Smith       for (k=0; k<ncols; k++) {
33390fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
33400fd73130SBarry Smith       }
33410fd73130SBarry Smith       for (k=0; k<oncols; k++) {
33420fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
33430fd73130SBarry Smith       }
3344ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3345ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
33460fd73130SBarry Smith       ii++;
33470fd73130SBarry Smith     }
33480fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33490fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33500fd73130SBarry Smith   }
33510fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
33520fd73130SBarry Smith 
3353ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3354ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3355ceb03754SKris Buschelman 
3356511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
33577adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
33587adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
335926fbe8dcSKarl Rupp 
336028be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
336126fbe8dcSKarl Rupp 
33627adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3363c3d102feSKris Buschelman   } else {
3364ceb03754SKris Buschelman     *newmat = B;
3365c3d102feSKris Buschelman   }
33660fd73130SBarry Smith   PetscFunctionReturn(0);
33670fd73130SBarry Smith }
33680fd73130SBarry Smith 
33697dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
33709e516c8fSBarry Smith {
33719e516c8fSBarry Smith   PetscErrorCode ierr;
33729e516c8fSBarry Smith   Mat            A;
33739e516c8fSBarry Smith 
33749e516c8fSBarry Smith   PetscFunctionBegin;
33759e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
33767dae84e0SHong Zhang   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
33779e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
33789e516c8fSBarry Smith   PetscFunctionReturn(0);
33799e516c8fSBarry Smith }
33800fd73130SBarry Smith 
3381ec626240SStefano Zampini PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3382ec626240SStefano Zampini {
3383ec626240SStefano Zampini   PetscErrorCode ierr;
3384ec626240SStefano Zampini   Mat            A;
3385ec626240SStefano Zampini 
3386ec626240SStefano Zampini   PetscFunctionBegin;
3387ec626240SStefano Zampini   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3388ec626240SStefano Zampini   ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr);
3389ec626240SStefano Zampini   ierr = MatDestroy(&A);CHKERRQ(ierr);
3390ec626240SStefano Zampini   PetscFunctionReturn(0);
3391ec626240SStefano Zampini }
3392ec626240SStefano Zampini 
3393bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3394480dffcfSJed Brown /*@
33950bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33960bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33970bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33980bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33990bad9183SKris Buschelman 
3400ff585edeSJed Brown   Collective
3401ff585edeSJed Brown 
3402ff585edeSJed Brown   Input Parameters:
3403ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3404ff585edeSJed Brown - dof - the block size (number of components per node)
3405ff585edeSJed Brown 
3406ff585edeSJed Brown   Output Parameter:
3407ff585edeSJed Brown . maij - the new MAIJ matrix
3408ff585edeSJed Brown 
34090bad9183SKris Buschelman   Operations provided:
34100fd73130SBarry Smith + MatMult
34110bad9183SKris Buschelman . MatMultTranspose
34120bad9183SKris Buschelman . MatMultAdd
34130bad9183SKris Buschelman . MatMultTransposeAdd
34140fd73130SBarry Smith - MatView
34150bad9183SKris Buschelman 
34160bad9183SKris Buschelman   Level: advanced
34170bad9183SKris Buschelman 
3418b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3419ff585edeSJed Brown @*/
34207087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
342182b1193eSBarry Smith {
3422dfbe8321SBarry Smith   PetscErrorCode ierr;
3423b24ad042SBarry Smith   PetscMPIInt    size;
3424b24ad042SBarry Smith   PetscInt       n;
342582b1193eSBarry Smith   Mat            B;
3426fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3427fdc842d1SBarry Smith   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3428fdc842d1SBarry Smith   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3429fdc842d1SBarry Smith #endif
343082b1193eSBarry Smith 
343182b1193eSBarry Smith   PetscFunctionBegin;
3432fdc842d1SBarry Smith   dof  = PetscAbs(dof);
3433d72c5c08SBarry Smith   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3434d72c5c08SBarry Smith 
343526fbe8dcSKarl Rupp   if (dof == 1) *maij = A;
343626fbe8dcSKarl Rupp   else {
3437ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3438*c248e2fdSStefano Zampini     /* propagate vec type */
3439*c248e2fdSStefano Zampini     ierr = MatSetVecType(B,A->defaultvectype);CHKERRQ(ierr);
3440d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3441bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3442bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3443bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3444bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
344526fbe8dcSKarl Rupp 
3446cd3bbe55SBarry Smith     B->assembled = PETSC_TRUE;
3447d72c5c08SBarry Smith 
3448ce94432eSBarry Smith     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3449f4a53059SBarry Smith     if (size == 1) {
3450feb9fe23SJed Brown       Mat_SeqMAIJ *b;
3451feb9fe23SJed Brown 
3452b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
345326fbe8dcSKarl Rupp 
34540298fd71SBarry Smith       B->ops->setup   = NULL;
34554cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
34560fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
34574222ddf1SHong Zhang 
3458feb9fe23SJed Brown       b               = (Mat_SeqMAIJ*)B->data;
3459b9b97703SBarry Smith       b->dof          = dof;
34604cb9d9b8SBarry Smith       b->AIJ          = A;
346126fbe8dcSKarl Rupp 
3462d72c5c08SBarry Smith       if (dof == 2) {
3463cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3464cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3465cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3466cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3467bcc973b7SBarry Smith       } else if (dof == 3) {
3468bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3469bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3470bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3471bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3472bcc973b7SBarry Smith       } else if (dof == 4) {
3473bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3474bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3475bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3476bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3477f9fae5adSBarry Smith       } else if (dof == 5) {
3478f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3479f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3480f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3481f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34826cd98798SBarry Smith       } else if (dof == 6) {
34836cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34846cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34856cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34866cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3487ed8eea03SBarry Smith       } else if (dof == 7) {
3488ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3489ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3490ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3491ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
349266ed3db0SBarry Smith       } else if (dof == 8) {
349366ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
349466ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
349566ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
349666ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34972b692628SMatthew Knepley       } else if (dof == 9) {
34982b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34992b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
35002b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
35012b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3502b9cda22cSBarry Smith       } else if (dof == 10) {
3503b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3504b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3505b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3506b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3507dbdd7285SBarry Smith       } else if (dof == 11) {
3508dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3509dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3510dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3511dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
35122f7816d4SBarry Smith       } else if (dof == 16) {
35132f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
35142f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
35152f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
35162f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3517ed1c418cSBarry Smith       } else if (dof == 18) {
3518ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3519ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3520ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3521ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
352282b1193eSBarry Smith       } else {
35236861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
35246861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
35256861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
35266861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
352782b1193eSBarry Smith       }
3528fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3529fdc842d1SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3530fdc842d1SBarry Smith #endif
3531bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
35324222ddf1SHong Zhang       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3533f4a53059SBarry Smith     } else {
3534f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3535feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3536f4a53059SBarry Smith       IS          from,to;
3537f4a53059SBarry Smith       Vec         gvec;
3538f4a53059SBarry Smith 
3539b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
354026fbe8dcSKarl Rupp 
35410298fd71SBarry Smith       B->ops->setup            = NULL;
3542d72c5c08SBarry Smith       B->ops->destroy          = MatDestroy_MPIMAIJ;
35430fd73130SBarry Smith       B->ops->view             = MatView_MPIMAIJ;
354426fbe8dcSKarl Rupp 
3545b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3546b9b97703SBarry Smith       b->dof = dof;
3547b9b97703SBarry Smith       b->A   = A;
354826fbe8dcSKarl Rupp 
3549fdc842d1SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr);
3550fdc842d1SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr);
35514cb9d9b8SBarry Smith 
3552f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3553a34bdc0bSBarry Smith       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3554a34bdc0bSBarry Smith       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
355513288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3556a34bdc0bSBarry Smith       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3557f4a53059SBarry Smith 
3558f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3559ce94432eSBarry Smith       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3560f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3561f4a53059SBarry Smith 
3562f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3563ce94432eSBarry Smith       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3564f4a53059SBarry Smith 
3565f4a53059SBarry Smith       /* generate the scatter context */
35669448b7f1SJunchao Zhang       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3567f4a53059SBarry Smith 
35686bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
35696bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
35706bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3571f4a53059SBarry Smith 
3572f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
35734cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
35744cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
35754cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
357626fbe8dcSKarl Rupp 
3577fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3578fdc842d1SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3579fdc842d1SBarry Smith #endif
3580bdf89e91SBarry Smith       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
35814222ddf1SHong Zhang       ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3582f4a53059SBarry Smith     }
35837dae84e0SHong Zhang     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3584ec626240SStefano Zampini     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
35855c65b9ecSFande Kong     ierr  = MatSetFromOptions(B);CHKERRQ(ierr);
35864994cf47SJed Brown     ierr  = MatSetUp(B);CHKERRQ(ierr);
3587fdc842d1SBarry Smith #if defined(PETSC_HAVE_CUDA)
3588fdc842d1SBarry Smith     /* temporary until we have CUDA implementation of MAIJ */
3589fdc842d1SBarry Smith     {
3590fdc842d1SBarry Smith       PetscBool flg;
3591fdc842d1SBarry Smith       if (convert) {
3592fdc842d1SBarry Smith         ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr);
3593fdc842d1SBarry Smith         if (flg) {
3594fdc842d1SBarry Smith           ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
3595fdc842d1SBarry Smith         }
3596fdc842d1SBarry Smith       }
3597fdc842d1SBarry Smith     }
3598fdc842d1SBarry Smith #endif
3599cd3bbe55SBarry Smith     *maij = B;
3600146574abSBarry Smith     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3601d72c5c08SBarry Smith   }
360282b1193eSBarry Smith   PetscFunctionReturn(0);
360382b1193eSBarry Smith }
3604