xref: /petsc/src/mat/impls/maij/maij.c (revision 356c569eeddcd3d2ddd75f7f6c32309775ac8da6)
1be1d678aSKris Buschelman 
282b1193eSBarry Smith /*
3cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
4cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
5cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
6cd3bbe55SBarry Smith   independently.
7cd3bbe55SBarry Smith 
8cd3bbe55SBarry Smith      We provide:
9cd3bbe55SBarry Smith          MatMult()
10cd3bbe55SBarry Smith          MatMultTranspose()
11cd3bbe55SBarry Smith          MatMultTransposeAdd()
12cd3bbe55SBarry Smith          MatMultAdd()
13cd3bbe55SBarry Smith           and
14cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
15f4a53059SBarry Smith 
16f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1782b1193eSBarry Smith */
1882b1193eSBarry Smith 
19c6db04a5SJed Brown #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
21b45d2f2cSJed Brown #include <petsc-private/vecimpl.h>
2282b1193eSBarry Smith 
234a2ae208SSatish Balay #undef __FUNCT__
244a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
25ff585edeSJed Brown /*@C
26ff585edeSJed Brown    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
27ff585edeSJed Brown 
28ff585edeSJed Brown    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
29ff585edeSJed Brown 
30ff585edeSJed Brown    Input Parameter:
31ff585edeSJed Brown .  A - the MAIJ matrix
32ff585edeSJed Brown 
33ff585edeSJed Brown    Output Parameter:
34ff585edeSJed Brown .  B - the AIJ matrix
35ff585edeSJed Brown 
36ff585edeSJed Brown    Level: advanced
37ff585edeSJed Brown 
38ff585edeSJed Brown    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
39ff585edeSJed Brown 
40ff585edeSJed Brown .seealso: MatCreateMAIJ()
41ff585edeSJed Brown @*/
427087cfbeSBarry Smith PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
43b9b97703SBarry Smith {
44dfbe8321SBarry Smith   PetscErrorCode ierr;
45ace3abfcSBarry Smith   PetscBool      ismpimaij,isseqmaij;
46b9b97703SBarry Smith 
47b9b97703SBarry Smith   PetscFunctionBegin;
48251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
49251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
50b9b97703SBarry Smith   if (ismpimaij) {
51b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith     *B = b->A;
54b9b97703SBarry Smith   } else if (isseqmaij) {
55b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
56b9b97703SBarry Smith 
57b9b97703SBarry Smith     *B = b->AIJ;
58b9b97703SBarry Smith   } else {
59b9b97703SBarry Smith     *B = A;
60b9b97703SBarry Smith   }
61b9b97703SBarry Smith   PetscFunctionReturn(0);
62b9b97703SBarry Smith }
63b9b97703SBarry Smith 
644a2ae208SSatish Balay #undef __FUNCT__
654a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
66ff585edeSJed Brown /*@C
67ff585edeSJed Brown    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
68ff585edeSJed Brown 
693f9fe445SBarry Smith    Logically Collective
70ff585edeSJed Brown 
71ff585edeSJed Brown    Input Parameter:
72ff585edeSJed Brown +  A - the MAIJ matrix
73ff585edeSJed Brown -  dof - the block size for the new matrix
74ff585edeSJed Brown 
75ff585edeSJed Brown    Output Parameter:
76ff585edeSJed Brown .  B - the new MAIJ matrix
77ff585edeSJed Brown 
78ff585edeSJed Brown    Level: advanced
79ff585edeSJed Brown 
80ff585edeSJed Brown .seealso: MatCreateMAIJ()
81ff585edeSJed Brown @*/
827087cfbeSBarry Smith PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83b9b97703SBarry Smith {
84dfbe8321SBarry Smith   PetscErrorCode ierr;
853b98c0a2SBarry Smith   Mat            Aij = PETSC_NULL;
86b9b97703SBarry Smith 
87b9b97703SBarry Smith   PetscFunctionBegin;
88c5eb9154SBarry Smith   PetscValidLogicalCollectiveInt(A,dof,2);
89b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
90b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
91b9b97703SBarry Smith   PetscFunctionReturn(0);
92b9b97703SBarry Smith }
93b9b97703SBarry Smith 
944a2ae208SSatish Balay #undef __FUNCT__
954a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
96dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
9782b1193eSBarry Smith {
98dfbe8321SBarry Smith   PetscErrorCode ierr;
994cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
10082b1193eSBarry Smith 
10182b1193eSBarry Smith   PetscFunctionBegin;
1026bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
103bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
1044cb9d9b8SBarry Smith   PetscFunctionReturn(0);
1054cb9d9b8SBarry Smith }
1064cb9d9b8SBarry Smith 
1074a2ae208SSatish Balay #undef __FUNCT__
108*356c569eSBarry Smith #define __FUNCT__ "MatSetUp_MAIJ"
109*356c569eSBarry Smith PetscErrorCode MatSetUp_MAIJ(Mat A)
110*356c569eSBarry Smith {
111*356c569eSBarry Smith   PetscFunctionBegin;
112*356c569eSBarry Smith   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
113*356c569eSBarry Smith   PetscFunctionReturn(0);
114*356c569eSBarry Smith }
115*356c569eSBarry Smith 
116*356c569eSBarry Smith #undef __FUNCT__
1170fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
1180fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
1190fd73130SBarry Smith {
1200fd73130SBarry Smith   PetscErrorCode ierr;
1210fd73130SBarry Smith   Mat            B;
1220fd73130SBarry Smith 
1230fd73130SBarry Smith   PetscFunctionBegin;
124ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1250fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1266bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1270fd73130SBarry Smith   PetscFunctionReturn(0);
1280fd73130SBarry Smith }
1290fd73130SBarry Smith 
1300fd73130SBarry Smith #undef __FUNCT__
1310fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
1320fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
1330fd73130SBarry Smith {
1340fd73130SBarry Smith   PetscErrorCode ierr;
1350fd73130SBarry Smith   Mat            B;
1360fd73130SBarry Smith 
1370fd73130SBarry Smith   PetscFunctionBegin;
138ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
1390fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
1406bf464f9SBarry Smith   ierr = MatDestroy(&B);CHKERRQ(ierr);
1410fd73130SBarry Smith   PetscFunctionReturn(0);
1420fd73130SBarry Smith }
1430fd73130SBarry Smith 
1440fd73130SBarry Smith #undef __FUNCT__
1454a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
146dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1474cb9d9b8SBarry Smith {
148dfbe8321SBarry Smith   PetscErrorCode ierr;
1494cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1504cb9d9b8SBarry Smith 
1514cb9d9b8SBarry Smith   PetscFunctionBegin;
1526bf464f9SBarry Smith   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
1536bf464f9SBarry Smith   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
1546bf464f9SBarry Smith   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
1556bf464f9SBarry Smith   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
1566bf464f9SBarry Smith   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
157bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
158dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
159b9b97703SBarry Smith   PetscFunctionReturn(0);
160b9b97703SBarry Smith }
161b9b97703SBarry Smith 
1620bad9183SKris Buschelman /*MC
163fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1640bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1650bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1660bad9183SKris Buschelman 
1670bad9183SKris Buschelman   Operations provided:
1680bad9183SKris Buschelman . MatMult
1690bad9183SKris Buschelman . MatMultTranspose
1700bad9183SKris Buschelman . MatMultAdd
1710bad9183SKris Buschelman . MatMultTransposeAdd
1720bad9183SKris Buschelman 
1730bad9183SKris Buschelman   Level: advanced
1740bad9183SKris Buschelman 
175b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
1760bad9183SKris Buschelman M*/
1770bad9183SKris Buschelman 
17882b1193eSBarry Smith EXTERN_C_BEGIN
1794a2ae208SSatish Balay #undef __FUNCT__
1804a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
1817087cfbeSBarry Smith PetscErrorCode  MatCreate_MAIJ(Mat A)
18282b1193eSBarry Smith {
183dfbe8321SBarry Smith   PetscErrorCode ierr;
1844cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
18564b52464SHong Zhang   PetscMPIInt    size;
18682b1193eSBarry Smith 
18782b1193eSBarry Smith   PetscFunctionBegin;
18838f2d2fdSLisandro Dalcin   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
189b0a32e0cSBarry Smith   A->data  = (void*)b;
190cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
191*356c569eSBarry Smith   A->ops->setup = MatSetUp_MAIJ;
192f4a53059SBarry Smith 
193cd3bbe55SBarry Smith   b->AIJ  = 0;
194cd3bbe55SBarry Smith   b->dof  = 0;
195f4a53059SBarry Smith   b->OAIJ = 0;
196f4a53059SBarry Smith   b->ctx  = 0;
197f4a53059SBarry Smith   b->w    = 0;
1987adad957SLisandro Dalcin   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
19964b52464SHong Zhang   if (size == 1){
20064b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
20164b52464SHong Zhang   } else {
20264b52464SHong Zhang     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
20364b52464SHong Zhang   }
20482b1193eSBarry Smith   PetscFunctionReturn(0);
20582b1193eSBarry Smith }
20682b1193eSBarry Smith EXTERN_C_END
20782b1193eSBarry Smith 
208cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
2094a2ae208SSatish Balay #undef __FUNCT__
210b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
211dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
21282b1193eSBarry Smith {
2134cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
214bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
215d2840e09SBarry Smith   const PetscScalar *x,*v;
216d2840e09SBarry Smith   PetscScalar       *y, sum1, sum2;
217dfbe8321SBarry Smith   PetscErrorCode    ierr;
218d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
219d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
22082b1193eSBarry Smith 
221bcc973b7SBarry Smith   PetscFunctionBegin;
2223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2231ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
224bcc973b7SBarry Smith   idx  = a->j;
225bcc973b7SBarry Smith   v    = a->a;
226bcc973b7SBarry Smith   ii   = a->i;
227bcc973b7SBarry Smith 
228bcc973b7SBarry Smith   for (i=0; i<m; i++) {
229bcc973b7SBarry Smith     jrow = ii[i];
230bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
231bcc973b7SBarry Smith     sum1  = 0.0;
232bcc973b7SBarry Smith     sum2  = 0.0;
233b3c51c66SMatthew Knepley     nonzerorow += (n>0);
234bcc973b7SBarry Smith     for (j=0; j<n; j++) {
235bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
236bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
237bcc973b7SBarry Smith       jrow++;
238bcc973b7SBarry Smith      }
239bcc973b7SBarry Smith     y[2*i]   = sum1;
240bcc973b7SBarry Smith     y[2*i+1] = sum2;
241bcc973b7SBarry Smith   }
242bcc973b7SBarry Smith 
243dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
2443649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2451ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
24682b1193eSBarry Smith   PetscFunctionReturn(0);
24782b1193eSBarry Smith }
248bcc973b7SBarry Smith 
2494a2ae208SSatish Balay #undef __FUNCT__
250b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
251dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
25282b1193eSBarry Smith {
2534cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
254bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
255d2840e09SBarry Smith   const PetscScalar *x,*v;
256d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
257dfbe8321SBarry Smith   PetscErrorCode    ierr;
258d2840e09SBarry Smith   PetscInt          n,i;
259d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
26082b1193eSBarry Smith 
261bcc973b7SBarry Smith   PetscFunctionBegin;
262d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2633649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2641ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
265bfec09a0SHong Zhang 
266bcc973b7SBarry Smith   for (i=0; i<m; i++) {
267bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
268bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
269bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
270bcc973b7SBarry Smith     alpha1 = x[2*i];
271bcc973b7SBarry Smith     alpha2 = x[2*i+1];
272bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
273bcc973b7SBarry Smith   }
274dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
2753649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2761ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27782b1193eSBarry Smith   PetscFunctionReturn(0);
27882b1193eSBarry Smith }
279bcc973b7SBarry Smith 
2804a2ae208SSatish Balay #undef __FUNCT__
281b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
282dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
28382b1193eSBarry Smith {
2844cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
285bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
286d2840e09SBarry Smith   const PetscScalar *x,*v;
287d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2;
288dfbe8321SBarry Smith   PetscErrorCode    ierr;
289b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
290d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
29182b1193eSBarry Smith 
292bcc973b7SBarry Smith   PetscFunctionBegin;
293f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2943649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2951ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
296bcc973b7SBarry Smith   idx  = a->j;
297bcc973b7SBarry Smith   v    = a->a;
298bcc973b7SBarry Smith   ii   = a->i;
299bcc973b7SBarry Smith 
300bcc973b7SBarry Smith   for (i=0; i<m; i++) {
301bcc973b7SBarry Smith     jrow = ii[i];
302bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
303bcc973b7SBarry Smith     sum1  = 0.0;
304bcc973b7SBarry Smith     sum2  = 0.0;
305bcc973b7SBarry Smith     for (j=0; j<n; j++) {
306bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
307bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
308bcc973b7SBarry Smith       jrow++;
309bcc973b7SBarry Smith      }
310bcc973b7SBarry Smith     y[2*i]   += sum1;
311bcc973b7SBarry Smith     y[2*i+1] += sum2;
312bcc973b7SBarry Smith   }
313bcc973b7SBarry Smith 
314dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3161ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
317bcc973b7SBarry Smith   PetscFunctionReturn(0);
31882b1193eSBarry Smith }
3194a2ae208SSatish Balay #undef __FUNCT__
320b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
321dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
32282b1193eSBarry Smith {
3234cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
324bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
325d2840e09SBarry Smith   const PetscScalar *x,*v;
326d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2;
327dfbe8321SBarry Smith   PetscErrorCode    ierr;
328d2840e09SBarry Smith   PetscInt          n,i;
329d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
33082b1193eSBarry Smith 
331bcc973b7SBarry Smith   PetscFunctionBegin;
332f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3333649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3341ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
335bfec09a0SHong Zhang 
336bcc973b7SBarry Smith   for (i=0; i<m; i++) {
337bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
338bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
339bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
340bcc973b7SBarry Smith     alpha1 = x[2*i];
341bcc973b7SBarry Smith     alpha2 = x[2*i+1];
342bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
343bcc973b7SBarry Smith   }
344dc0b31edSSatish Balay   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
3453649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3461ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
347bcc973b7SBarry Smith   PetscFunctionReturn(0);
34882b1193eSBarry Smith }
349cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3504a2ae208SSatish Balay #undef __FUNCT__
351b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
352dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
353bcc973b7SBarry Smith {
3544cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
355bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
356d2840e09SBarry Smith   const PetscScalar *x,*v;
357d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
358dfbe8321SBarry Smith   PetscErrorCode    ierr;
359d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
360d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
36182b1193eSBarry Smith 
362bcc973b7SBarry Smith   PetscFunctionBegin;
3633649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
3641ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
365bcc973b7SBarry Smith   idx  = a->j;
366bcc973b7SBarry Smith   v    = a->a;
367bcc973b7SBarry Smith   ii   = a->i;
368bcc973b7SBarry Smith 
369bcc973b7SBarry Smith   for (i=0; i<m; i++) {
370bcc973b7SBarry Smith     jrow = ii[i];
371bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
372bcc973b7SBarry Smith     sum1  = 0.0;
373bcc973b7SBarry Smith     sum2  = 0.0;
374bcc973b7SBarry Smith     sum3  = 0.0;
375b3c51c66SMatthew Knepley     nonzerorow += (n>0);
376bcc973b7SBarry Smith     for (j=0; j<n; j++) {
377bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
378bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
379bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
380bcc973b7SBarry Smith       jrow++;
381bcc973b7SBarry Smith      }
382bcc973b7SBarry Smith     y[3*i]   = sum1;
383bcc973b7SBarry Smith     y[3*i+1] = sum2;
384bcc973b7SBarry Smith     y[3*i+2] = sum3;
385bcc973b7SBarry Smith   }
386bcc973b7SBarry Smith 
387dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
3883649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
3891ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
390bcc973b7SBarry Smith   PetscFunctionReturn(0);
391bcc973b7SBarry Smith }
392bcc973b7SBarry Smith 
3934a2ae208SSatish Balay #undef __FUNCT__
394b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
395dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
396bcc973b7SBarry Smith {
3974cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
398bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
399d2840e09SBarry Smith   const PetscScalar *x,*v;
400d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
401dfbe8321SBarry Smith   PetscErrorCode    ierr;
402d2840e09SBarry Smith   PetscInt          n,i;
403d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
404bcc973b7SBarry Smith 
405bcc973b7SBarry Smith   PetscFunctionBegin;
406d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
4073649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4081ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
409bfec09a0SHong Zhang 
410bcc973b7SBarry Smith   for (i=0; i<m; i++) {
411bfec09a0SHong Zhang     idx    = a->j + a->i[i];
412bfec09a0SHong Zhang     v      = a->a + a->i[i];
413bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
414bcc973b7SBarry Smith     alpha1 = x[3*i];
415bcc973b7SBarry Smith     alpha2 = x[3*i+1];
416bcc973b7SBarry Smith     alpha3 = x[3*i+2];
417bcc973b7SBarry Smith     while (n-->0) {
418bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
419bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
420bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
421bcc973b7SBarry Smith       idx++; v++;
422bcc973b7SBarry Smith     }
423bcc973b7SBarry Smith   }
424dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4253649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4261ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
427bcc973b7SBarry Smith   PetscFunctionReturn(0);
428bcc973b7SBarry Smith }
429bcc973b7SBarry Smith 
4304a2ae208SSatish Balay #undef __FUNCT__
431b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
432dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
433bcc973b7SBarry Smith {
4344cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
435bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
436d2840e09SBarry Smith   const PetscScalar *x,*v;
437d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3;
438dfbe8321SBarry Smith   PetscErrorCode    ierr;
439b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
440d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
441bcc973b7SBarry Smith 
442bcc973b7SBarry Smith   PetscFunctionBegin;
443f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4443649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4451ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
446bcc973b7SBarry Smith   idx  = a->j;
447bcc973b7SBarry Smith   v    = a->a;
448bcc973b7SBarry Smith   ii   = a->i;
449bcc973b7SBarry Smith 
450bcc973b7SBarry Smith   for (i=0; i<m; i++) {
451bcc973b7SBarry Smith     jrow = ii[i];
452bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
453bcc973b7SBarry Smith     sum1  = 0.0;
454bcc973b7SBarry Smith     sum2  = 0.0;
455bcc973b7SBarry Smith     sum3  = 0.0;
456bcc973b7SBarry Smith     for (j=0; j<n; j++) {
457bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
458bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
459bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
460bcc973b7SBarry Smith       jrow++;
461bcc973b7SBarry Smith      }
462bcc973b7SBarry Smith     y[3*i]   += sum1;
463bcc973b7SBarry Smith     y[3*i+1] += sum2;
464bcc973b7SBarry Smith     y[3*i+2] += sum3;
465bcc973b7SBarry Smith   }
466bcc973b7SBarry Smith 
467dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
4683649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
4691ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
470bcc973b7SBarry Smith   PetscFunctionReturn(0);
471bcc973b7SBarry Smith }
4724a2ae208SSatish Balay #undef __FUNCT__
473b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
474dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
475bcc973b7SBarry Smith {
4764cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
477bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
478d2840e09SBarry Smith   const PetscScalar *x,*v;
479d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3;
480dfbe8321SBarry Smith   PetscErrorCode    ierr;
481d2840e09SBarry Smith   PetscInt          n,i;
482d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
483bcc973b7SBarry Smith 
484bcc973b7SBarry Smith   PetscFunctionBegin;
485f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4863649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
4871ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
488bcc973b7SBarry Smith   for (i=0; i<m; i++) {
489bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
490bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
491bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
492bcc973b7SBarry Smith     alpha1 = x[3*i];
493bcc973b7SBarry Smith     alpha2 = x[3*i+1];
494bcc973b7SBarry Smith     alpha3 = x[3*i+2];
495bcc973b7SBarry Smith     while (n-->0) {
496bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
497bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
498bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
499bcc973b7SBarry Smith       idx++; v++;
500bcc973b7SBarry Smith     }
501bcc973b7SBarry Smith   }
502dc0b31edSSatish Balay   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
5033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5041ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
505bcc973b7SBarry Smith   PetscFunctionReturn(0);
506bcc973b7SBarry Smith }
507bcc973b7SBarry Smith 
508bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
5094a2ae208SSatish Balay #undef __FUNCT__
510b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
511dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
512bcc973b7SBarry Smith {
5134cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
514bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
515d2840e09SBarry Smith   const PetscScalar *x,*v;
516d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
517dfbe8321SBarry Smith   PetscErrorCode    ierr;
518d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
519d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
520bcc973b7SBarry Smith 
521bcc973b7SBarry Smith   PetscFunctionBegin;
5223649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5231ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
524bcc973b7SBarry Smith   idx  = a->j;
525bcc973b7SBarry Smith   v    = a->a;
526bcc973b7SBarry Smith   ii   = a->i;
527bcc973b7SBarry Smith 
528bcc973b7SBarry Smith   for (i=0; i<m; i++) {
529bcc973b7SBarry Smith     jrow = ii[i];
530bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
531bcc973b7SBarry Smith     sum1  = 0.0;
532bcc973b7SBarry Smith     sum2  = 0.0;
533bcc973b7SBarry Smith     sum3  = 0.0;
534bcc973b7SBarry Smith     sum4  = 0.0;
535b3c51c66SMatthew Knepley     nonzerorow += (n>0);
536bcc973b7SBarry Smith     for (j=0; j<n; j++) {
537bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
538bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
539bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
540bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
541bcc973b7SBarry Smith       jrow++;
542bcc973b7SBarry Smith      }
543bcc973b7SBarry Smith     y[4*i]   = sum1;
544bcc973b7SBarry Smith     y[4*i+1] = sum2;
545bcc973b7SBarry Smith     y[4*i+2] = sum3;
546bcc973b7SBarry Smith     y[4*i+3] = sum4;
547bcc973b7SBarry Smith   }
548bcc973b7SBarry Smith 
549dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
5503649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5511ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
552bcc973b7SBarry Smith   PetscFunctionReturn(0);
553bcc973b7SBarry Smith }
554bcc973b7SBarry Smith 
5554a2ae208SSatish Balay #undef __FUNCT__
556b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
557dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
558bcc973b7SBarry Smith {
5594cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
560bcc973b7SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
561d2840e09SBarry Smith   const PetscScalar *x,*v;
562d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
563dfbe8321SBarry Smith   PetscErrorCode    ierr;
564d2840e09SBarry Smith   PetscInt          n,i;
565d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
566bcc973b7SBarry Smith 
567bcc973b7SBarry Smith   PetscFunctionBegin;
568d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
5693649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
5701ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
571bcc973b7SBarry Smith   for (i=0; i<m; i++) {
572bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
573bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
574bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
575bcc973b7SBarry Smith     alpha1 = x[4*i];
576bcc973b7SBarry Smith     alpha2 = x[4*i+1];
577bcc973b7SBarry Smith     alpha3 = x[4*i+2];
578bcc973b7SBarry Smith     alpha4 = x[4*i+3];
579bcc973b7SBarry Smith     while (n-->0) {
580bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
581bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
582bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
583bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
584bcc973b7SBarry Smith       idx++; v++;
585bcc973b7SBarry Smith     }
586bcc973b7SBarry Smith   }
587dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
5883649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
5891ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
590bcc973b7SBarry Smith   PetscFunctionReturn(0);
591bcc973b7SBarry Smith }
592bcc973b7SBarry Smith 
5934a2ae208SSatish Balay #undef __FUNCT__
594b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
595dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596bcc973b7SBarry Smith {
5974cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
598f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
599d2840e09SBarry Smith   const PetscScalar *x,*v;
600d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4;
601dfbe8321SBarry Smith   PetscErrorCode    ierr;
602b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
603d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
604f9fae5adSBarry Smith 
605f9fae5adSBarry Smith   PetscFunctionBegin;
606f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6073649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6081ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
609f9fae5adSBarry Smith   idx  = a->j;
610f9fae5adSBarry Smith   v    = a->a;
611f9fae5adSBarry Smith   ii   = a->i;
612f9fae5adSBarry Smith 
613f9fae5adSBarry Smith   for (i=0; i<m; i++) {
614f9fae5adSBarry Smith     jrow = ii[i];
615f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
616f9fae5adSBarry Smith     sum1  = 0.0;
617f9fae5adSBarry Smith     sum2  = 0.0;
618f9fae5adSBarry Smith     sum3  = 0.0;
619f9fae5adSBarry Smith     sum4  = 0.0;
620f9fae5adSBarry Smith     for (j=0; j<n; j++) {
621f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
622f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
623f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
624f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
625f9fae5adSBarry Smith       jrow++;
626f9fae5adSBarry Smith      }
627f9fae5adSBarry Smith     y[4*i]   += sum1;
628f9fae5adSBarry Smith     y[4*i+1] += sum2;
629f9fae5adSBarry Smith     y[4*i+2] += sum3;
630f9fae5adSBarry Smith     y[4*i+3] += sum4;
631f9fae5adSBarry Smith   }
632f9fae5adSBarry Smith 
633dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6343649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6351ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
636f9fae5adSBarry Smith   PetscFunctionReturn(0);
637bcc973b7SBarry Smith }
6384a2ae208SSatish Balay #undef __FUNCT__
639b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
640dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
641bcc973b7SBarry Smith {
6424cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
643f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
644d2840e09SBarry Smith   const PetscScalar *x,*v;
645d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
646dfbe8321SBarry Smith   PetscErrorCode    ierr;
647d2840e09SBarry Smith   PetscInt          n,i;
648d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
649f9fae5adSBarry Smith 
650f9fae5adSBarry Smith   PetscFunctionBegin;
651f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
6523649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6531ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
654bfec09a0SHong Zhang 
655f9fae5adSBarry Smith   for (i=0; i<m; i++) {
656bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
657bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
658f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
659f9fae5adSBarry Smith     alpha1 = x[4*i];
660f9fae5adSBarry Smith     alpha2 = x[4*i+1];
661f9fae5adSBarry Smith     alpha3 = x[4*i+2];
662f9fae5adSBarry Smith     alpha4 = x[4*i+3];
663f9fae5adSBarry Smith     while (n-->0) {
664f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
665f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
666f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
667f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
668f9fae5adSBarry Smith       idx++; v++;
669f9fae5adSBarry Smith     }
670f9fae5adSBarry Smith   }
671dc0b31edSSatish Balay   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
6723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
6731ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
674f9fae5adSBarry Smith   PetscFunctionReturn(0);
675f9fae5adSBarry Smith }
676f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6776cd98798SBarry Smith 
6784a2ae208SSatish Balay #undef __FUNCT__
679b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
680dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
681f9fae5adSBarry Smith {
6824cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
683f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
684d2840e09SBarry Smith   const PetscScalar *x,*v;
685d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
686dfbe8321SBarry Smith   PetscErrorCode    ierr;
687d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
688d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
689f9fae5adSBarry Smith 
690f9fae5adSBarry Smith   PetscFunctionBegin;
6913649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
6921ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
693f9fae5adSBarry Smith   idx  = a->j;
694f9fae5adSBarry Smith   v    = a->a;
695f9fae5adSBarry Smith   ii   = a->i;
696f9fae5adSBarry Smith 
697f9fae5adSBarry Smith   for (i=0; i<m; i++) {
698f9fae5adSBarry Smith     jrow = ii[i];
699f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
700f9fae5adSBarry Smith     sum1  = 0.0;
701f9fae5adSBarry Smith     sum2  = 0.0;
702f9fae5adSBarry Smith     sum3  = 0.0;
703f9fae5adSBarry Smith     sum4  = 0.0;
704f9fae5adSBarry Smith     sum5  = 0.0;
705b3c51c66SMatthew Knepley     nonzerorow += (n>0);
706f9fae5adSBarry Smith     for (j=0; j<n; j++) {
707f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
708f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
709f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
710f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
711f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
712f9fae5adSBarry Smith       jrow++;
713f9fae5adSBarry Smith      }
714f9fae5adSBarry Smith     y[5*i]   = sum1;
715f9fae5adSBarry Smith     y[5*i+1] = sum2;
716f9fae5adSBarry Smith     y[5*i+2] = sum3;
717f9fae5adSBarry Smith     y[5*i+3] = sum4;
718f9fae5adSBarry Smith     y[5*i+4] = sum5;
719f9fae5adSBarry Smith   }
720f9fae5adSBarry Smith 
721dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
7223649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7231ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
724f9fae5adSBarry Smith   PetscFunctionReturn(0);
725f9fae5adSBarry Smith }
726f9fae5adSBarry Smith 
7274a2ae208SSatish Balay #undef __FUNCT__
728b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
729dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
730f9fae5adSBarry Smith {
7314cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
732f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
733d2840e09SBarry Smith   const PetscScalar *x,*v;
734d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
735dfbe8321SBarry Smith   PetscErrorCode    ierr;
736d2840e09SBarry Smith   PetscInt          n,i;
737d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
738f9fae5adSBarry Smith 
739f9fae5adSBarry Smith   PetscFunctionBegin;
740d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
7413649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7421ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
743bfec09a0SHong Zhang 
744f9fae5adSBarry Smith   for (i=0; i<m; i++) {
745bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
746bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
747f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
748f9fae5adSBarry Smith     alpha1 = x[5*i];
749f9fae5adSBarry Smith     alpha2 = x[5*i+1];
750f9fae5adSBarry Smith     alpha3 = x[5*i+2];
751f9fae5adSBarry Smith     alpha4 = x[5*i+3];
752f9fae5adSBarry Smith     alpha5 = x[5*i+4];
753f9fae5adSBarry Smith     while (n-->0) {
754f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
755f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
756f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
757f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
758f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
759f9fae5adSBarry Smith       idx++; v++;
760f9fae5adSBarry Smith     }
761f9fae5adSBarry Smith   }
762dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
7633649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
7641ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
765f9fae5adSBarry Smith   PetscFunctionReturn(0);
766f9fae5adSBarry Smith }
767f9fae5adSBarry Smith 
7684a2ae208SSatish Balay #undef __FUNCT__
769b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
770dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
771f9fae5adSBarry Smith {
7724cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
773f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
774d2840e09SBarry Smith   const PetscScalar *x,*v;
775d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
776dfbe8321SBarry Smith   PetscErrorCode    ierr;
777b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
778d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
779f9fae5adSBarry Smith 
780f9fae5adSBarry Smith   PetscFunctionBegin;
781f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
7831ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
784f9fae5adSBarry Smith   idx  = a->j;
785f9fae5adSBarry Smith   v    = a->a;
786f9fae5adSBarry Smith   ii   = a->i;
787f9fae5adSBarry Smith 
788f9fae5adSBarry Smith   for (i=0; i<m; i++) {
789f9fae5adSBarry Smith     jrow = ii[i];
790f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
791f9fae5adSBarry Smith     sum1  = 0.0;
792f9fae5adSBarry Smith     sum2  = 0.0;
793f9fae5adSBarry Smith     sum3  = 0.0;
794f9fae5adSBarry Smith     sum4  = 0.0;
795f9fae5adSBarry Smith     sum5  = 0.0;
796f9fae5adSBarry Smith     for (j=0; j<n; j++) {
797f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
798f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
799f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
800f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
801f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
802f9fae5adSBarry Smith       jrow++;
803f9fae5adSBarry Smith      }
804f9fae5adSBarry Smith     y[5*i]   += sum1;
805f9fae5adSBarry Smith     y[5*i+1] += sum2;
806f9fae5adSBarry Smith     y[5*i+2] += sum3;
807f9fae5adSBarry Smith     y[5*i+3] += sum4;
808f9fae5adSBarry Smith     y[5*i+4] += sum5;
809f9fae5adSBarry Smith   }
810f9fae5adSBarry Smith 
811dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8123649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8131ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
814f9fae5adSBarry Smith   PetscFunctionReturn(0);
815f9fae5adSBarry Smith }
816f9fae5adSBarry Smith 
8174a2ae208SSatish Balay #undef __FUNCT__
818b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
819dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
820f9fae5adSBarry Smith {
8214cb9d9b8SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
822f9fae5adSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
823d2840e09SBarry Smith   const PetscScalar *x,*v;
824d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
825dfbe8321SBarry Smith   PetscErrorCode    ierr;
826d2840e09SBarry Smith   PetscInt          n,i;
827d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
828f9fae5adSBarry Smith 
829f9fae5adSBarry Smith   PetscFunctionBegin;
830f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8313649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8321ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
833bfec09a0SHong Zhang 
834f9fae5adSBarry Smith   for (i=0; i<m; i++) {
835bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
836bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
837f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
838f9fae5adSBarry Smith     alpha1 = x[5*i];
839f9fae5adSBarry Smith     alpha2 = x[5*i+1];
840f9fae5adSBarry Smith     alpha3 = x[5*i+2];
841f9fae5adSBarry Smith     alpha4 = x[5*i+3];
842f9fae5adSBarry Smith     alpha5 = x[5*i+4];
843f9fae5adSBarry Smith     while (n-->0) {
844f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
845f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
846f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
847f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
848f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
849f9fae5adSBarry Smith       idx++; v++;
850f9fae5adSBarry Smith     }
851f9fae5adSBarry Smith   }
852dc0b31edSSatish Balay   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
8533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
8541ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
855f9fae5adSBarry Smith   PetscFunctionReturn(0);
856bcc973b7SBarry Smith }
857bcc973b7SBarry Smith 
8586cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
8594a2ae208SSatish Balay #undef __FUNCT__
860b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
861dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8626cd98798SBarry Smith {
8636cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
8646cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
865d2840e09SBarry Smith   const PetscScalar *x,*v;
866d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
867dfbe8321SBarry Smith   PetscErrorCode    ierr;
868d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
869d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
8706cd98798SBarry Smith 
8716cd98798SBarry Smith   PetscFunctionBegin;
8723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
8731ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8746cd98798SBarry Smith   idx  = a->j;
8756cd98798SBarry Smith   v    = a->a;
8766cd98798SBarry Smith   ii   = a->i;
8776cd98798SBarry Smith 
8786cd98798SBarry Smith   for (i=0; i<m; i++) {
8796cd98798SBarry Smith     jrow = ii[i];
8806cd98798SBarry Smith     n    = ii[i+1] - jrow;
8816cd98798SBarry Smith     sum1  = 0.0;
8826cd98798SBarry Smith     sum2  = 0.0;
8836cd98798SBarry Smith     sum3  = 0.0;
8846cd98798SBarry Smith     sum4  = 0.0;
8856cd98798SBarry Smith     sum5  = 0.0;
8866cd98798SBarry Smith     sum6  = 0.0;
887b3c51c66SMatthew Knepley     nonzerorow += (n>0);
8886cd98798SBarry Smith     for (j=0; j<n; j++) {
8896cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8906cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8916cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8926cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8936cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8946cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8956cd98798SBarry Smith       jrow++;
8966cd98798SBarry Smith      }
8976cd98798SBarry Smith     y[6*i]   = sum1;
8986cd98798SBarry Smith     y[6*i+1] = sum2;
8996cd98798SBarry Smith     y[6*i+2] = sum3;
9006cd98798SBarry Smith     y[6*i+3] = sum4;
9016cd98798SBarry Smith     y[6*i+4] = sum5;
9026cd98798SBarry Smith     y[6*i+5] = sum6;
9036cd98798SBarry Smith   }
9046cd98798SBarry Smith 
905dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
9063649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9071ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9086cd98798SBarry Smith   PetscFunctionReturn(0);
9096cd98798SBarry Smith }
9106cd98798SBarry Smith 
9114a2ae208SSatish Balay #undef __FUNCT__
912b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
913dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
9146cd98798SBarry Smith {
9156cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9166cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
917d2840e09SBarry Smith   const PetscScalar *x,*v;
918d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
919dfbe8321SBarry Smith   PetscErrorCode    ierr;
920d2840e09SBarry Smith   PetscInt          n,i;
921d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
9226cd98798SBarry Smith 
9236cd98798SBarry Smith   PetscFunctionBegin;
924d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
9253649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9261ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
927bfec09a0SHong Zhang 
9286cd98798SBarry Smith   for (i=0; i<m; i++) {
929bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
930bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9316cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9326cd98798SBarry Smith     alpha1 = x[6*i];
9336cd98798SBarry Smith     alpha2 = x[6*i+1];
9346cd98798SBarry Smith     alpha3 = x[6*i+2];
9356cd98798SBarry Smith     alpha4 = x[6*i+3];
9366cd98798SBarry Smith     alpha5 = x[6*i+4];
9376cd98798SBarry Smith     alpha6 = x[6*i+5];
9386cd98798SBarry Smith     while (n-->0) {
9396cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9406cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9416cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9426cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9436cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9446cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9456cd98798SBarry Smith       idx++; v++;
9466cd98798SBarry Smith     }
9476cd98798SBarry Smith   }
948dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
9493649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
9501ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
9516cd98798SBarry Smith   PetscFunctionReturn(0);
9526cd98798SBarry Smith }
9536cd98798SBarry Smith 
9544a2ae208SSatish Balay #undef __FUNCT__
955b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
956dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9576cd98798SBarry Smith {
9586cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
9596cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
960d2840e09SBarry Smith   const PetscScalar *x,*v;
961d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
962dfbe8321SBarry Smith   PetscErrorCode    ierr;
963b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
964d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
9656cd98798SBarry Smith 
9666cd98798SBarry Smith   PetscFunctionBegin;
9676cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9683649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
9691ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
9706cd98798SBarry Smith   idx  = a->j;
9716cd98798SBarry Smith   v    = a->a;
9726cd98798SBarry Smith   ii   = a->i;
9736cd98798SBarry Smith 
9746cd98798SBarry Smith   for (i=0; i<m; i++) {
9756cd98798SBarry Smith     jrow = ii[i];
9766cd98798SBarry Smith     n    = ii[i+1] - jrow;
9776cd98798SBarry Smith     sum1  = 0.0;
9786cd98798SBarry Smith     sum2  = 0.0;
9796cd98798SBarry Smith     sum3  = 0.0;
9806cd98798SBarry Smith     sum4  = 0.0;
9816cd98798SBarry Smith     sum5  = 0.0;
9826cd98798SBarry Smith     sum6  = 0.0;
9836cd98798SBarry Smith     for (j=0; j<n; j++) {
9846cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9856cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9866cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9876cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9886cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9896cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9906cd98798SBarry Smith       jrow++;
9916cd98798SBarry Smith      }
9926cd98798SBarry Smith     y[6*i]   += sum1;
9936cd98798SBarry Smith     y[6*i+1] += sum2;
9946cd98798SBarry Smith     y[6*i+2] += sum3;
9956cd98798SBarry Smith     y[6*i+3] += sum4;
9966cd98798SBarry Smith     y[6*i+4] += sum5;
9976cd98798SBarry Smith     y[6*i+5] += sum6;
9986cd98798SBarry Smith   }
9996cd98798SBarry Smith 
1000dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10013649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10021ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10036cd98798SBarry Smith   PetscFunctionReturn(0);
10046cd98798SBarry Smith }
10056cd98798SBarry Smith 
10064a2ae208SSatish Balay #undef __FUNCT__
1007b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1008dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
10096cd98798SBarry Smith {
10106cd98798SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
10116cd98798SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1012d2840e09SBarry Smith   const PetscScalar *x,*v;
1013d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1014dfbe8321SBarry Smith   PetscErrorCode    ierr;
1015d2840e09SBarry Smith   PetscInt          n,i;
1016d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
10176cd98798SBarry Smith 
10186cd98798SBarry Smith   PetscFunctionBegin;
10196cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10203649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
10211ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1022bfec09a0SHong Zhang 
10236cd98798SBarry Smith   for (i=0; i<m; i++) {
1024bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1025bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
10266cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
10276cd98798SBarry Smith     alpha1 = x[6*i];
10286cd98798SBarry Smith     alpha2 = x[6*i+1];
10296cd98798SBarry Smith     alpha3 = x[6*i+2];
10306cd98798SBarry Smith     alpha4 = x[6*i+3];
10316cd98798SBarry Smith     alpha5 = x[6*i+4];
10326cd98798SBarry Smith     alpha6 = x[6*i+5];
10336cd98798SBarry Smith     while (n-->0) {
10346cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
10356cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
10366cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
10376cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
10386cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
10396cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
10406cd98798SBarry Smith       idx++; v++;
10416cd98798SBarry Smith     }
10426cd98798SBarry Smith   }
1043dc0b31edSSatish Balay   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
10443649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
10451ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
10466cd98798SBarry Smith   PetscFunctionReturn(0);
10476cd98798SBarry Smith }
10486cd98798SBarry Smith 
104966ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
105066ed3db0SBarry Smith #undef __FUNCT__
1051ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
1052ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1053ed8eea03SBarry Smith {
1054ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1055ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1056d2840e09SBarry Smith   const PetscScalar *x,*v;
1057d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1058ed8eea03SBarry Smith   PetscErrorCode    ierr;
1059d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1060d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1061ed8eea03SBarry Smith 
1062ed8eea03SBarry Smith   PetscFunctionBegin;
10633649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1064ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1065ed8eea03SBarry Smith   idx  = a->j;
1066ed8eea03SBarry Smith   v    = a->a;
1067ed8eea03SBarry Smith   ii   = a->i;
1068ed8eea03SBarry Smith 
1069ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1070ed8eea03SBarry Smith     jrow = ii[i];
1071ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1072ed8eea03SBarry Smith     sum1  = 0.0;
1073ed8eea03SBarry Smith     sum2  = 0.0;
1074ed8eea03SBarry Smith     sum3  = 0.0;
1075ed8eea03SBarry Smith     sum4  = 0.0;
1076ed8eea03SBarry Smith     sum5  = 0.0;
1077ed8eea03SBarry Smith     sum6  = 0.0;
1078ed8eea03SBarry Smith     sum7  = 0.0;
1079b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1080ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1081ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1082ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1083ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1084ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1085ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1086ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1087ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1088ed8eea03SBarry Smith       jrow++;
1089ed8eea03SBarry Smith      }
1090ed8eea03SBarry Smith     y[7*i]   = sum1;
1091ed8eea03SBarry Smith     y[7*i+1] = sum2;
1092ed8eea03SBarry Smith     y[7*i+2] = sum3;
1093ed8eea03SBarry Smith     y[7*i+3] = sum4;
1094ed8eea03SBarry Smith     y[7*i+4] = sum5;
1095ed8eea03SBarry Smith     y[7*i+5] = sum6;
1096ed8eea03SBarry Smith     y[7*i+6] = sum7;
1097ed8eea03SBarry Smith   }
1098ed8eea03SBarry Smith 
1099dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
11003649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1101ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1102ed8eea03SBarry Smith   PetscFunctionReturn(0);
1103ed8eea03SBarry Smith }
1104ed8eea03SBarry Smith 
1105ed8eea03SBarry Smith #undef __FUNCT__
1106ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1107ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1108ed8eea03SBarry Smith {
1109ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1110ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1111d2840e09SBarry Smith   const PetscScalar *x,*v;
1112d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1113ed8eea03SBarry Smith   PetscErrorCode    ierr;
1114d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1115d2840e09SBarry Smith   PetscInt          n,i;
1116ed8eea03SBarry Smith 
1117ed8eea03SBarry Smith   PetscFunctionBegin;
1118d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
11193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1120ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1121ed8eea03SBarry Smith 
1122ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1123ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1124ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1125ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1126ed8eea03SBarry Smith     alpha1 = x[7*i];
1127ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1128ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1129ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1130ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1131ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1132ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1133ed8eea03SBarry Smith     while (n-->0) {
1134ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1135ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1136ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1137ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1138ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1139ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1140ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1141ed8eea03SBarry Smith       idx++; v++;
1142ed8eea03SBarry Smith     }
1143ed8eea03SBarry Smith   }
1144dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
11453649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1146ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1147ed8eea03SBarry Smith   PetscFunctionReturn(0);
1148ed8eea03SBarry Smith }
1149ed8eea03SBarry Smith 
1150ed8eea03SBarry Smith #undef __FUNCT__
1151ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1152ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1153ed8eea03SBarry Smith {
1154ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1155ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1156d2840e09SBarry Smith   const PetscScalar *x,*v;
1157d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1158ed8eea03SBarry Smith   PetscErrorCode    ierr;
1159d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1160ed8eea03SBarry Smith   PetscInt          n,i,jrow,j;
1161ed8eea03SBarry Smith 
1162ed8eea03SBarry Smith   PetscFunctionBegin;
1163ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11643649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1165ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1166ed8eea03SBarry Smith   idx  = a->j;
1167ed8eea03SBarry Smith   v    = a->a;
1168ed8eea03SBarry Smith   ii   = a->i;
1169ed8eea03SBarry Smith 
1170ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1171ed8eea03SBarry Smith     jrow = ii[i];
1172ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1173ed8eea03SBarry Smith     sum1  = 0.0;
1174ed8eea03SBarry Smith     sum2  = 0.0;
1175ed8eea03SBarry Smith     sum3  = 0.0;
1176ed8eea03SBarry Smith     sum4  = 0.0;
1177ed8eea03SBarry Smith     sum5  = 0.0;
1178ed8eea03SBarry Smith     sum6  = 0.0;
1179ed8eea03SBarry Smith     sum7  = 0.0;
1180ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1181ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1182ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1183ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1184ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1185ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1186ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1187ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1188ed8eea03SBarry Smith       jrow++;
1189ed8eea03SBarry Smith      }
1190ed8eea03SBarry Smith     y[7*i]   += sum1;
1191ed8eea03SBarry Smith     y[7*i+1] += sum2;
1192ed8eea03SBarry Smith     y[7*i+2] += sum3;
1193ed8eea03SBarry Smith     y[7*i+3] += sum4;
1194ed8eea03SBarry Smith     y[7*i+4] += sum5;
1195ed8eea03SBarry Smith     y[7*i+5] += sum6;
1196ed8eea03SBarry Smith     y[7*i+6] += sum7;
1197ed8eea03SBarry Smith   }
1198ed8eea03SBarry Smith 
1199dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12003649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1201ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1202ed8eea03SBarry Smith   PetscFunctionReturn(0);
1203ed8eea03SBarry Smith }
1204ed8eea03SBarry Smith 
1205ed8eea03SBarry Smith #undef __FUNCT__
1206ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1207ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1208ed8eea03SBarry Smith {
1209ed8eea03SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1210ed8eea03SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1211d2840e09SBarry Smith   const PetscScalar *x,*v;
1212d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1213ed8eea03SBarry Smith   PetscErrorCode    ierr;
1214d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1215d2840e09SBarry Smith   PetscInt          n,i;
1216ed8eea03SBarry Smith 
1217ed8eea03SBarry Smith   PetscFunctionBegin;
1218ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12193649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1220ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1221ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1222ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1223ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1224ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1225ed8eea03SBarry Smith     alpha1 = x[7*i];
1226ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1227ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1228ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1229ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1230ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1231ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1232ed8eea03SBarry Smith     while (n-->0) {
1233ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1234ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1235ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1236ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1237ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1238ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1239ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1240ed8eea03SBarry Smith       idx++; v++;
1241ed8eea03SBarry Smith     }
1242ed8eea03SBarry Smith   }
1243dc0b31edSSatish Balay   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
12443649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1245ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1246ed8eea03SBarry Smith   PetscFunctionReturn(0);
1247ed8eea03SBarry Smith }
1248ed8eea03SBarry Smith 
1249ed8eea03SBarry Smith #undef __FUNCT__
125066ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1251dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
125266ed3db0SBarry Smith {
125366ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
125466ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1255d2840e09SBarry Smith   const PetscScalar *x,*v;
1256d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1257dfbe8321SBarry Smith   PetscErrorCode    ierr;
1258d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1259d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
126066ed3db0SBarry Smith 
126166ed3db0SBarry Smith   PetscFunctionBegin;
12623649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12631ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
126466ed3db0SBarry Smith   idx  = a->j;
126566ed3db0SBarry Smith   v    = a->a;
126666ed3db0SBarry Smith   ii   = a->i;
126766ed3db0SBarry Smith 
126866ed3db0SBarry Smith   for (i=0; i<m; i++) {
126966ed3db0SBarry Smith     jrow = ii[i];
127066ed3db0SBarry Smith     n    = ii[i+1] - jrow;
127166ed3db0SBarry Smith     sum1  = 0.0;
127266ed3db0SBarry Smith     sum2  = 0.0;
127366ed3db0SBarry Smith     sum3  = 0.0;
127466ed3db0SBarry Smith     sum4  = 0.0;
127566ed3db0SBarry Smith     sum5  = 0.0;
127666ed3db0SBarry Smith     sum6  = 0.0;
127766ed3db0SBarry Smith     sum7  = 0.0;
127866ed3db0SBarry Smith     sum8  = 0.0;
1279b3c51c66SMatthew Knepley     nonzerorow += (n>0);
128066ed3db0SBarry Smith     for (j=0; j<n; j++) {
128166ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
128266ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
128366ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
128466ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
128566ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
128666ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
128766ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
128866ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
128966ed3db0SBarry Smith       jrow++;
129066ed3db0SBarry Smith      }
129166ed3db0SBarry Smith     y[8*i]   = sum1;
129266ed3db0SBarry Smith     y[8*i+1] = sum2;
129366ed3db0SBarry Smith     y[8*i+2] = sum3;
129466ed3db0SBarry Smith     y[8*i+3] = sum4;
129566ed3db0SBarry Smith     y[8*i+4] = sum5;
129666ed3db0SBarry Smith     y[8*i+5] = sum6;
129766ed3db0SBarry Smith     y[8*i+6] = sum7;
129866ed3db0SBarry Smith     y[8*i+7] = sum8;
129966ed3db0SBarry Smith   }
130066ed3db0SBarry Smith 
1301dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
13023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13031ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
130466ed3db0SBarry Smith   PetscFunctionReturn(0);
130566ed3db0SBarry Smith }
130666ed3db0SBarry Smith 
130766ed3db0SBarry Smith #undef __FUNCT__
130866ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1309dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
131066ed3db0SBarry Smith {
131166ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
131266ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1313d2840e09SBarry Smith   const PetscScalar *x,*v;
1314d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1315dfbe8321SBarry Smith   PetscErrorCode    ierr;
1316d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1317d2840e09SBarry Smith   PetscInt          n,i;
131866ed3db0SBarry Smith 
131966ed3db0SBarry Smith   PetscFunctionBegin;
1320d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
13213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13221ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1323bfec09a0SHong Zhang 
132466ed3db0SBarry Smith   for (i=0; i<m; i++) {
1325bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1326bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
132766ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
132866ed3db0SBarry Smith     alpha1 = x[8*i];
132966ed3db0SBarry Smith     alpha2 = x[8*i+1];
133066ed3db0SBarry Smith     alpha3 = x[8*i+2];
133166ed3db0SBarry Smith     alpha4 = x[8*i+3];
133266ed3db0SBarry Smith     alpha5 = x[8*i+4];
133366ed3db0SBarry Smith     alpha6 = x[8*i+5];
133466ed3db0SBarry Smith     alpha7 = x[8*i+6];
133566ed3db0SBarry Smith     alpha8 = x[8*i+7];
133666ed3db0SBarry Smith     while (n-->0) {
133766ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
133866ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
133966ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
134066ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
134166ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
134266ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
134366ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
134466ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
134566ed3db0SBarry Smith       idx++; v++;
134666ed3db0SBarry Smith     }
134766ed3db0SBarry Smith   }
1348dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
13493649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
13501ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
135166ed3db0SBarry Smith   PetscFunctionReturn(0);
135266ed3db0SBarry Smith }
135366ed3db0SBarry Smith 
135466ed3db0SBarry Smith #undef __FUNCT__
135566ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1356dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
135766ed3db0SBarry Smith {
135866ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
135966ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1360d2840e09SBarry Smith   const PetscScalar *x,*v;
1361d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1362dfbe8321SBarry Smith   PetscErrorCode    ierr;
1363d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1364b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
136566ed3db0SBarry Smith 
136666ed3db0SBarry Smith   PetscFunctionBegin;
136766ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13683649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
13691ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
137066ed3db0SBarry Smith   idx  = a->j;
137166ed3db0SBarry Smith   v    = a->a;
137266ed3db0SBarry Smith   ii   = a->i;
137366ed3db0SBarry Smith 
137466ed3db0SBarry Smith   for (i=0; i<m; i++) {
137566ed3db0SBarry Smith     jrow = ii[i];
137666ed3db0SBarry Smith     n    = ii[i+1] - jrow;
137766ed3db0SBarry Smith     sum1  = 0.0;
137866ed3db0SBarry Smith     sum2  = 0.0;
137966ed3db0SBarry Smith     sum3  = 0.0;
138066ed3db0SBarry Smith     sum4  = 0.0;
138166ed3db0SBarry Smith     sum5  = 0.0;
138266ed3db0SBarry Smith     sum6  = 0.0;
138366ed3db0SBarry Smith     sum7  = 0.0;
138466ed3db0SBarry Smith     sum8  = 0.0;
138566ed3db0SBarry Smith     for (j=0; j<n; j++) {
138666ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
138766ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
138866ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
138966ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
139066ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
139166ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
139266ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
139366ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
139466ed3db0SBarry Smith       jrow++;
139566ed3db0SBarry Smith      }
139666ed3db0SBarry Smith     y[8*i]   += sum1;
139766ed3db0SBarry Smith     y[8*i+1] += sum2;
139866ed3db0SBarry Smith     y[8*i+2] += sum3;
139966ed3db0SBarry Smith     y[8*i+3] += sum4;
140066ed3db0SBarry Smith     y[8*i+4] += sum5;
140166ed3db0SBarry Smith     y[8*i+5] += sum6;
140266ed3db0SBarry Smith     y[8*i+6] += sum7;
140366ed3db0SBarry Smith     y[8*i+7] += sum8;
140466ed3db0SBarry Smith   }
140566ed3db0SBarry Smith 
1406dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14073649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14081ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
140966ed3db0SBarry Smith   PetscFunctionReturn(0);
141066ed3db0SBarry Smith }
141166ed3db0SBarry Smith 
141266ed3db0SBarry Smith #undef __FUNCT__
141366ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1414dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
141566ed3db0SBarry Smith {
141666ed3db0SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
141766ed3db0SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1418d2840e09SBarry Smith   const PetscScalar *x,*v;
1419d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1420dfbe8321SBarry Smith   PetscErrorCode    ierr;
1421d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1422d2840e09SBarry Smith   PetscInt          n,i;
142366ed3db0SBarry Smith 
142466ed3db0SBarry Smith   PetscFunctionBegin;
142566ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
14263649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14271ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
142866ed3db0SBarry Smith   for (i=0; i<m; i++) {
1429bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1430bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
143166ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
143266ed3db0SBarry Smith     alpha1 = x[8*i];
143366ed3db0SBarry Smith     alpha2 = x[8*i+1];
143466ed3db0SBarry Smith     alpha3 = x[8*i+2];
143566ed3db0SBarry Smith     alpha4 = x[8*i+3];
143666ed3db0SBarry Smith     alpha5 = x[8*i+4];
143766ed3db0SBarry Smith     alpha6 = x[8*i+5];
143866ed3db0SBarry Smith     alpha7 = x[8*i+6];
143966ed3db0SBarry Smith     alpha8 = x[8*i+7];
144066ed3db0SBarry Smith     while (n-->0) {
144166ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
144266ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
144366ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
144466ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
144566ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
144666ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
144766ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
144866ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
144966ed3db0SBarry Smith       idx++; v++;
145066ed3db0SBarry Smith     }
145166ed3db0SBarry Smith   }
1452dc0b31edSSatish Balay   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
14533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14541ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
14552f7816d4SBarry Smith   PetscFunctionReturn(0);
14562f7816d4SBarry Smith }
14572f7816d4SBarry Smith 
14582b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
14592b692628SMatthew Knepley #undef __FUNCT__
14602b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1461dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14622b692628SMatthew Knepley {
14632b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
14642b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1465d2840e09SBarry Smith   const PetscScalar *x,*v;
1466d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1467dfbe8321SBarry Smith   PetscErrorCode    ierr;
1468d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1469d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
14702b692628SMatthew Knepley 
14712b692628SMatthew Knepley   PetscFunctionBegin;
14723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
14731ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14742b692628SMatthew Knepley   idx  = a->j;
14752b692628SMatthew Knepley   v    = a->a;
14762b692628SMatthew Knepley   ii   = a->i;
14772b692628SMatthew Knepley 
14782b692628SMatthew Knepley   for (i=0; i<m; i++) {
14792b692628SMatthew Knepley     jrow = ii[i];
14802b692628SMatthew Knepley     n    = ii[i+1] - jrow;
14812b692628SMatthew Knepley     sum1  = 0.0;
14822b692628SMatthew Knepley     sum2  = 0.0;
14832b692628SMatthew Knepley     sum3  = 0.0;
14842b692628SMatthew Knepley     sum4  = 0.0;
14852b692628SMatthew Knepley     sum5  = 0.0;
14862b692628SMatthew Knepley     sum6  = 0.0;
14872b692628SMatthew Knepley     sum7  = 0.0;
14882b692628SMatthew Knepley     sum8  = 0.0;
14892b692628SMatthew Knepley     sum9  = 0.0;
1490b3c51c66SMatthew Knepley     nonzerorow += (n>0);
14912b692628SMatthew Knepley     for (j=0; j<n; j++) {
14922b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14932b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14942b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14952b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14962b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14972b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14982b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14992b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15002b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15012b692628SMatthew Knepley       jrow++;
15022b692628SMatthew Knepley      }
15032b692628SMatthew Knepley     y[9*i]   = sum1;
15042b692628SMatthew Knepley     y[9*i+1] = sum2;
15052b692628SMatthew Knepley     y[9*i+2] = sum3;
15062b692628SMatthew Knepley     y[9*i+3] = sum4;
15072b692628SMatthew Knepley     y[9*i+4] = sum5;
15082b692628SMatthew Knepley     y[9*i+5] = sum6;
15092b692628SMatthew Knepley     y[9*i+6] = sum7;
15102b692628SMatthew Knepley     y[9*i+7] = sum8;
15112b692628SMatthew Knepley     y[9*i+8] = sum9;
15122b692628SMatthew Knepley   }
15132b692628SMatthew Knepley 
1514dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
15153649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15161ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15172b692628SMatthew Knepley   PetscFunctionReturn(0);
15182b692628SMatthew Knepley }
15192b692628SMatthew Knepley 
1520b9cda22cSBarry Smith /* ------------------------------------------------------------------------------*/
1521b9cda22cSBarry Smith 
15222b692628SMatthew Knepley #undef __FUNCT__
15232b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1524dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
15252b692628SMatthew Knepley {
15262b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15272b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1528d2840e09SBarry Smith   const PetscScalar *x,*v;
1529d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1530dfbe8321SBarry Smith   PetscErrorCode    ierr;
1531d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1532d2840e09SBarry Smith   PetscInt          n,i;
15332b692628SMatthew Knepley 
15342b692628SMatthew Knepley   PetscFunctionBegin;
1535d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
15363649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15371ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
15382b692628SMatthew Knepley 
15392b692628SMatthew Knepley   for (i=0; i<m; i++) {
15402b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
15412b692628SMatthew Knepley     v      = a->a + a->i[i] ;
15422b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15432b692628SMatthew Knepley     alpha1 = x[9*i];
15442b692628SMatthew Knepley     alpha2 = x[9*i+1];
15452b692628SMatthew Knepley     alpha3 = x[9*i+2];
15462b692628SMatthew Knepley     alpha4 = x[9*i+3];
15472b692628SMatthew Knepley     alpha5 = x[9*i+4];
15482b692628SMatthew Knepley     alpha6 = x[9*i+5];
15492b692628SMatthew Knepley     alpha7 = x[9*i+6];
15502b692628SMatthew Knepley     alpha8 = x[9*i+7];
15512f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
15522b692628SMatthew Knepley     while (n-->0) {
15532b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15542b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15552b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15562b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15572b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15582b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15592b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15602b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15612b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15622b692628SMatthew Knepley       idx++; v++;
15632b692628SMatthew Knepley     }
15642b692628SMatthew Knepley   }
1565dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
15663649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
15671ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15682b692628SMatthew Knepley   PetscFunctionReturn(0);
15692b692628SMatthew Knepley }
15702b692628SMatthew Knepley 
15712b692628SMatthew Knepley #undef __FUNCT__
15722b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1573dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15742b692628SMatthew Knepley {
15752b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
15762b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1577d2840e09SBarry Smith   const PetscScalar *x,*v;
1578d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1579dfbe8321SBarry Smith   PetscErrorCode    ierr;
1580d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1581b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
15822b692628SMatthew Knepley 
15832b692628SMatthew Knepley   PetscFunctionBegin;
15842b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15853649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
15861ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15872b692628SMatthew Knepley   idx  = a->j;
15882b692628SMatthew Knepley   v    = a->a;
15892b692628SMatthew Knepley   ii   = a->i;
15902b692628SMatthew Knepley 
15912b692628SMatthew Knepley   for (i=0; i<m; i++) {
15922b692628SMatthew Knepley     jrow = ii[i];
15932b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15942b692628SMatthew Knepley     sum1  = 0.0;
15952b692628SMatthew Knepley     sum2  = 0.0;
15962b692628SMatthew Knepley     sum3  = 0.0;
15972b692628SMatthew Knepley     sum4  = 0.0;
15982b692628SMatthew Knepley     sum5  = 0.0;
15992b692628SMatthew Knepley     sum6  = 0.0;
16002b692628SMatthew Knepley     sum7  = 0.0;
16012b692628SMatthew Knepley     sum8  = 0.0;
16022b692628SMatthew Knepley     sum9  = 0.0;
16032b692628SMatthew Knepley     for (j=0; j<n; j++) {
16042b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
16052b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
16062b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
16072b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
16082b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
16092b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
16102b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
16112b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
16122b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
16132b692628SMatthew Knepley       jrow++;
16142b692628SMatthew Knepley      }
16152b692628SMatthew Knepley     y[9*i]   += sum1;
16162b692628SMatthew Knepley     y[9*i+1] += sum2;
16172b692628SMatthew Knepley     y[9*i+2] += sum3;
16182b692628SMatthew Knepley     y[9*i+3] += sum4;
16192b692628SMatthew Knepley     y[9*i+4] += sum5;
16202b692628SMatthew Knepley     y[9*i+5] += sum6;
16212b692628SMatthew Knepley     y[9*i+6] += sum7;
16222b692628SMatthew Knepley     y[9*i+7] += sum8;
16232b692628SMatthew Knepley     y[9*i+8] += sum9;
16242b692628SMatthew Knepley   }
16252b692628SMatthew Knepley 
1626efee365bSSatish Balay   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
16273649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16281ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16292b692628SMatthew Knepley   PetscFunctionReturn(0);
16302b692628SMatthew Knepley }
16312b692628SMatthew Knepley 
16322b692628SMatthew Knepley #undef __FUNCT__
16332b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1634dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
16352b692628SMatthew Knepley {
16362b692628SMatthew Knepley   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
16372b692628SMatthew Knepley   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1638d2840e09SBarry Smith   const PetscScalar *x,*v;
1639d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1640dfbe8321SBarry Smith   PetscErrorCode    ierr;
1641d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1642d2840e09SBarry Smith   PetscInt          n,i;
16432b692628SMatthew Knepley 
16442b692628SMatthew Knepley   PetscFunctionBegin;
16452b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16463649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
16471ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
16482b692628SMatthew Knepley   for (i=0; i<m; i++) {
16492b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
16502b692628SMatthew Knepley     v      = a->a + a->i[i] ;
16512b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
16522b692628SMatthew Knepley     alpha1 = x[9*i];
16532b692628SMatthew Knepley     alpha2 = x[9*i+1];
16542b692628SMatthew Knepley     alpha3 = x[9*i+2];
16552b692628SMatthew Knepley     alpha4 = x[9*i+3];
16562b692628SMatthew Knepley     alpha5 = x[9*i+4];
16572b692628SMatthew Knepley     alpha6 = x[9*i+5];
16582b692628SMatthew Knepley     alpha7 = x[9*i+6];
16592b692628SMatthew Knepley     alpha8 = x[9*i+7];
16602b692628SMatthew Knepley     alpha9 = x[9*i+8];
16612b692628SMatthew Knepley     while (n-->0) {
16622b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
16632b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
16642b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
16652b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
16662b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
16672b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
16682b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
16692b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
16702b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
16712b692628SMatthew Knepley       idx++; v++;
16722b692628SMatthew Knepley     }
16732b692628SMatthew Knepley   }
1674dc0b31edSSatish Balay   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
16753649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
16761ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16772b692628SMatthew Knepley   PetscFunctionReturn(0);
16782b692628SMatthew Knepley }
1679b9cda22cSBarry Smith #undef __FUNCT__
1680b9cda22cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_10"
1681b9cda22cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1682b9cda22cSBarry Smith {
1683b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1684b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1685d2840e09SBarry Smith   const PetscScalar *x,*v;
1686d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1687b9cda22cSBarry Smith   PetscErrorCode    ierr;
1688d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1689d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1690b9cda22cSBarry Smith 
1691b9cda22cSBarry Smith   PetscFunctionBegin;
16923649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1693b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1694b9cda22cSBarry Smith   idx  = a->j;
1695b9cda22cSBarry Smith   v    = a->a;
1696b9cda22cSBarry Smith   ii   = a->i;
1697b9cda22cSBarry Smith 
1698b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1699b9cda22cSBarry Smith     jrow = ii[i];
1700b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1701b9cda22cSBarry Smith     sum1  = 0.0;
1702b9cda22cSBarry Smith     sum2  = 0.0;
1703b9cda22cSBarry Smith     sum3  = 0.0;
1704b9cda22cSBarry Smith     sum4  = 0.0;
1705b9cda22cSBarry Smith     sum5  = 0.0;
1706b9cda22cSBarry Smith     sum6  = 0.0;
1707b9cda22cSBarry Smith     sum7  = 0.0;
1708b9cda22cSBarry Smith     sum8  = 0.0;
1709b9cda22cSBarry Smith     sum9  = 0.0;
1710b9cda22cSBarry Smith     sum10 = 0.0;
1711b3c51c66SMatthew Knepley     nonzerorow += (n>0);
1712b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1713b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1714b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1715b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1716b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1717b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1718b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1719b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1720b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1721b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1722b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1723b9cda22cSBarry Smith       jrow++;
1724b9cda22cSBarry Smith      }
1725b9cda22cSBarry Smith     y[10*i]   = sum1;
1726b9cda22cSBarry Smith     y[10*i+1] = sum2;
1727b9cda22cSBarry Smith     y[10*i+2] = sum3;
1728b9cda22cSBarry Smith     y[10*i+3] = sum4;
1729b9cda22cSBarry Smith     y[10*i+4] = sum5;
1730b9cda22cSBarry Smith     y[10*i+5] = sum6;
1731b9cda22cSBarry Smith     y[10*i+6] = sum7;
1732b9cda22cSBarry Smith     y[10*i+7] = sum8;
1733b9cda22cSBarry Smith     y[10*i+8] = sum9;
1734b9cda22cSBarry Smith     y[10*i+9] = sum10;
1735b9cda22cSBarry Smith   }
1736b9cda22cSBarry Smith 
1737dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
17383649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1739b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1740b9cda22cSBarry Smith   PetscFunctionReturn(0);
1741b9cda22cSBarry Smith }
1742b9cda22cSBarry Smith 
1743b9cda22cSBarry Smith #undef __FUNCT__
1744dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1745b9cda22cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1746b9cda22cSBarry Smith {
1747b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1748b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1749d2840e09SBarry Smith   const PetscScalar *x,*v;
1750d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1751b9cda22cSBarry Smith   PetscErrorCode    ierr;
1752d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1753b9cda22cSBarry Smith   PetscInt          n,i,jrow,j;
1754b9cda22cSBarry Smith 
1755b9cda22cSBarry Smith   PetscFunctionBegin;
1756b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
17573649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1758b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1759b9cda22cSBarry Smith   idx  = a->j;
1760b9cda22cSBarry Smith   v    = a->a;
1761b9cda22cSBarry Smith   ii   = a->i;
1762b9cda22cSBarry Smith 
1763b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1764b9cda22cSBarry Smith     jrow = ii[i];
1765b9cda22cSBarry Smith     n    = ii[i+1] - jrow;
1766b9cda22cSBarry Smith     sum1  = 0.0;
1767b9cda22cSBarry Smith     sum2  = 0.0;
1768b9cda22cSBarry Smith     sum3  = 0.0;
1769b9cda22cSBarry Smith     sum4  = 0.0;
1770b9cda22cSBarry Smith     sum5  = 0.0;
1771b9cda22cSBarry Smith     sum6  = 0.0;
1772b9cda22cSBarry Smith     sum7  = 0.0;
1773b9cda22cSBarry Smith     sum8  = 0.0;
1774b9cda22cSBarry Smith     sum9  = 0.0;
1775b9cda22cSBarry Smith     sum10 = 0.0;
1776b9cda22cSBarry Smith     for (j=0; j<n; j++) {
1777b9cda22cSBarry Smith       sum1  += v[jrow]*x[10*idx[jrow]];
1778b9cda22cSBarry Smith       sum2  += v[jrow]*x[10*idx[jrow]+1];
1779b9cda22cSBarry Smith       sum3  += v[jrow]*x[10*idx[jrow]+2];
1780b9cda22cSBarry Smith       sum4  += v[jrow]*x[10*idx[jrow]+3];
1781b9cda22cSBarry Smith       sum5  += v[jrow]*x[10*idx[jrow]+4];
1782b9cda22cSBarry Smith       sum6  += v[jrow]*x[10*idx[jrow]+5];
1783b9cda22cSBarry Smith       sum7  += v[jrow]*x[10*idx[jrow]+6];
1784b9cda22cSBarry Smith       sum8  += v[jrow]*x[10*idx[jrow]+7];
1785b9cda22cSBarry Smith       sum9  += v[jrow]*x[10*idx[jrow]+8];
1786b9cda22cSBarry Smith       sum10 += v[jrow]*x[10*idx[jrow]+9];
1787b9cda22cSBarry Smith       jrow++;
1788b9cda22cSBarry Smith      }
1789b9cda22cSBarry Smith     y[10*i]   += sum1;
1790b9cda22cSBarry Smith     y[10*i+1] += sum2;
1791b9cda22cSBarry Smith     y[10*i+2] += sum3;
1792b9cda22cSBarry Smith     y[10*i+3] += sum4;
1793b9cda22cSBarry Smith     y[10*i+4] += sum5;
1794b9cda22cSBarry Smith     y[10*i+5] += sum6;
1795b9cda22cSBarry Smith     y[10*i+6] += sum7;
1796b9cda22cSBarry Smith     y[10*i+7] += sum8;
1797b9cda22cSBarry Smith     y[10*i+8] += sum9;
1798b9cda22cSBarry Smith     y[10*i+9] += sum10;
1799b9cda22cSBarry Smith   }
1800b9cda22cSBarry Smith 
1801dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18023649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1803b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1804b9cda22cSBarry Smith   PetscFunctionReturn(0);
1805b9cda22cSBarry Smith }
1806b9cda22cSBarry Smith 
1807b9cda22cSBarry Smith #undef __FUNCT__
1808b9cda22cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1809b9cda22cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1810b9cda22cSBarry Smith {
1811b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1812b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1813d2840e09SBarry Smith   const PetscScalar *x,*v;
1814d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1815b9cda22cSBarry Smith   PetscErrorCode    ierr;
1816d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1817d2840e09SBarry Smith   PetscInt          n,i;
1818b9cda22cSBarry Smith 
1819b9cda22cSBarry Smith   PetscFunctionBegin;
1820d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
18213649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1822b9cda22cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1823b9cda22cSBarry Smith 
1824b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1825b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1826b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1827b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1828e29fdc22SBarry Smith     alpha1 = x[10*i];
1829e29fdc22SBarry Smith     alpha2 = x[10*i+1];
1830e29fdc22SBarry Smith     alpha3 = x[10*i+2];
1831e29fdc22SBarry Smith     alpha4 = x[10*i+3];
1832e29fdc22SBarry Smith     alpha5 = x[10*i+4];
1833e29fdc22SBarry Smith     alpha6 = x[10*i+5];
1834e29fdc22SBarry Smith     alpha7 = x[10*i+6];
1835e29fdc22SBarry Smith     alpha8 = x[10*i+7];
1836e29fdc22SBarry Smith     alpha9 = x[10*i+8];
1837b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1838b9cda22cSBarry Smith     while (n-->0) {
1839e29fdc22SBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1840e29fdc22SBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1841e29fdc22SBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1842e29fdc22SBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1843e29fdc22SBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1844e29fdc22SBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1845e29fdc22SBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1846e29fdc22SBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1847e29fdc22SBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1848b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1849b9cda22cSBarry Smith       idx++; v++;
1850b9cda22cSBarry Smith     }
1851b9cda22cSBarry Smith   }
1852dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
18533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1854b9cda22cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1855b9cda22cSBarry Smith   PetscFunctionReturn(0);
1856b9cda22cSBarry Smith }
1857b9cda22cSBarry Smith 
1858b9cda22cSBarry Smith #undef __FUNCT__
1859b9cda22cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1860b9cda22cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1861b9cda22cSBarry Smith {
1862b9cda22cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1863b9cda22cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1864d2840e09SBarry Smith   const PetscScalar *x,*v;
1865d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1866b9cda22cSBarry Smith   PetscErrorCode    ierr;
1867d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
1868d2840e09SBarry Smith   PetscInt          n,i;
1869b9cda22cSBarry Smith 
1870b9cda22cSBarry Smith   PetscFunctionBegin;
1871b9cda22cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18723649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1873b9cda22cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1874b9cda22cSBarry Smith   for (i=0; i<m; i++) {
1875b9cda22cSBarry Smith     idx    = a->j + a->i[i] ;
1876b9cda22cSBarry Smith     v      = a->a + a->i[i] ;
1877b9cda22cSBarry Smith     n      = a->i[i+1] - a->i[i];
1878b9cda22cSBarry Smith     alpha1 = x[10*i];
1879b9cda22cSBarry Smith     alpha2 = x[10*i+1];
1880b9cda22cSBarry Smith     alpha3 = x[10*i+2];
1881b9cda22cSBarry Smith     alpha4 = x[10*i+3];
1882b9cda22cSBarry Smith     alpha5 = x[10*i+4];
1883b9cda22cSBarry Smith     alpha6 = x[10*i+5];
1884b9cda22cSBarry Smith     alpha7 = x[10*i+6];
1885b9cda22cSBarry Smith     alpha8 = x[10*i+7];
1886b9cda22cSBarry Smith     alpha9 = x[10*i+8];
1887b9cda22cSBarry Smith     alpha10 = x[10*i+9];
1888b9cda22cSBarry Smith     while (n-->0) {
1889b9cda22cSBarry Smith       y[10*(*idx)]   += alpha1*(*v);
1890b9cda22cSBarry Smith       y[10*(*idx)+1] += alpha2*(*v);
1891b9cda22cSBarry Smith       y[10*(*idx)+2] += alpha3*(*v);
1892b9cda22cSBarry Smith       y[10*(*idx)+3] += alpha4*(*v);
1893b9cda22cSBarry Smith       y[10*(*idx)+4] += alpha5*(*v);
1894b9cda22cSBarry Smith       y[10*(*idx)+5] += alpha6*(*v);
1895b9cda22cSBarry Smith       y[10*(*idx)+6] += alpha7*(*v);
1896b9cda22cSBarry Smith       y[10*(*idx)+7] += alpha8*(*v);
1897b9cda22cSBarry Smith       y[10*(*idx)+8] += alpha9*(*v);
1898b9cda22cSBarry Smith       y[10*(*idx)+9] += alpha10*(*v);
1899b9cda22cSBarry Smith       idx++; v++;
1900b9cda22cSBarry Smith     }
1901b9cda22cSBarry Smith   }
1902dc0b31edSSatish Balay   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
19033649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1904b9cda22cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1905b9cda22cSBarry Smith   PetscFunctionReturn(0);
1906b9cda22cSBarry Smith }
1907b9cda22cSBarry Smith 
19082b692628SMatthew Knepley 
19092f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
19102f7816d4SBarry Smith #undef __FUNCT__
1911dbdd7285SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_11"
1912dbdd7285SBarry Smith PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1913dbdd7285SBarry Smith {
1914dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1915dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1916d2840e09SBarry Smith   const PetscScalar *x,*v;
1917d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1918dbdd7285SBarry Smith   PetscErrorCode    ierr;
1919d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1920d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
1921dbdd7285SBarry Smith 
1922dbdd7285SBarry Smith   PetscFunctionBegin;
19233649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1924dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1925dbdd7285SBarry Smith   idx  = a->j;
1926dbdd7285SBarry Smith   v    = a->a;
1927dbdd7285SBarry Smith   ii   = a->i;
1928dbdd7285SBarry Smith 
1929dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1930dbdd7285SBarry Smith     jrow = ii[i];
1931dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
1932dbdd7285SBarry Smith     sum1  = 0.0;
1933dbdd7285SBarry Smith     sum2  = 0.0;
1934dbdd7285SBarry Smith     sum3  = 0.0;
1935dbdd7285SBarry Smith     sum4  = 0.0;
1936dbdd7285SBarry Smith     sum5  = 0.0;
1937dbdd7285SBarry Smith     sum6  = 0.0;
1938dbdd7285SBarry Smith     sum7  = 0.0;
1939dbdd7285SBarry Smith     sum8  = 0.0;
1940dbdd7285SBarry Smith     sum9  = 0.0;
1941dbdd7285SBarry Smith     sum10 = 0.0;
1942dbdd7285SBarry Smith     sum11 = 0.0;
1943dbdd7285SBarry Smith     nonzerorow += (n>0);
1944dbdd7285SBarry Smith     for (j=0; j<n; j++) {
1945dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
1946dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
1947dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
1948dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
1949dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
1950dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
1951dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
1952dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
1953dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
1954dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
1955dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
1956dbdd7285SBarry Smith       jrow++;
1957dbdd7285SBarry Smith      }
1958dbdd7285SBarry Smith     y[11*i]   = sum1;
1959dbdd7285SBarry Smith     y[11*i+1] = sum2;
1960dbdd7285SBarry Smith     y[11*i+2] = sum3;
1961dbdd7285SBarry Smith     y[11*i+3] = sum4;
1962dbdd7285SBarry Smith     y[11*i+4] = sum5;
1963dbdd7285SBarry Smith     y[11*i+5] = sum6;
1964dbdd7285SBarry Smith     y[11*i+6] = sum7;
1965dbdd7285SBarry Smith     y[11*i+7] = sum8;
1966dbdd7285SBarry Smith     y[11*i+8] = sum9;
1967dbdd7285SBarry Smith     y[11*i+9] = sum10;
1968dbdd7285SBarry Smith     y[11*i+10] = sum11;
1969dbdd7285SBarry Smith   }
1970dbdd7285SBarry Smith 
1971dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
19723649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1973dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1974dbdd7285SBarry Smith   PetscFunctionReturn(0);
1975dbdd7285SBarry Smith }
1976dbdd7285SBarry Smith 
1977dbdd7285SBarry Smith #undef __FUNCT__
1978dbdd7285SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1979dbdd7285SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1980dbdd7285SBarry Smith {
1981dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1982dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1983d2840e09SBarry Smith   const PetscScalar *x,*v;
1984d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1985dbdd7285SBarry Smith   PetscErrorCode    ierr;
1986d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1987dbdd7285SBarry Smith   PetscInt          n,i,jrow,j;
1988dbdd7285SBarry Smith 
1989dbdd7285SBarry Smith   PetscFunctionBegin;
1990dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
19913649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1992dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1993dbdd7285SBarry Smith   idx  = a->j;
1994dbdd7285SBarry Smith   v    = a->a;
1995dbdd7285SBarry Smith   ii   = a->i;
1996dbdd7285SBarry Smith 
1997dbdd7285SBarry Smith   for (i=0; i<m; i++) {
1998dbdd7285SBarry Smith     jrow = ii[i];
1999dbdd7285SBarry Smith     n    = ii[i+1] - jrow;
2000dbdd7285SBarry Smith     sum1  = 0.0;
2001dbdd7285SBarry Smith     sum2  = 0.0;
2002dbdd7285SBarry Smith     sum3  = 0.0;
2003dbdd7285SBarry Smith     sum4  = 0.0;
2004dbdd7285SBarry Smith     sum5  = 0.0;
2005dbdd7285SBarry Smith     sum6  = 0.0;
2006dbdd7285SBarry Smith     sum7  = 0.0;
2007dbdd7285SBarry Smith     sum8  = 0.0;
2008dbdd7285SBarry Smith     sum9  = 0.0;
2009dbdd7285SBarry Smith     sum10 = 0.0;
2010dbdd7285SBarry Smith     sum11 = 0.0;
2011dbdd7285SBarry Smith     for (j=0; j<n; j++) {
2012dbdd7285SBarry Smith       sum1  += v[jrow]*x[11*idx[jrow]];
2013dbdd7285SBarry Smith       sum2  += v[jrow]*x[11*idx[jrow]+1];
2014dbdd7285SBarry Smith       sum3  += v[jrow]*x[11*idx[jrow]+2];
2015dbdd7285SBarry Smith       sum4  += v[jrow]*x[11*idx[jrow]+3];
2016dbdd7285SBarry Smith       sum5  += v[jrow]*x[11*idx[jrow]+4];
2017dbdd7285SBarry Smith       sum6  += v[jrow]*x[11*idx[jrow]+5];
2018dbdd7285SBarry Smith       sum7  += v[jrow]*x[11*idx[jrow]+6];
2019dbdd7285SBarry Smith       sum8  += v[jrow]*x[11*idx[jrow]+7];
2020dbdd7285SBarry Smith       sum9  += v[jrow]*x[11*idx[jrow]+8];
2021dbdd7285SBarry Smith       sum10 += v[jrow]*x[11*idx[jrow]+9];
2022dbdd7285SBarry Smith       sum11 += v[jrow]*x[11*idx[jrow]+10];
2023dbdd7285SBarry Smith       jrow++;
2024dbdd7285SBarry Smith      }
2025dbdd7285SBarry Smith     y[11*i]   += sum1;
2026dbdd7285SBarry Smith     y[11*i+1] += sum2;
2027dbdd7285SBarry Smith     y[11*i+2] += sum3;
2028dbdd7285SBarry Smith     y[11*i+3] += sum4;
2029dbdd7285SBarry Smith     y[11*i+4] += sum5;
2030dbdd7285SBarry Smith     y[11*i+5] += sum6;
2031dbdd7285SBarry Smith     y[11*i+6] += sum7;
2032dbdd7285SBarry Smith     y[11*i+7] += sum8;
2033dbdd7285SBarry Smith     y[11*i+8] += sum9;
2034dbdd7285SBarry Smith     y[11*i+9] += sum10;
2035dbdd7285SBarry Smith     y[11*i+10] += sum11;
2036dbdd7285SBarry Smith   }
2037dbdd7285SBarry Smith 
2038dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2040dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2041dbdd7285SBarry Smith   PetscFunctionReturn(0);
2042dbdd7285SBarry Smith }
2043dbdd7285SBarry Smith 
2044dbdd7285SBarry Smith #undef __FUNCT__
2045dbdd7285SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2046dbdd7285SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2047dbdd7285SBarry Smith {
2048dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2049dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2050d2840e09SBarry Smith   const PetscScalar *x,*v;
2051d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2052dbdd7285SBarry Smith   PetscErrorCode    ierr;
2053d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2054d2840e09SBarry Smith   PetscInt          n,i;
2055dbdd7285SBarry Smith 
2056dbdd7285SBarry Smith   PetscFunctionBegin;
2057d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
20583649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2059dbdd7285SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2060dbdd7285SBarry Smith 
2061dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2062dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2063dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2064dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2065dbdd7285SBarry Smith     alpha1 = x[11*i];
2066dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2067dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2068dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2069dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2070dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2071dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2072dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2073dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2074dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2075dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2076dbdd7285SBarry Smith     while (n-->0) {
2077dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2078dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2079dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2080dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2081dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2082dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2083dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2084dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2085dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2086dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2087dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2088dbdd7285SBarry Smith       idx++; v++;
2089dbdd7285SBarry Smith     }
2090dbdd7285SBarry Smith   }
2091dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
20923649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2093dbdd7285SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2094dbdd7285SBarry Smith   PetscFunctionReturn(0);
2095dbdd7285SBarry Smith }
2096dbdd7285SBarry Smith 
2097dbdd7285SBarry Smith #undef __FUNCT__
2098dbdd7285SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2099dbdd7285SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2100dbdd7285SBarry Smith {
2101dbdd7285SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2102dbdd7285SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2103d2840e09SBarry Smith   const PetscScalar *x,*v;
2104d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2105dbdd7285SBarry Smith   PetscErrorCode    ierr;
2106d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2107d2840e09SBarry Smith   PetscInt          n,i;
2108dbdd7285SBarry Smith 
2109dbdd7285SBarry Smith   PetscFunctionBegin;
2110dbdd7285SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
21113649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2112dbdd7285SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2113dbdd7285SBarry Smith   for (i=0; i<m; i++) {
2114dbdd7285SBarry Smith     idx    = a->j + a->i[i] ;
2115dbdd7285SBarry Smith     v      = a->a + a->i[i] ;
2116dbdd7285SBarry Smith     n      = a->i[i+1] - a->i[i];
2117dbdd7285SBarry Smith     alpha1 = x[11*i];
2118dbdd7285SBarry Smith     alpha2 = x[11*i+1];
2119dbdd7285SBarry Smith     alpha3 = x[11*i+2];
2120dbdd7285SBarry Smith     alpha4 = x[11*i+3];
2121dbdd7285SBarry Smith     alpha5 = x[11*i+4];
2122dbdd7285SBarry Smith     alpha6 = x[11*i+5];
2123dbdd7285SBarry Smith     alpha7 = x[11*i+6];
2124dbdd7285SBarry Smith     alpha8 = x[11*i+7];
2125dbdd7285SBarry Smith     alpha9 = x[11*i+8];
2126dbdd7285SBarry Smith     alpha10 = x[11*i+9];
2127dbdd7285SBarry Smith     alpha11 = x[11*i+10];
2128dbdd7285SBarry Smith     while (n-->0) {
2129dbdd7285SBarry Smith       y[11*(*idx)]   += alpha1*(*v);
2130dbdd7285SBarry Smith       y[11*(*idx)+1] += alpha2*(*v);
2131dbdd7285SBarry Smith       y[11*(*idx)+2] += alpha3*(*v);
2132dbdd7285SBarry Smith       y[11*(*idx)+3] += alpha4*(*v);
2133dbdd7285SBarry Smith       y[11*(*idx)+4] += alpha5*(*v);
2134dbdd7285SBarry Smith       y[11*(*idx)+5] += alpha6*(*v);
2135dbdd7285SBarry Smith       y[11*(*idx)+6] += alpha7*(*v);
2136dbdd7285SBarry Smith       y[11*(*idx)+7] += alpha8*(*v);
2137dbdd7285SBarry Smith       y[11*(*idx)+8] += alpha9*(*v);
2138dbdd7285SBarry Smith       y[11*(*idx)+9] += alpha10*(*v);
2139dbdd7285SBarry Smith       y[11*(*idx)+10] += alpha11*(*v);
2140dbdd7285SBarry Smith       idx++; v++;
2141dbdd7285SBarry Smith     }
2142dbdd7285SBarry Smith   }
2143dbdd7285SBarry Smith   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
21443649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2145dbdd7285SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2146dbdd7285SBarry Smith   PetscFunctionReturn(0);
2147dbdd7285SBarry Smith }
2148dbdd7285SBarry Smith 
2149dbdd7285SBarry Smith 
2150dbdd7285SBarry Smith /*--------------------------------------------------------------------------------------------*/
2151dbdd7285SBarry Smith #undef __FUNCT__
21522f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
2153dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
21542f7816d4SBarry Smith {
21552f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
21562f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2157d2840e09SBarry Smith   const PetscScalar *x,*v;
2158d2840e09SBarry Smith   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
21592f7816d4SBarry Smith   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2160dfbe8321SBarry Smith   PetscErrorCode     ierr;
2161d2840e09SBarry Smith   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2162d2840e09SBarry Smith   PetscInt           nonzerorow=0,n,i,jrow,j;
21632f7816d4SBarry Smith 
21642f7816d4SBarry Smith   PetscFunctionBegin;
21653649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
21661ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
21672f7816d4SBarry Smith   idx  = a->j;
21682f7816d4SBarry Smith   v    = a->a;
21692f7816d4SBarry Smith   ii   = a->i;
21702f7816d4SBarry Smith 
21712f7816d4SBarry Smith   for (i=0; i<m; i++) {
21722f7816d4SBarry Smith     jrow = ii[i];
21732f7816d4SBarry Smith     n    = ii[i+1] - jrow;
21742f7816d4SBarry Smith     sum1  = 0.0;
21752f7816d4SBarry Smith     sum2  = 0.0;
21762f7816d4SBarry Smith     sum3  = 0.0;
21772f7816d4SBarry Smith     sum4  = 0.0;
21782f7816d4SBarry Smith     sum5  = 0.0;
21792f7816d4SBarry Smith     sum6  = 0.0;
21802f7816d4SBarry Smith     sum7  = 0.0;
21812f7816d4SBarry Smith     sum8  = 0.0;
21822f7816d4SBarry Smith     sum9  = 0.0;
21832f7816d4SBarry Smith     sum10 = 0.0;
21842f7816d4SBarry Smith     sum11 = 0.0;
21852f7816d4SBarry Smith     sum12 = 0.0;
21862f7816d4SBarry Smith     sum13 = 0.0;
21872f7816d4SBarry Smith     sum14 = 0.0;
21882f7816d4SBarry Smith     sum15 = 0.0;
21892f7816d4SBarry Smith     sum16 = 0.0;
2190b3c51c66SMatthew Knepley     nonzerorow += (n>0);
21912f7816d4SBarry Smith     for (j=0; j<n; j++) {
21922f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
21932f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
21942f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
21952f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
21962f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
21972f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
21982f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
21992f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
22002f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
22012f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
22022f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
22032f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
22042f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
22052f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
22062f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
22072f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
22082f7816d4SBarry Smith       jrow++;
22092f7816d4SBarry Smith      }
22102f7816d4SBarry Smith     y[16*i]    = sum1;
22112f7816d4SBarry Smith     y[16*i+1]  = sum2;
22122f7816d4SBarry Smith     y[16*i+2]  = sum3;
22132f7816d4SBarry Smith     y[16*i+3]  = sum4;
22142f7816d4SBarry Smith     y[16*i+4]  = sum5;
22152f7816d4SBarry Smith     y[16*i+5]  = sum6;
22162f7816d4SBarry Smith     y[16*i+6]  = sum7;
22172f7816d4SBarry Smith     y[16*i+7]  = sum8;
22182f7816d4SBarry Smith     y[16*i+8]  = sum9;
22192f7816d4SBarry Smith     y[16*i+9]  = sum10;
22202f7816d4SBarry Smith     y[16*i+10] = sum11;
22212f7816d4SBarry Smith     y[16*i+11] = sum12;
22222f7816d4SBarry Smith     y[16*i+12] = sum13;
22232f7816d4SBarry Smith     y[16*i+13] = sum14;
22242f7816d4SBarry Smith     y[16*i+14] = sum15;
22252f7816d4SBarry Smith     y[16*i+15] = sum16;
22262f7816d4SBarry Smith   }
22272f7816d4SBarry Smith 
2228dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
22293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22301ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22312f7816d4SBarry Smith   PetscFunctionReturn(0);
22322f7816d4SBarry Smith }
22332f7816d4SBarry Smith 
22342f7816d4SBarry Smith #undef __FUNCT__
22352f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2236dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
22372f7816d4SBarry Smith {
22382f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
22392f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2240d2840e09SBarry Smith   const PetscScalar *x,*v;
2241d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
22422f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2243dfbe8321SBarry Smith   PetscErrorCode    ierr;
2244d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2245d2840e09SBarry Smith   PetscInt          n,i;
22462f7816d4SBarry Smith 
22472f7816d4SBarry Smith   PetscFunctionBegin;
2248d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
22493649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
22501ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2251bfec09a0SHong Zhang 
22522f7816d4SBarry Smith   for (i=0; i<m; i++) {
2253bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2254bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
22552f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
22562f7816d4SBarry Smith     alpha1  = x[16*i];
22572f7816d4SBarry Smith     alpha2  = x[16*i+1];
22582f7816d4SBarry Smith     alpha3  = x[16*i+2];
22592f7816d4SBarry Smith     alpha4  = x[16*i+3];
22602f7816d4SBarry Smith     alpha5  = x[16*i+4];
22612f7816d4SBarry Smith     alpha6  = x[16*i+5];
22622f7816d4SBarry Smith     alpha7  = x[16*i+6];
22632f7816d4SBarry Smith     alpha8  = x[16*i+7];
22642f7816d4SBarry Smith     alpha9  = x[16*i+8];
22652f7816d4SBarry Smith     alpha10 = x[16*i+9];
22662f7816d4SBarry Smith     alpha11 = x[16*i+10];
22672f7816d4SBarry Smith     alpha12 = x[16*i+11];
22682f7816d4SBarry Smith     alpha13 = x[16*i+12];
22692f7816d4SBarry Smith     alpha14 = x[16*i+13];
22702f7816d4SBarry Smith     alpha15 = x[16*i+14];
22712f7816d4SBarry Smith     alpha16 = x[16*i+15];
22722f7816d4SBarry Smith     while (n-->0) {
22732f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
22742f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
22752f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
22762f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
22772f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
22782f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
22792f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
22802f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
22812f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
22822f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
22832f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
22842f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
22852f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
22862f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
22872f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
22882f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
22892f7816d4SBarry Smith       idx++; v++;
22902f7816d4SBarry Smith     }
22912f7816d4SBarry Smith   }
2292dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
22933649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
22941ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
22952f7816d4SBarry Smith   PetscFunctionReturn(0);
22962f7816d4SBarry Smith }
22972f7816d4SBarry Smith 
22982f7816d4SBarry Smith #undef __FUNCT__
22992f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2300dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23012f7816d4SBarry Smith {
23022f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23032f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2304d2840e09SBarry Smith   const PetscScalar *x,*v;
2305d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
23062f7816d4SBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2307dfbe8321SBarry Smith   PetscErrorCode    ierr;
2308d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2309b24ad042SBarry Smith   PetscInt          n,i,jrow,j;
23102f7816d4SBarry Smith 
23112f7816d4SBarry Smith   PetscFunctionBegin;
23122f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23133649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23141ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23152f7816d4SBarry Smith   idx  = a->j;
23162f7816d4SBarry Smith   v    = a->a;
23172f7816d4SBarry Smith   ii   = a->i;
23182f7816d4SBarry Smith 
23192f7816d4SBarry Smith   for (i=0; i<m; i++) {
23202f7816d4SBarry Smith     jrow = ii[i];
23212f7816d4SBarry Smith     n    = ii[i+1] - jrow;
23222f7816d4SBarry Smith     sum1  = 0.0;
23232f7816d4SBarry Smith     sum2  = 0.0;
23242f7816d4SBarry Smith     sum3  = 0.0;
23252f7816d4SBarry Smith     sum4  = 0.0;
23262f7816d4SBarry Smith     sum5  = 0.0;
23272f7816d4SBarry Smith     sum6  = 0.0;
23282f7816d4SBarry Smith     sum7  = 0.0;
23292f7816d4SBarry Smith     sum8  = 0.0;
23302f7816d4SBarry Smith     sum9  = 0.0;
23312f7816d4SBarry Smith     sum10 = 0.0;
23322f7816d4SBarry Smith     sum11 = 0.0;
23332f7816d4SBarry Smith     sum12 = 0.0;
23342f7816d4SBarry Smith     sum13 = 0.0;
23352f7816d4SBarry Smith     sum14 = 0.0;
23362f7816d4SBarry Smith     sum15 = 0.0;
23372f7816d4SBarry Smith     sum16 = 0.0;
23382f7816d4SBarry Smith     for (j=0; j<n; j++) {
23392f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
23402f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
23412f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
23422f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
23432f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
23442f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
23452f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
23462f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
23472f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
23482f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
23492f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
23502f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
23512f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
23522f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
23532f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
23542f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
23552f7816d4SBarry Smith       jrow++;
23562f7816d4SBarry Smith      }
23572f7816d4SBarry Smith     y[16*i]    += sum1;
23582f7816d4SBarry Smith     y[16*i+1]  += sum2;
23592f7816d4SBarry Smith     y[16*i+2]  += sum3;
23602f7816d4SBarry Smith     y[16*i+3]  += sum4;
23612f7816d4SBarry Smith     y[16*i+4]  += sum5;
23622f7816d4SBarry Smith     y[16*i+5]  += sum6;
23632f7816d4SBarry Smith     y[16*i+6]  += sum7;
23642f7816d4SBarry Smith     y[16*i+7]  += sum8;
23652f7816d4SBarry Smith     y[16*i+8]  += sum9;
23662f7816d4SBarry Smith     y[16*i+9]  += sum10;
23672f7816d4SBarry Smith     y[16*i+10] += sum11;
23682f7816d4SBarry Smith     y[16*i+11] += sum12;
23692f7816d4SBarry Smith     y[16*i+12] += sum13;
23702f7816d4SBarry Smith     y[16*i+13] += sum14;
23712f7816d4SBarry Smith     y[16*i+14] += sum15;
23722f7816d4SBarry Smith     y[16*i+15] += sum16;
23732f7816d4SBarry Smith   }
23742f7816d4SBarry Smith 
2375dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
23763649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
23771ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
23782f7816d4SBarry Smith   PetscFunctionReturn(0);
23792f7816d4SBarry Smith }
23802f7816d4SBarry Smith 
23812f7816d4SBarry Smith #undef __FUNCT__
23822f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2383dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
23842f7816d4SBarry Smith {
23852f7816d4SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
23862f7816d4SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2387d2840e09SBarry Smith   const PetscScalar *x,*v;
2388d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
23892f7816d4SBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2390dfbe8321SBarry Smith   PetscErrorCode    ierr;
2391d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2392d2840e09SBarry Smith   PetscInt          n,i;
23932f7816d4SBarry Smith 
23942f7816d4SBarry Smith   PetscFunctionBegin;
23952f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
23963649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
23971ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
23982f7816d4SBarry Smith   for (i=0; i<m; i++) {
2399bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
2400bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
24012f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
24022f7816d4SBarry Smith     alpha1 = x[16*i];
24032f7816d4SBarry Smith     alpha2 = x[16*i+1];
24042f7816d4SBarry Smith     alpha3 = x[16*i+2];
24052f7816d4SBarry Smith     alpha4 = x[16*i+3];
24062f7816d4SBarry Smith     alpha5 = x[16*i+4];
24072f7816d4SBarry Smith     alpha6 = x[16*i+5];
24082f7816d4SBarry Smith     alpha7 = x[16*i+6];
24092f7816d4SBarry Smith     alpha8 = x[16*i+7];
24102f7816d4SBarry Smith     alpha9  = x[16*i+8];
24112f7816d4SBarry Smith     alpha10 = x[16*i+9];
24122f7816d4SBarry Smith     alpha11 = x[16*i+10];
24132f7816d4SBarry Smith     alpha12 = x[16*i+11];
24142f7816d4SBarry Smith     alpha13 = x[16*i+12];
24152f7816d4SBarry Smith     alpha14 = x[16*i+13];
24162f7816d4SBarry Smith     alpha15 = x[16*i+14];
24172f7816d4SBarry Smith     alpha16 = x[16*i+15];
24182f7816d4SBarry Smith     while (n-->0) {
24192f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
24202f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
24212f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
24222f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
24232f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
24242f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
24252f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
24262f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
24272f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
24282f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
24292f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
24302f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
24312f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
24322f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
24332f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
24342f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
24352f7816d4SBarry Smith       idx++; v++;
24362f7816d4SBarry Smith     }
24372f7816d4SBarry Smith   }
2438dc0b31edSSatish Balay   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
24393649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
24401ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
244166ed3db0SBarry Smith   PetscFunctionReturn(0);
244266ed3db0SBarry Smith }
244366ed3db0SBarry Smith 
2444ed1c418cSBarry Smith /*--------------------------------------------------------------------------------------------*/
2445ed1c418cSBarry Smith #undef __FUNCT__
2446ed1c418cSBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_18"
2447ed1c418cSBarry Smith PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2448ed1c418cSBarry Smith {
2449ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2450ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2451d2840e09SBarry Smith   const PetscScalar *x,*v;
2452d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2453ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2454ed1c418cSBarry Smith   PetscErrorCode    ierr;
2455d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2456d2840e09SBarry Smith   PetscInt          nonzerorow=0,n,i,jrow,j;
2457ed1c418cSBarry Smith 
2458ed1c418cSBarry Smith   PetscFunctionBegin;
24593649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2460ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2461ed1c418cSBarry Smith   idx  = a->j;
2462ed1c418cSBarry Smith   v    = a->a;
2463ed1c418cSBarry Smith   ii   = a->i;
2464ed1c418cSBarry Smith 
2465ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2466ed1c418cSBarry Smith     jrow = ii[i];
2467ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2468ed1c418cSBarry Smith     sum1  = 0.0;
2469ed1c418cSBarry Smith     sum2  = 0.0;
2470ed1c418cSBarry Smith     sum3  = 0.0;
2471ed1c418cSBarry Smith     sum4  = 0.0;
2472ed1c418cSBarry Smith     sum5  = 0.0;
2473ed1c418cSBarry Smith     sum6  = 0.0;
2474ed1c418cSBarry Smith     sum7  = 0.0;
2475ed1c418cSBarry Smith     sum8  = 0.0;
2476ed1c418cSBarry Smith     sum9  = 0.0;
2477ed1c418cSBarry Smith     sum10 = 0.0;
2478ed1c418cSBarry Smith     sum11 = 0.0;
2479ed1c418cSBarry Smith     sum12 = 0.0;
2480ed1c418cSBarry Smith     sum13 = 0.0;
2481ed1c418cSBarry Smith     sum14 = 0.0;
2482ed1c418cSBarry Smith     sum15 = 0.0;
2483ed1c418cSBarry Smith     sum16 = 0.0;
2484ed1c418cSBarry Smith     sum17 = 0.0;
2485ed1c418cSBarry Smith     sum18 = 0.0;
2486ed1c418cSBarry Smith     nonzerorow += (n>0);
2487ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2488ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2489ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2490ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2491ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2492ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2493ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2494ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2495ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2496ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2497ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2498ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2499ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2500ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2501ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2502ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2503ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2504ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2505ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2506ed1c418cSBarry Smith       jrow++;
2507ed1c418cSBarry Smith      }
2508ed1c418cSBarry Smith     y[18*i]    = sum1;
2509ed1c418cSBarry Smith     y[18*i+1]  = sum2;
2510ed1c418cSBarry Smith     y[18*i+2]  = sum3;
2511ed1c418cSBarry Smith     y[18*i+3]  = sum4;
2512ed1c418cSBarry Smith     y[18*i+4]  = sum5;
2513ed1c418cSBarry Smith     y[18*i+5]  = sum6;
2514ed1c418cSBarry Smith     y[18*i+6]  = sum7;
2515ed1c418cSBarry Smith     y[18*i+7]  = sum8;
2516ed1c418cSBarry Smith     y[18*i+8]  = sum9;
2517ed1c418cSBarry Smith     y[18*i+9]  = sum10;
2518ed1c418cSBarry Smith     y[18*i+10] = sum11;
2519ed1c418cSBarry Smith     y[18*i+11] = sum12;
2520ed1c418cSBarry Smith     y[18*i+12] = sum13;
2521ed1c418cSBarry Smith     y[18*i+13] = sum14;
2522ed1c418cSBarry Smith     y[18*i+14] = sum15;
2523ed1c418cSBarry Smith     y[18*i+15] = sum16;
2524ed1c418cSBarry Smith     y[18*i+16] = sum17;
2525ed1c418cSBarry Smith     y[18*i+17] = sum18;
2526ed1c418cSBarry Smith   }
2527ed1c418cSBarry Smith 
2528dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
25293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2530ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2531ed1c418cSBarry Smith   PetscFunctionReturn(0);
2532ed1c418cSBarry Smith }
2533ed1c418cSBarry Smith 
2534ed1c418cSBarry Smith #undef __FUNCT__
2535ed1c418cSBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2536ed1c418cSBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2537ed1c418cSBarry Smith {
2538ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2539ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2540d2840e09SBarry Smith   const PetscScalar *x,*v;
2541d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2542ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2543ed1c418cSBarry Smith   PetscErrorCode    ierr;
2544d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2545d2840e09SBarry Smith   PetscInt          n,i;
2546ed1c418cSBarry Smith 
2547ed1c418cSBarry Smith   PetscFunctionBegin;
2548d2840e09SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
25493649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2550ed1c418cSBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2551ed1c418cSBarry Smith 
2552ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2553ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2554ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2555ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2556ed1c418cSBarry Smith     alpha1  = x[18*i];
2557ed1c418cSBarry Smith     alpha2  = x[18*i+1];
2558ed1c418cSBarry Smith     alpha3  = x[18*i+2];
2559ed1c418cSBarry Smith     alpha4  = x[18*i+3];
2560ed1c418cSBarry Smith     alpha5  = x[18*i+4];
2561ed1c418cSBarry Smith     alpha6  = x[18*i+5];
2562ed1c418cSBarry Smith     alpha7  = x[18*i+6];
2563ed1c418cSBarry Smith     alpha8  = x[18*i+7];
2564ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2565ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2566ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2567ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2568ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2569ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2570ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2571ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2572ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2573ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2574ed1c418cSBarry Smith     while (n-->0) {
2575ed1c418cSBarry Smith       y[18*(*idx)]    += alpha1*(*v);
2576ed1c418cSBarry Smith       y[18*(*idx)+1]  += alpha2*(*v);
2577ed1c418cSBarry Smith       y[18*(*idx)+2]  += alpha3*(*v);
2578ed1c418cSBarry Smith       y[18*(*idx)+3]  += alpha4*(*v);
2579ed1c418cSBarry Smith       y[18*(*idx)+4]  += alpha5*(*v);
2580ed1c418cSBarry Smith       y[18*(*idx)+5]  += alpha6*(*v);
2581ed1c418cSBarry Smith       y[18*(*idx)+6]  += alpha7*(*v);
2582ed1c418cSBarry Smith       y[18*(*idx)+7]  += alpha8*(*v);
2583ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2584ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2585ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2586ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2587ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2588ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2589ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2590ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2591ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2592ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2593ed1c418cSBarry Smith       idx++; v++;
2594ed1c418cSBarry Smith     }
2595ed1c418cSBarry Smith   }
2596dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
25973649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2598ed1c418cSBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2599ed1c418cSBarry Smith   PetscFunctionReturn(0);
2600ed1c418cSBarry Smith }
2601ed1c418cSBarry Smith 
2602ed1c418cSBarry Smith #undef __FUNCT__
2603ed1c418cSBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2604ed1c418cSBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2605ed1c418cSBarry Smith {
2606ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2607ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2608d2840e09SBarry Smith   const PetscScalar *x,*v;
2609d2840e09SBarry Smith   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2610ed1c418cSBarry Smith   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2611ed1c418cSBarry Smith   PetscErrorCode    ierr;
2612d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2613ed1c418cSBarry Smith   PetscInt          n,i,jrow,j;
2614ed1c418cSBarry Smith 
2615ed1c418cSBarry Smith   PetscFunctionBegin;
2616ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
26173649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2618ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2619ed1c418cSBarry Smith   idx  = a->j;
2620ed1c418cSBarry Smith   v    = a->a;
2621ed1c418cSBarry Smith   ii   = a->i;
2622ed1c418cSBarry Smith 
2623ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2624ed1c418cSBarry Smith     jrow = ii[i];
2625ed1c418cSBarry Smith     n    = ii[i+1] - jrow;
2626ed1c418cSBarry Smith     sum1  = 0.0;
2627ed1c418cSBarry Smith     sum2  = 0.0;
2628ed1c418cSBarry Smith     sum3  = 0.0;
2629ed1c418cSBarry Smith     sum4  = 0.0;
2630ed1c418cSBarry Smith     sum5  = 0.0;
2631ed1c418cSBarry Smith     sum6  = 0.0;
2632ed1c418cSBarry Smith     sum7  = 0.0;
2633ed1c418cSBarry Smith     sum8  = 0.0;
2634ed1c418cSBarry Smith     sum9  = 0.0;
2635ed1c418cSBarry Smith     sum10 = 0.0;
2636ed1c418cSBarry Smith     sum11 = 0.0;
2637ed1c418cSBarry Smith     sum12 = 0.0;
2638ed1c418cSBarry Smith     sum13 = 0.0;
2639ed1c418cSBarry Smith     sum14 = 0.0;
2640ed1c418cSBarry Smith     sum15 = 0.0;
2641ed1c418cSBarry Smith     sum16 = 0.0;
2642ed1c418cSBarry Smith     sum17 = 0.0;
2643ed1c418cSBarry Smith     sum18 = 0.0;
2644ed1c418cSBarry Smith     for (j=0; j<n; j++) {
2645ed1c418cSBarry Smith       sum1  += v[jrow]*x[18*idx[jrow]];
2646ed1c418cSBarry Smith       sum2  += v[jrow]*x[18*idx[jrow]+1];
2647ed1c418cSBarry Smith       sum3  += v[jrow]*x[18*idx[jrow]+2];
2648ed1c418cSBarry Smith       sum4  += v[jrow]*x[18*idx[jrow]+3];
2649ed1c418cSBarry Smith       sum5  += v[jrow]*x[18*idx[jrow]+4];
2650ed1c418cSBarry Smith       sum6  += v[jrow]*x[18*idx[jrow]+5];
2651ed1c418cSBarry Smith       sum7  += v[jrow]*x[18*idx[jrow]+6];
2652ed1c418cSBarry Smith       sum8  += v[jrow]*x[18*idx[jrow]+7];
2653ed1c418cSBarry Smith       sum9  += v[jrow]*x[18*idx[jrow]+8];
2654ed1c418cSBarry Smith       sum10 += v[jrow]*x[18*idx[jrow]+9];
2655ed1c418cSBarry Smith       sum11 += v[jrow]*x[18*idx[jrow]+10];
2656ed1c418cSBarry Smith       sum12 += v[jrow]*x[18*idx[jrow]+11];
2657ed1c418cSBarry Smith       sum13 += v[jrow]*x[18*idx[jrow]+12];
2658ed1c418cSBarry Smith       sum14 += v[jrow]*x[18*idx[jrow]+13];
2659ed1c418cSBarry Smith       sum15 += v[jrow]*x[18*idx[jrow]+14];
2660ed1c418cSBarry Smith       sum16 += v[jrow]*x[18*idx[jrow]+15];
2661ed1c418cSBarry Smith       sum17 += v[jrow]*x[18*idx[jrow]+16];
2662ed1c418cSBarry Smith       sum18 += v[jrow]*x[18*idx[jrow]+17];
2663ed1c418cSBarry Smith       jrow++;
2664ed1c418cSBarry Smith      }
2665ed1c418cSBarry Smith     y[18*i]    += sum1;
2666ed1c418cSBarry Smith     y[18*i+1]  += sum2;
2667ed1c418cSBarry Smith     y[18*i+2]  += sum3;
2668ed1c418cSBarry Smith     y[18*i+3]  += sum4;
2669ed1c418cSBarry Smith     y[18*i+4]  += sum5;
2670ed1c418cSBarry Smith     y[18*i+5]  += sum6;
2671ed1c418cSBarry Smith     y[18*i+6]  += sum7;
2672ed1c418cSBarry Smith     y[18*i+7]  += sum8;
2673ed1c418cSBarry Smith     y[18*i+8]  += sum9;
2674ed1c418cSBarry Smith     y[18*i+9]  += sum10;
2675ed1c418cSBarry Smith     y[18*i+10] += sum11;
2676ed1c418cSBarry Smith     y[18*i+11] += sum12;
2677ed1c418cSBarry Smith     y[18*i+12] += sum13;
2678ed1c418cSBarry Smith     y[18*i+13] += sum14;
2679ed1c418cSBarry Smith     y[18*i+14] += sum15;
2680ed1c418cSBarry Smith     y[18*i+15] += sum16;
2681ed1c418cSBarry Smith     y[18*i+16] += sum17;
2682ed1c418cSBarry Smith     y[18*i+17] += sum18;
2683ed1c418cSBarry Smith   }
2684ed1c418cSBarry Smith 
2685dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
26863649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2687ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2688ed1c418cSBarry Smith   PetscFunctionReturn(0);
2689ed1c418cSBarry Smith }
2690ed1c418cSBarry Smith 
2691ed1c418cSBarry Smith #undef __FUNCT__
2692ed1c418cSBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2693ed1c418cSBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2694ed1c418cSBarry Smith {
2695ed1c418cSBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2696ed1c418cSBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2697d2840e09SBarry Smith   const PetscScalar *x,*v;
2698d2840e09SBarry Smith   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2699ed1c418cSBarry Smith   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2700ed1c418cSBarry Smith   PetscErrorCode    ierr;
2701d2840e09SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx;
2702d2840e09SBarry Smith   PetscInt          n,i;
2703ed1c418cSBarry Smith 
2704ed1c418cSBarry Smith   PetscFunctionBegin;
2705ed1c418cSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
27063649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2707ed1c418cSBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2708ed1c418cSBarry Smith   for (i=0; i<m; i++) {
2709ed1c418cSBarry Smith     idx    = a->j + a->i[i] ;
2710ed1c418cSBarry Smith     v      = a->a + a->i[i] ;
2711ed1c418cSBarry Smith     n      = a->i[i+1] - a->i[i];
2712ed1c418cSBarry Smith     alpha1 = x[18*i];
2713ed1c418cSBarry Smith     alpha2 = x[18*i+1];
2714ed1c418cSBarry Smith     alpha3 = x[18*i+2];
2715ed1c418cSBarry Smith     alpha4 = x[18*i+3];
2716ed1c418cSBarry Smith     alpha5 = x[18*i+4];
2717ed1c418cSBarry Smith     alpha6 = x[18*i+5];
2718ed1c418cSBarry Smith     alpha7 = x[18*i+6];
2719ed1c418cSBarry Smith     alpha8 = x[18*i+7];
2720ed1c418cSBarry Smith     alpha9  = x[18*i+8];
2721ed1c418cSBarry Smith     alpha10 = x[18*i+9];
2722ed1c418cSBarry Smith     alpha11 = x[18*i+10];
2723ed1c418cSBarry Smith     alpha12 = x[18*i+11];
2724ed1c418cSBarry Smith     alpha13 = x[18*i+12];
2725ed1c418cSBarry Smith     alpha14 = x[18*i+13];
2726ed1c418cSBarry Smith     alpha15 = x[18*i+14];
2727ed1c418cSBarry Smith     alpha16 = x[18*i+15];
2728ed1c418cSBarry Smith     alpha17 = x[18*i+16];
2729ed1c418cSBarry Smith     alpha18 = x[18*i+17];
2730ed1c418cSBarry Smith     while (n-->0) {
2731ed1c418cSBarry Smith       y[18*(*idx)]   += alpha1*(*v);
2732ed1c418cSBarry Smith       y[18*(*idx)+1] += alpha2*(*v);
2733ed1c418cSBarry Smith       y[18*(*idx)+2] += alpha3*(*v);
2734ed1c418cSBarry Smith       y[18*(*idx)+3] += alpha4*(*v);
2735ed1c418cSBarry Smith       y[18*(*idx)+4] += alpha5*(*v);
2736ed1c418cSBarry Smith       y[18*(*idx)+5] += alpha6*(*v);
2737ed1c418cSBarry Smith       y[18*(*idx)+6] += alpha7*(*v);
2738ed1c418cSBarry Smith       y[18*(*idx)+7] += alpha8*(*v);
2739ed1c418cSBarry Smith       y[18*(*idx)+8]  += alpha9*(*v);
2740ed1c418cSBarry Smith       y[18*(*idx)+9]  += alpha10*(*v);
2741ed1c418cSBarry Smith       y[18*(*idx)+10] += alpha11*(*v);
2742ed1c418cSBarry Smith       y[18*(*idx)+11] += alpha12*(*v);
2743ed1c418cSBarry Smith       y[18*(*idx)+12] += alpha13*(*v);
2744ed1c418cSBarry Smith       y[18*(*idx)+13] += alpha14*(*v);
2745ed1c418cSBarry Smith       y[18*(*idx)+14] += alpha15*(*v);
2746ed1c418cSBarry Smith       y[18*(*idx)+15] += alpha16*(*v);
2747ed1c418cSBarry Smith       y[18*(*idx)+16] += alpha17*(*v);
2748ed1c418cSBarry Smith       y[18*(*idx)+17] += alpha18*(*v);
2749ed1c418cSBarry Smith       idx++; v++;
2750ed1c418cSBarry Smith     }
2751ed1c418cSBarry Smith   }
2752dc0b31edSSatish Balay   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
27533649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2754ed1c418cSBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2755ed1c418cSBarry Smith   PetscFunctionReturn(0);
2756ed1c418cSBarry Smith }
2757ed1c418cSBarry Smith 
27586861f193SBarry Smith #undef __FUNCT__
27596861f193SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_N"
27606861f193SBarry Smith PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
27616861f193SBarry Smith {
27626861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
27636861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
27646861f193SBarry Smith   const PetscScalar *x,*v;
27656861f193SBarry Smith   PetscScalar       *y,*sums;
27666861f193SBarry Smith   PetscErrorCode    ierr;
27676861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
27686861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
27696861f193SBarry Smith 
27706861f193SBarry Smith   PetscFunctionBegin;
27713649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
27726861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
27736861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
27746861f193SBarry Smith   idx  = a->j;
27756861f193SBarry Smith   v    = a->a;
27766861f193SBarry Smith   ii   = a->i;
27776861f193SBarry Smith 
27786861f193SBarry Smith   for (i=0; i<m; i++) {
27796861f193SBarry Smith     jrow = ii[i];
27806861f193SBarry Smith     n    = ii[i+1] - jrow;
27816861f193SBarry Smith     sums = y + dof*i;
27826861f193SBarry Smith     for (j=0; j<n; j++) {
27836861f193SBarry Smith       for (k=0; k<dof; k++) {
27846861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
27856861f193SBarry Smith       }
27866861f193SBarry Smith       jrow++;
27876861f193SBarry Smith     }
27886861f193SBarry Smith   }
27896861f193SBarry Smith 
27906861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
27913649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
27926861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
27936861f193SBarry Smith   PetscFunctionReturn(0);
27946861f193SBarry Smith }
27956861f193SBarry Smith 
27966861f193SBarry Smith #undef __FUNCT__
27976861f193SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
27986861f193SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
27996861f193SBarry Smith {
28006861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28016861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28026861f193SBarry Smith   const PetscScalar *x,*v;
28036861f193SBarry Smith   PetscScalar       *y,*sums;
28046861f193SBarry Smith   PetscErrorCode    ierr;
28056861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
28066861f193SBarry Smith   PetscInt          n,i,jrow,j,dof = b->dof,k;
28076861f193SBarry Smith 
28086861f193SBarry Smith   PetscFunctionBegin;
28096861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28103649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28116861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28126861f193SBarry Smith   idx  = a->j;
28136861f193SBarry Smith   v    = a->a;
28146861f193SBarry Smith   ii   = a->i;
28156861f193SBarry Smith 
28166861f193SBarry Smith   for (i=0; i<m; i++) {
28176861f193SBarry Smith     jrow = ii[i];
28186861f193SBarry Smith     n    = ii[i+1] - jrow;
28196861f193SBarry Smith     sums = y + dof*i;
28206861f193SBarry Smith     for (j=0; j<n; j++) {
28216861f193SBarry Smith       for (k=0; k<dof; k++) {
28226861f193SBarry Smith         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
28236861f193SBarry Smith       }
28246861f193SBarry Smith       jrow++;
28256861f193SBarry Smith     }
28266861f193SBarry Smith   }
28276861f193SBarry Smith 
28286861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28293649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28306861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28316861f193SBarry Smith   PetscFunctionReturn(0);
28326861f193SBarry Smith }
28336861f193SBarry Smith 
28346861f193SBarry Smith #undef __FUNCT__
28356861f193SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
28366861f193SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
28376861f193SBarry Smith {
28386861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28396861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28406861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28416861f193SBarry Smith   PetscScalar       *y;
28426861f193SBarry Smith   PetscErrorCode    ierr;
28436861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28446861f193SBarry Smith   PetscInt          n,i,k;
28456861f193SBarry Smith 
28466861f193SBarry Smith   PetscFunctionBegin;
28473649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28486861f193SBarry Smith   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
28496861f193SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
28506861f193SBarry Smith   for (i=0; i<m; i++) {
28516861f193SBarry Smith     idx    = a->j + a->i[i] ;
28526861f193SBarry Smith     v      = a->a + a->i[i] ;
28536861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28546861f193SBarry Smith     alpha  = x + dof*i;
28556861f193SBarry Smith     while (n-->0) {
28566861f193SBarry Smith       for (k=0; k<dof; k++) {
28576861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28586861f193SBarry Smith       }
285983ce7366SBarry Smith       idx++; v++;
28606861f193SBarry Smith     }
28616861f193SBarry Smith   }
28626861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28633649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28646861f193SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
28656861f193SBarry Smith   PetscFunctionReturn(0);
28666861f193SBarry Smith }
28676861f193SBarry Smith 
28686861f193SBarry Smith #undef __FUNCT__
28696861f193SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
28706861f193SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
28716861f193SBarry Smith {
28726861f193SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
28736861f193SBarry Smith   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
28746861f193SBarry Smith   const PetscScalar *x,*v,*alpha;
28756861f193SBarry Smith   PetscScalar       *y;
28766861f193SBarry Smith   PetscErrorCode    ierr;
28776861f193SBarry Smith   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
28786861f193SBarry Smith   PetscInt          n,i,k;
28796861f193SBarry Smith 
28806861f193SBarry Smith   PetscFunctionBegin;
28816861f193SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
28823649974fSBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
28836861f193SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
28846861f193SBarry Smith   for (i=0; i<m; i++) {
28856861f193SBarry Smith     idx    = a->j + a->i[i] ;
28866861f193SBarry Smith     v      = a->a + a->i[i] ;
28876861f193SBarry Smith     n      = a->i[i+1] - a->i[i];
28886861f193SBarry Smith     alpha  = x + dof*i;
28896861f193SBarry Smith     while (n-->0) {
28906861f193SBarry Smith       for (k=0; k<dof; k++) {
28916861f193SBarry Smith         y[dof*(*idx)+k] += alpha[k]*(*v);
28926861f193SBarry Smith       }
289383ce7366SBarry Smith       idx++; v++;
28946861f193SBarry Smith     }
28956861f193SBarry Smith   }
28966861f193SBarry Smith   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
28973649974fSBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
28986861f193SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
28996861f193SBarry Smith   PetscFunctionReturn(0);
29006861f193SBarry Smith }
29016861f193SBarry Smith 
2902f4a53059SBarry Smith /*===================================================================================*/
29034a2ae208SSatish Balay #undef __FUNCT__
29044a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2905dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2906f4a53059SBarry Smith {
29074cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2908dfbe8321SBarry Smith   PetscErrorCode ierr;
2909f4a53059SBarry Smith 
2910b24ad042SBarry Smith   PetscFunctionBegin;
2911f4a53059SBarry Smith   /* start the scatter */
2912ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29134cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2914ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
29154cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2916f4a53059SBarry Smith   PetscFunctionReturn(0);
2917f4a53059SBarry Smith }
2918f4a53059SBarry Smith 
29194a2ae208SSatish Balay #undef __FUNCT__
29204a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2921dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
29224cb9d9b8SBarry Smith {
29234cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2924dfbe8321SBarry Smith   PetscErrorCode ierr;
2925b24ad042SBarry Smith 
29264cb9d9b8SBarry Smith   PetscFunctionBegin;
29274cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
29284cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2929ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2930ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29314cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29324cb9d9b8SBarry Smith }
29334cb9d9b8SBarry Smith 
29344a2ae208SSatish Balay #undef __FUNCT__
29354a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2936dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29374cb9d9b8SBarry Smith {
29384cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2939dfbe8321SBarry Smith   PetscErrorCode ierr;
29404cb9d9b8SBarry Smith 
2941b24ad042SBarry Smith   PetscFunctionBegin;
29424cb9d9b8SBarry Smith   /* start the scatter */
2943ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2944d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2945ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2946717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
29474cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29484cb9d9b8SBarry Smith }
29494cb9d9b8SBarry Smith 
29504a2ae208SSatish Balay #undef __FUNCT__
29514a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2952dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
29534cb9d9b8SBarry Smith {
29544cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2955dfbe8321SBarry Smith   PetscErrorCode ierr;
2956b24ad042SBarry Smith 
29574cb9d9b8SBarry Smith   PetscFunctionBegin;
29584cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2959ca9f406cSSatish Balay   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2960d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2961ca9f406cSSatish Balay   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
29624cb9d9b8SBarry Smith   PetscFunctionReturn(0);
29634cb9d9b8SBarry Smith }
29644cb9d9b8SBarry Smith 
296595ddefa2SHong Zhang /* ----------------------------------------------------------------*/
29669c4fc4c7SBarry Smith #undef __FUNCT__
29677ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
29687ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
29697ba1a0bfSKris Buschelman {
29707ba1a0bfSKris Buschelman   /* This routine requires testing -- but it's getting better. */
29717ba1a0bfSKris Buschelman   PetscErrorCode     ierr;
2972a1a86e44SBarry Smith   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
29737ba1a0bfSKris Buschelman   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
29747ba1a0bfSKris Buschelman   Mat                P=pp->AIJ;
29757ba1a0bfSKris Buschelman   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2976d2840e09SBarry Smith   PetscInt           *pti,*ptj,*ptJ;
29777ba1a0bfSKris Buschelman   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2978d2840e09SBarry Smith   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2979d2840e09SBarry Smith   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
29807ba1a0bfSKris Buschelman   MatScalar          *ca;
2981d2840e09SBarry Smith   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
29827ba1a0bfSKris Buschelman 
29837ba1a0bfSKris Buschelman   PetscFunctionBegin;
29847ba1a0bfSKris Buschelman   /* Start timer */
29857ba1a0bfSKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
29867ba1a0bfSKris Buschelman 
29877ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
29887ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
29897ba1a0bfSKris Buschelman 
29907ba1a0bfSKris Buschelman   cn = pn*ppdof;
29917ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
29927ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
29937ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
29947ba1a0bfSKris Buschelman   ci[0] = 0;
29957ba1a0bfSKris Buschelman 
29967ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
299774ed9c26SBarry Smith   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
2998c43dabc9SBarry Smith   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2999c43dabc9SBarry Smith   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
30007ba1a0bfSKris Buschelman 
30017ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
30027ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
30037ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3004a1a86e44SBarry Smith   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
30057ba1a0bfSKris Buschelman   current_space = free_space;
30067ba1a0bfSKris Buschelman 
30077ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
30087ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
30097ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
30107ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
30117ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
30127ba1a0bfSKris Buschelman       ptanzi = 0;
30137ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
30147ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
30157ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
30167ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
30177ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
30187ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
30197ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
30207ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
30217ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
30227ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
30237ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
30247ba1a0bfSKris Buschelman           }
30257ba1a0bfSKris Buschelman         }
30267ba1a0bfSKris Buschelman       }
30277ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
30287ba1a0bfSKris Buschelman       ptaj = ptasparserow;
30297ba1a0bfSKris Buschelman       cnzi   = 0;
30307ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30317ba1a0bfSKris Buschelman         /* Get offset within block of P */
30327ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
30337ba1a0bfSKris Buschelman         /* Get block row of P */
30347ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
30357ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
30367ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
30377ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
30387ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
30397ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
30407ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
30417ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
30427ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
30437ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
30447ba1a0bfSKris Buschelman           }
30457ba1a0bfSKris Buschelman         }
30467ba1a0bfSKris Buschelman       }
30477ba1a0bfSKris Buschelman 
30487ba1a0bfSKris Buschelman       /* sort sparserow */
30497ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
30507ba1a0bfSKris Buschelman 
30517ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
30527ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
30537ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
30544238b7adSHong Zhang         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
30557ba1a0bfSKris Buschelman       }
30567ba1a0bfSKris Buschelman 
30577ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
30587ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
30597ba1a0bfSKris Buschelman       current_space->array           += cnzi;
30607ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
30617ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
30627ba1a0bfSKris Buschelman 
30637ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
30647ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
30657ba1a0bfSKris Buschelman       }
30667ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
30677ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
30687ba1a0bfSKris Buschelman       }
30697ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
30707ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
30717ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
30727ba1a0bfSKris Buschelman     }
30737ba1a0bfSKris Buschelman   }
30747ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
30757ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
30767ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
30777ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3078a1a86e44SBarry Smith   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
307974ed9c26SBarry Smith   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
30807ba1a0bfSKris Buschelman 
30817ba1a0bfSKris Buschelman   /* Allocate space for ca */
30827ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
30837ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
30847ba1a0bfSKris Buschelman 
30857ba1a0bfSKris Buschelman   /* put together the new matrix */
30867adad957SLisandro Dalcin   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
30877ba1a0bfSKris Buschelman 
30887ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
30897ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
30907ba1a0bfSKris Buschelman   c          = (Mat_SeqAIJ *)((*C)->data);
3091e6b907acSBarry Smith   c->free_a  = PETSC_TRUE;
3092e6b907acSBarry Smith   c->free_ij = PETSC_TRUE;
30937ba1a0bfSKris Buschelman   c->nonew   = 0;
30947ba1a0bfSKris Buschelman 
30957ba1a0bfSKris Buschelman   /* Clean up. */
30967ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
30977ba1a0bfSKris Buschelman 
30987ba1a0bfSKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
30997ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
31007ba1a0bfSKris Buschelman }
31017ba1a0bfSKris Buschelman 
31027ba1a0bfSKris Buschelman #undef __FUNCT__
31037ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
31047ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
31057ba1a0bfSKris Buschelman {
31067ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
31077ba1a0bfSKris Buschelman   PetscErrorCode  ierr;
31087ba1a0bfSKris Buschelman   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
31097ba1a0bfSKris Buschelman   Mat             P=pp->AIJ;
31107ba1a0bfSKris Buschelman   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
31117ba1a0bfSKris Buschelman   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
31127ba1a0bfSKris Buschelman   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3113d2840e09SBarry Smith   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3114d2840e09SBarry Smith   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3115d2840e09SBarry Smith   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3116d2840e09SBarry Smith   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3117d2840e09SBarry Smith   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3118d2840e09SBarry Smith   MatScalar       *ca=c->a,*caj,*apa;
31197ba1a0bfSKris Buschelman 
31207ba1a0bfSKris Buschelman   PetscFunctionBegin;
31217ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
312274ed9c26SBarry Smith   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
312374ed9c26SBarry Smith   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
312474ed9c26SBarry Smith   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
312574ed9c26SBarry Smith   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
31267ba1a0bfSKris Buschelman 
31277ba1a0bfSKris Buschelman   /* Clear old values in C */
31287ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
31297ba1a0bfSKris Buschelman 
31307ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
31317ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
31327ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
31337ba1a0bfSKris Buschelman     apnzj = 0;
31347ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
31357ba1a0bfSKris Buschelman       /* Get offset within block of P */
31367ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
31377ba1a0bfSKris Buschelman       /* Get block row of P */
31387ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
31397ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
31407ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
31417ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
31427ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
31437ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
31447ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
31457ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
31467ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
31477ba1a0bfSKris Buschelman         }
31487ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
31497ba1a0bfSKris Buschelman       }
3150dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
31517ba1a0bfSKris Buschelman       aa++;
31527ba1a0bfSKris Buschelman     }
31537ba1a0bfSKris Buschelman 
31547ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
31557ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
31567ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
31577ba1a0bfSKris Buschelman 
31587ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
31597ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
31607ba1a0bfSKris Buschelman     pshift  = i%ppdof;
31617ba1a0bfSKris Buschelman     poffset = pi[prow];
31627ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
31637ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
31647ba1a0bfSKris Buschelman     pJ   = pj+poffset;
31657ba1a0bfSKris Buschelman     pA   = pa+poffset;
31667ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
31677ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
31687ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
31697ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
31707ba1a0bfSKris Buschelman       pJ++;
31717ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
31727ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
31737ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
31747ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
31757ba1a0bfSKris Buschelman         }
31767ba1a0bfSKris Buschelman       }
3177dc0b31edSSatish Balay       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
31787ba1a0bfSKris Buschelman       pA++;
31797ba1a0bfSKris Buschelman     }
31807ba1a0bfSKris Buschelman 
31817ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
31827ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
31837ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
31847ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
31857ba1a0bfSKris Buschelman     }
31867ba1a0bfSKris Buschelman   }
31877ba1a0bfSKris Buschelman 
31887ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
31897ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31907ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
319174ed9c26SBarry Smith   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
319295ddefa2SHong Zhang   PetscFunctionReturn(0);
319395ddefa2SHong Zhang }
31947ba1a0bfSKris Buschelman 
319595ddefa2SHong Zhang #undef __FUNCT__
319695ddefa2SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
319795ddefa2SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
319895ddefa2SHong Zhang {
319995ddefa2SHong Zhang   PetscErrorCode    ierr;
320095ddefa2SHong Zhang 
320195ddefa2SHong Zhang   PetscFunctionBegin;
320295ddefa2SHong Zhang   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
320395ddefa2SHong Zhang   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
320495ddefa2SHong Zhang   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
320595ddefa2SHong Zhang   PetscFunctionReturn(0);
320695ddefa2SHong Zhang }
320795ddefa2SHong Zhang 
320895ddefa2SHong Zhang #undef __FUNCT__
320995ddefa2SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
321095ddefa2SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
321195ddefa2SHong Zhang {
321295ddefa2SHong Zhang   PetscFunctionBegin;
3213e32f2f54SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
32147ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
32157ba1a0bfSKris Buschelman }
32167ba1a0bfSKris Buschelman 
3217be1d678aSKris Buschelman EXTERN_C_BEGIN
32187ba1a0bfSKris Buschelman #undef __FUNCT__
32190fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
32207087cfbeSBarry Smith PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32219c4fc4c7SBarry Smith {
32229c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3223ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
32249c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
32259c4fc4c7SBarry Smith   PetscErrorCode    ierr;
32260fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3227ba8c8a56SBarry Smith   PetscInt          *cols;
3228ba8c8a56SBarry Smith   PetscScalar       *vals;
32299c4fc4c7SBarry Smith 
32309c4fc4c7SBarry Smith   PetscFunctionBegin;
32319c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
32327c307921SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
32339c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
32349c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
32350fd73130SBarry Smith     for (j=0; j<dof; j++) {
32360fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
32379c4fc4c7SBarry Smith     }
32389c4fc4c7SBarry Smith   }
3239ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
32409c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
32419c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
32429c4fc4c7SBarry Smith   ii   = 0;
32439c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
3244ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32450fd73130SBarry Smith     for (j=0; j<dof; j++) {
32469c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
32470fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
32489c4fc4c7SBarry Smith       }
3249ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
32509c4fc4c7SBarry Smith       ii++;
32519c4fc4c7SBarry Smith     }
3252ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
32539c4fc4c7SBarry Smith   }
32549c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
3255ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3256ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3257ceb03754SKris Buschelman 
3258ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
32598ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3260c3d102feSKris Buschelman   } else {
3261ceb03754SKris Buschelman     *newmat = B;
3262c3d102feSKris Buschelman   }
32639c4fc4c7SBarry Smith   PetscFunctionReturn(0);
32649c4fc4c7SBarry Smith }
3265be1d678aSKris Buschelman EXTERN_C_END
32669c4fc4c7SBarry Smith 
3267c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
3268be1d678aSKris Buschelman 
3269be1d678aSKris Buschelman EXTERN_C_BEGIN
32700fd73130SBarry Smith #undef __FUNCT__
32710fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
32727087cfbeSBarry Smith PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
32730fd73130SBarry Smith {
32740fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3275ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
32760fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
32770fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
32780fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
32790fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3280910ba992SMatthew Knepley   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3281910ba992SMatthew Knepley   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
32820fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
32830fd73130SBarry Smith   PetscErrorCode    ierr;
32840fd73130SBarry Smith   PetscScalar       *vals,*ovals;
32850fd73130SBarry Smith 
32860fd73130SBarry Smith   PetscFunctionBegin;
3287d0f46423SBarry Smith   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3288d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
32890fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
32900fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
32910fd73130SBarry Smith     for (j=0; j<dof; j++) {
32920fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
32930fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
32940fd73130SBarry Smith     }
32950fd73130SBarry Smith   }
329669b1f4b7SBarry Smith   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
32970fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
32980fd73130SBarry Smith 
32997a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3300d0f46423SBarry Smith   rstart = dof*maij->A->rmap->rstart;
3301d0f46423SBarry Smith   cstart = dof*maij->A->cmap->rstart;
33020fd73130SBarry Smith   garray = mpiaij->garray;
33030fd73130SBarry Smith 
33040fd73130SBarry Smith   ii = rstart;
3305d0f46423SBarry Smith   for (i=0; i<A->rmap->n/dof; i++) {
33060fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33070fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33080fd73130SBarry Smith     for (j=0; j<dof; j++) {
33090fd73130SBarry Smith       for (k=0; k<ncols; k++) {
33100fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
33110fd73130SBarry Smith       }
33120fd73130SBarry Smith       for (k=0; k<oncols; k++) {
33130fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
33140fd73130SBarry Smith       }
3315ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3316ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
33170fd73130SBarry Smith       ii++;
33180fd73130SBarry Smith     }
33190fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
33200fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
33210fd73130SBarry Smith   }
33220fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
33230fd73130SBarry Smith 
3324ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3325ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326ceb03754SKris Buschelman 
3327ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
33287adad957SLisandro Dalcin     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
33297adad957SLisandro Dalcin     ((PetscObject)A)->refct = 1;
33308ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
33317adad957SLisandro Dalcin     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3332c3d102feSKris Buschelman   } else {
3333ceb03754SKris Buschelman     *newmat = B;
3334c3d102feSKris Buschelman   }
33350fd73130SBarry Smith   PetscFunctionReturn(0);
33360fd73130SBarry Smith }
3337be1d678aSKris Buschelman EXTERN_C_END
33380fd73130SBarry Smith 
33399e516c8fSBarry Smith #undef __FUNCT__
33409e516c8fSBarry Smith #define __FUNCT__ "MatGetSubMatrix_MAIJ"
33419e516c8fSBarry Smith PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
33429e516c8fSBarry Smith {
33439e516c8fSBarry Smith   PetscErrorCode ierr;
33449e516c8fSBarry Smith   Mat            A;
33459e516c8fSBarry Smith 
33469e516c8fSBarry Smith   PetscFunctionBegin;
33479e516c8fSBarry Smith   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
33489e516c8fSBarry Smith   ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
33499e516c8fSBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
33509e516c8fSBarry Smith   PetscFunctionReturn(0);
33519e516c8fSBarry Smith }
33520fd73130SBarry Smith 
3353bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
3354ff585edeSJed Brown #undef __FUNCT__
3355ff585edeSJed Brown #define __FUNCT__ "MatCreateMAIJ"
3356ff585edeSJed Brown /*@C
33570bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
33580bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
33590bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
33600bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
33610bad9183SKris Buschelman 
3362ff585edeSJed Brown   Collective
3363ff585edeSJed Brown 
3364ff585edeSJed Brown   Input Parameters:
3365ff585edeSJed Brown + A - the AIJ matrix describing the action on blocks
3366ff585edeSJed Brown - dof - the block size (number of components per node)
3367ff585edeSJed Brown 
3368ff585edeSJed Brown   Output Parameter:
3369ff585edeSJed Brown . maij - the new MAIJ matrix
3370ff585edeSJed Brown 
33710bad9183SKris Buschelman   Operations provided:
33720fd73130SBarry Smith + MatMult
33730bad9183SKris Buschelman . MatMultTranspose
33740bad9183SKris Buschelman . MatMultAdd
33750bad9183SKris Buschelman . MatMultTransposeAdd
33760fd73130SBarry Smith - MatView
33770bad9183SKris Buschelman 
33780bad9183SKris Buschelman   Level: advanced
33790bad9183SKris Buschelman 
3380b409243cSBarry Smith .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3381ff585edeSJed Brown @*/
33827087cfbeSBarry Smith PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
338382b1193eSBarry Smith {
3384dfbe8321SBarry Smith   PetscErrorCode ierr;
3385b24ad042SBarry Smith   PetscMPIInt    size;
3386b24ad042SBarry Smith   PetscInt       n;
338782b1193eSBarry Smith   Mat            B;
338882b1193eSBarry Smith 
338982b1193eSBarry Smith   PetscFunctionBegin;
3390d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3391d72c5c08SBarry Smith 
3392d72c5c08SBarry Smith   if (dof == 1) {
3393d72c5c08SBarry Smith     *maij = A;
3394d72c5c08SBarry Smith   } else {
33957adad957SLisandro Dalcin     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3396d0f46423SBarry Smith     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3397bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3398bccba955SJed Brown     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3399bccba955SJed Brown     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3400bccba955SJed Brown     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3401cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
3402d72c5c08SBarry Smith 
34037adad957SLisandro Dalcin     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3404f4a53059SBarry Smith     if (size == 1) {
3405feb9fe23SJed Brown       Mat_SeqMAIJ    *b;
3406feb9fe23SJed Brown 
3407b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3408*356c569eSBarry Smith       B->ops->setup   = PETSC_NULL;
34094cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
34100fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
3411feb9fe23SJed Brown       b      = (Mat_SeqMAIJ*)B->data;
3412b9b97703SBarry Smith       b->dof = dof;
34134cb9d9b8SBarry Smith       b->AIJ = A;
3414d72c5c08SBarry Smith       if (dof == 2) {
3415cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
3416cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3417cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3418cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3419bcc973b7SBarry Smith       } else if (dof == 3) {
3420bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
3421bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3422bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3423bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3424bcc973b7SBarry Smith       } else if (dof == 4) {
3425bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
3426bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3427bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3428bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3429f9fae5adSBarry Smith       } else if (dof == 5) {
3430f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
3431f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3432f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3433f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
34346cd98798SBarry Smith       } else if (dof == 6) {
34356cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
34366cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
34376cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
34386cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3439ed8eea03SBarry Smith       } else if (dof == 7) {
3440ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
3441ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3442ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3443ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
344466ed3db0SBarry Smith       } else if (dof == 8) {
344566ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
344666ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
344766ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
344866ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
34492b692628SMatthew Knepley       } else if (dof == 9) {
34502b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
34512b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
34522b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
34532b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3454b9cda22cSBarry Smith       } else if (dof == 10) {
3455b9cda22cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_10;
3456b9cda22cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3457b9cda22cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3458b9cda22cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3459dbdd7285SBarry Smith       } else if (dof == 11) {
3460dbdd7285SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_11;
3461dbdd7285SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3462dbdd7285SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3463dbdd7285SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
34642f7816d4SBarry Smith       } else if (dof == 16) {
34652f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
34662f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
34672f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
34682f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3469ed1c418cSBarry Smith       } else if (dof == 18) {
3470ed1c418cSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_18;
3471ed1c418cSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3472ed1c418cSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3473ed1c418cSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
347482b1193eSBarry Smith       } else {
34756861f193SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_N;
34766861f193SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
34776861f193SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
34786861f193SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
347982b1193eSBarry Smith       }
34807ba1a0bfSKris Buschelman       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
34817ba1a0bfSKris Buschelman       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
34829c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3483f4a53059SBarry Smith     } else {
3484f4a53059SBarry Smith       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3485feb9fe23SJed Brown       Mat_MPIMAIJ *b;
3486f4a53059SBarry Smith       IS          from,to;
3487f4a53059SBarry Smith       Vec         gvec;
3488f4a53059SBarry Smith 
3489b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3490*356c569eSBarry Smith       B->ops->setup   = PETSC_NULL;
3491d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
34920fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
3493b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
3494b9b97703SBarry Smith       b->dof = dof;
3495b9b97703SBarry Smith       b->A   = A;
34964cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
34974cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
34984cb9d9b8SBarry Smith 
3499f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3500a34bdc0bSBarry Smith       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3501a34bdc0bSBarry Smith       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
350213288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3503a34bdc0bSBarry Smith       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3504f4a53059SBarry Smith 
3505f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
3506deff0451SBarry Smith       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3507f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3508f4a53059SBarry Smith 
3509f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
3510778a2246SBarry Smith       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
3511f4a53059SBarry Smith 
3512f4a53059SBarry Smith       /* generate the scatter context */
3513f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3514f4a53059SBarry Smith 
35156bf464f9SBarry Smith       ierr = ISDestroy(&from);CHKERRQ(ierr);
35166bf464f9SBarry Smith       ierr = ISDestroy(&to);CHKERRQ(ierr);
35176bf464f9SBarry Smith       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3518f4a53059SBarry Smith 
3519f4a53059SBarry Smith       B->ops->mult                = MatMult_MPIMAIJ_dof;
35204cb9d9b8SBarry Smith       B->ops->multtranspose       = MatMultTranspose_MPIMAIJ_dof;
35214cb9d9b8SBarry Smith       B->ops->multadd             = MatMultAdd_MPIMAIJ_dof;
35224cb9d9b8SBarry Smith       B->ops->multtransposeadd    = MatMultTransposeAdd_MPIMAIJ_dof;
352395ddefa2SHong Zhang       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
352495ddefa2SHong Zhang       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
35250fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3526f4a53059SBarry Smith     }
35279e516c8fSBarry Smith     B->ops->getsubmatrix        = MatGetSubMatrix_MAIJ;
35284994cf47SJed Brown     ierr = MatSetUp(B);CHKERRQ(ierr);
3529cd3bbe55SBarry Smith     *maij = B;
35300fd73130SBarry Smith     ierr = MatView_Private(B);CHKERRQ(ierr);
3531d72c5c08SBarry Smith   }
353282b1193eSBarry Smith   PetscFunctionReturn(0);
353382b1193eSBarry Smith }
3534