xref: /petsc/src/mat/impls/maij/maij.c (revision c3d102fe2dfc19c1832704c537829f04ceb136a2)
182b1193eSBarry Smith /*
2cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
3cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
4cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
5cd3bbe55SBarry Smith   independently.
6cd3bbe55SBarry Smith 
7cd3bbe55SBarry Smith      We provide:
8cd3bbe55SBarry Smith          MatMult()
9cd3bbe55SBarry Smith          MatMultTranspose()
10cd3bbe55SBarry Smith          MatMultTransposeAdd()
11cd3bbe55SBarry Smith          MatMultAdd()
12cd3bbe55SBarry Smith           and
13cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
14f4a53059SBarry Smith 
15f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1682b1193eSBarry Smith */
1782b1193eSBarry Smith 
18be911618SKris Buschelman #include "src/mat/impls/maij/maij.h"
197ba1a0bfSKris Buschelman #include "src/mat/utils/freespace.h"
203c94ec11SBarry Smith #include "vecimpl.h"
2182b1193eSBarry Smith 
224a2ae208SSatish Balay #undef __FUNCT__
234a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
24dfbe8321SBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
25b9b97703SBarry Smith {
26dfbe8321SBarry Smith   PetscErrorCode ierr;
27b9b97703SBarry Smith   PetscTruth     ismpimaij,isseqmaij;
28b9b97703SBarry Smith 
29b9b97703SBarry Smith   PetscFunctionBegin;
30b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32b9b97703SBarry Smith   if (ismpimaij) {
33b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34b9b97703SBarry Smith 
35b9b97703SBarry Smith     *B = b->A;
36b9b97703SBarry Smith   } else if (isseqmaij) {
37b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38b9b97703SBarry Smith 
39b9b97703SBarry Smith     *B = b->AIJ;
40b9b97703SBarry Smith   } else {
41b9b97703SBarry Smith     *B = A;
42b9b97703SBarry Smith   }
43b9b97703SBarry Smith   PetscFunctionReturn(0);
44b9b97703SBarry Smith }
45b9b97703SBarry Smith 
464a2ae208SSatish Balay #undef __FUNCT__
474a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
48b24ad042SBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
49b9b97703SBarry Smith {
50dfbe8321SBarry Smith   PetscErrorCode ierr;
51b9b97703SBarry Smith   Mat            Aij;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith   PetscFunctionBegin;
54b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56b9b97703SBarry Smith   PetscFunctionReturn(0);
57b9b97703SBarry Smith }
58b9b97703SBarry Smith 
594a2ae208SSatish Balay #undef __FUNCT__
604a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
61dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
6282b1193eSBarry Smith {
63dfbe8321SBarry Smith   PetscErrorCode ierr;
644cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
6582b1193eSBarry Smith 
6682b1193eSBarry Smith   PetscFunctionBegin;
67cd3bbe55SBarry Smith   if (b->AIJ) {
68cd3bbe55SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
6982b1193eSBarry Smith   }
704cb9d9b8SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
714cb9d9b8SBarry Smith   PetscFunctionReturn(0);
724cb9d9b8SBarry Smith }
734cb9d9b8SBarry Smith 
744a2ae208SSatish Balay #undef __FUNCT__
750fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
760fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
770fd73130SBarry Smith {
780fd73130SBarry Smith   PetscErrorCode ierr;
790fd73130SBarry Smith   Mat            B;
800fd73130SBarry Smith 
810fd73130SBarry Smith   PetscFunctionBegin;
82ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
830fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
840fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
850fd73130SBarry Smith   PetscFunctionReturn(0);
860fd73130SBarry Smith }
870fd73130SBarry Smith 
880fd73130SBarry Smith #undef __FUNCT__
890fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
900fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
910fd73130SBarry Smith {
920fd73130SBarry Smith   PetscErrorCode ierr;
930fd73130SBarry Smith   Mat            B;
940fd73130SBarry Smith 
950fd73130SBarry Smith   PetscFunctionBegin;
96ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
970fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
980fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
990fd73130SBarry Smith   PetscFunctionReturn(0);
1000fd73130SBarry Smith }
1010fd73130SBarry Smith 
1020fd73130SBarry Smith #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
104dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1054cb9d9b8SBarry Smith {
106dfbe8321SBarry Smith   PetscErrorCode ierr;
1074cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1084cb9d9b8SBarry Smith 
1094cb9d9b8SBarry Smith   PetscFunctionBegin;
1104cb9d9b8SBarry Smith   if (b->AIJ) {
1114cb9d9b8SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
1124cb9d9b8SBarry Smith   }
113f4a53059SBarry Smith   if (b->OAIJ) {
114f4a53059SBarry Smith     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
115f4a53059SBarry Smith   }
116b9b97703SBarry Smith   if (b->A) {
117b9b97703SBarry Smith     ierr = MatDestroy(b->A);CHKERRQ(ierr);
118b9b97703SBarry Smith   }
119f4a53059SBarry Smith   if (b->ctx) {
120f4a53059SBarry Smith     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
121f4a53059SBarry Smith   }
122f4a53059SBarry Smith   if (b->w) {
123f4a53059SBarry Smith     ierr = VecDestroy(b->w);CHKERRQ(ierr);
124f4a53059SBarry Smith   }
125cd3bbe55SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
126b9b97703SBarry Smith   PetscFunctionReturn(0);
127b9b97703SBarry Smith }
128b9b97703SBarry Smith 
1290bad9183SKris Buschelman /*MC
130fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1310bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1320bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1330bad9183SKris Buschelman 
1340bad9183SKris Buschelman   Operations provided:
1350bad9183SKris Buschelman . MatMult
1360bad9183SKris Buschelman . MatMultTranspose
1370bad9183SKris Buschelman . MatMultAdd
1380bad9183SKris Buschelman . MatMultTransposeAdd
1390bad9183SKris Buschelman 
1400bad9183SKris Buschelman   Level: advanced
1410bad9183SKris Buschelman 
1420bad9183SKris Buschelman .seealso: MatCreateSeqDense
1430bad9183SKris Buschelman M*/
1440bad9183SKris Buschelman 
14582b1193eSBarry Smith EXTERN_C_BEGIN
1464a2ae208SSatish Balay #undef __FUNCT__
1474a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
148dfbe8321SBarry Smith PetscErrorCode MatCreate_MAIJ(Mat A)
14982b1193eSBarry Smith {
150dfbe8321SBarry Smith   PetscErrorCode ierr;
1514cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
15282b1193eSBarry Smith 
15382b1193eSBarry Smith   PetscFunctionBegin;
154b0a32e0cSBarry Smith   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
155b0a32e0cSBarry Smith   A->data  = (void*)b;
156cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
157cd3bbe55SBarry Smith   A->factor           = 0;
158cd3bbe55SBarry Smith   A->mapping          = 0;
159f4a53059SBarry Smith 
160cd3bbe55SBarry Smith   b->AIJ  = 0;
161cd3bbe55SBarry Smith   b->dof  = 0;
162f4a53059SBarry Smith   b->OAIJ = 0;
163f4a53059SBarry Smith   b->ctx  = 0;
164f4a53059SBarry Smith   b->w    = 0;
16582b1193eSBarry Smith   PetscFunctionReturn(0);
16682b1193eSBarry Smith }
16782b1193eSBarry Smith EXTERN_C_END
16882b1193eSBarry Smith 
169cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1704a2ae208SSatish Balay #undef __FUNCT__
171b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
172dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
17382b1193eSBarry Smith {
1744cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
175bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
17687828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2;
177dfbe8321SBarry Smith   PetscErrorCode ierr;
178b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
179b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
18082b1193eSBarry Smith 
181bcc973b7SBarry Smith   PetscFunctionBegin;
1821ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1831ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
184bcc973b7SBarry Smith   idx  = a->j;
185bcc973b7SBarry Smith   v    = a->a;
186bcc973b7SBarry Smith   ii   = a->i;
187bcc973b7SBarry Smith 
188bcc973b7SBarry Smith   for (i=0; i<m; i++) {
189bcc973b7SBarry Smith     jrow = ii[i];
190bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
191bcc973b7SBarry Smith     sum1  = 0.0;
192bcc973b7SBarry Smith     sum2  = 0.0;
193bcc973b7SBarry Smith     for (j=0; j<n; j++) {
194bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
195bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
196bcc973b7SBarry Smith       jrow++;
197bcc973b7SBarry Smith      }
198bcc973b7SBarry Smith     y[2*i]   = sum1;
199bcc973b7SBarry Smith     y[2*i+1] = sum2;
200bcc973b7SBarry Smith   }
201bcc973b7SBarry Smith 
202b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
2031ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2041ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
20582b1193eSBarry Smith   PetscFunctionReturn(0);
20682b1193eSBarry Smith }
207bcc973b7SBarry Smith 
2084a2ae208SSatish Balay #undef __FUNCT__
209b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
210dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
21182b1193eSBarry Smith {
2124cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
213bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
21487828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
215dfbe8321SBarry Smith   PetscErrorCode ierr;
216b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
21782b1193eSBarry Smith 
218bcc973b7SBarry Smith   PetscFunctionBegin;
219bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
2201ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2211ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
222bfec09a0SHong Zhang 
223bcc973b7SBarry Smith   for (i=0; i<m; i++) {
224bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
225bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
226bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
227bcc973b7SBarry Smith     alpha1 = x[2*i];
228bcc973b7SBarry Smith     alpha2 = x[2*i+1];
229bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
230bcc973b7SBarry Smith   }
231b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
2321ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2331ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23482b1193eSBarry Smith   PetscFunctionReturn(0);
23582b1193eSBarry Smith }
236bcc973b7SBarry Smith 
2374a2ae208SSatish Balay #undef __FUNCT__
238b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
239dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
24082b1193eSBarry Smith {
2414cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
242bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
24387828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2;
244dfbe8321SBarry Smith   PetscErrorCode ierr;
245b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
246b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
24782b1193eSBarry Smith 
248bcc973b7SBarry Smith   PetscFunctionBegin;
249f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2501ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2511ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
252bcc973b7SBarry Smith   idx  = a->j;
253bcc973b7SBarry Smith   v    = a->a;
254bcc973b7SBarry Smith   ii   = a->i;
255bcc973b7SBarry Smith 
256bcc973b7SBarry Smith   for (i=0; i<m; i++) {
257bcc973b7SBarry Smith     jrow = ii[i];
258bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
259bcc973b7SBarry Smith     sum1  = 0.0;
260bcc973b7SBarry Smith     sum2  = 0.0;
261bcc973b7SBarry Smith     for (j=0; j<n; j++) {
262bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
263bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
264bcc973b7SBarry Smith       jrow++;
265bcc973b7SBarry Smith      }
266bcc973b7SBarry Smith     y[2*i]   += sum1;
267bcc973b7SBarry Smith     y[2*i+1] += sum2;
268bcc973b7SBarry Smith   }
269bcc973b7SBarry Smith 
270b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
2711ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2721ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
273bcc973b7SBarry Smith   PetscFunctionReturn(0);
27482b1193eSBarry Smith }
2754a2ae208SSatish Balay #undef __FUNCT__
276b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
277dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
27882b1193eSBarry Smith {
2794cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
280bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
28187828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2;
282dfbe8321SBarry Smith   PetscErrorCode ierr;
283b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
28482b1193eSBarry Smith 
285bcc973b7SBarry Smith   PetscFunctionBegin;
286f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2871ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2881ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
289bfec09a0SHong Zhang 
290bcc973b7SBarry Smith   for (i=0; i<m; i++) {
291bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
292bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
293bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
294bcc973b7SBarry Smith     alpha1 = x[2*i];
295bcc973b7SBarry Smith     alpha2 = x[2*i+1];
296bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
297bcc973b7SBarry Smith   }
298b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
2991ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3001ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
301bcc973b7SBarry Smith   PetscFunctionReturn(0);
30282b1193eSBarry Smith }
303cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3044a2ae208SSatish Balay #undef __FUNCT__
305b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
306dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
307bcc973b7SBarry Smith {
3084cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
309bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
31087828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
311dfbe8321SBarry Smith   PetscErrorCode ierr;
312b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
313b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
31482b1193eSBarry Smith 
315bcc973b7SBarry Smith   PetscFunctionBegin;
3161ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3171ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
318bcc973b7SBarry Smith   idx  = a->j;
319bcc973b7SBarry Smith   v    = a->a;
320bcc973b7SBarry Smith   ii   = a->i;
321bcc973b7SBarry Smith 
322bcc973b7SBarry Smith   for (i=0; i<m; i++) {
323bcc973b7SBarry Smith     jrow = ii[i];
324bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
325bcc973b7SBarry Smith     sum1  = 0.0;
326bcc973b7SBarry Smith     sum2  = 0.0;
327bcc973b7SBarry Smith     sum3  = 0.0;
328bcc973b7SBarry Smith     for (j=0; j<n; j++) {
329bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
330bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
331bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
332bcc973b7SBarry Smith       jrow++;
333bcc973b7SBarry Smith      }
334bcc973b7SBarry Smith     y[3*i]   = sum1;
335bcc973b7SBarry Smith     y[3*i+1] = sum2;
336bcc973b7SBarry Smith     y[3*i+2] = sum3;
337bcc973b7SBarry Smith   }
338bcc973b7SBarry Smith 
339b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*m);
3401ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3411ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
342bcc973b7SBarry Smith   PetscFunctionReturn(0);
343bcc973b7SBarry Smith }
344bcc973b7SBarry Smith 
3454a2ae208SSatish Balay #undef __FUNCT__
346b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
347dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
348bcc973b7SBarry Smith {
3494cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
350bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
35187828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
352dfbe8321SBarry Smith   PetscErrorCode ierr;
353b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
354bcc973b7SBarry Smith 
355bcc973b7SBarry Smith   PetscFunctionBegin;
356bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
3571ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3581ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
359bfec09a0SHong Zhang 
360bcc973b7SBarry Smith   for (i=0; i<m; i++) {
361bfec09a0SHong Zhang     idx    = a->j + a->i[i];
362bfec09a0SHong Zhang     v      = a->a + a->i[i];
363bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
364bcc973b7SBarry Smith     alpha1 = x[3*i];
365bcc973b7SBarry Smith     alpha2 = x[3*i+1];
366bcc973b7SBarry Smith     alpha3 = x[3*i+2];
367bcc973b7SBarry Smith     while (n-->0) {
368bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
369bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
370bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
371bcc973b7SBarry Smith       idx++; v++;
372bcc973b7SBarry Smith     }
373bcc973b7SBarry Smith   }
374b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
3751ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3761ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
377bcc973b7SBarry Smith   PetscFunctionReturn(0);
378bcc973b7SBarry Smith }
379bcc973b7SBarry Smith 
3804a2ae208SSatish Balay #undef __FUNCT__
381b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
382dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
383bcc973b7SBarry Smith {
3844cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
385bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
38687828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
387dfbe8321SBarry Smith   PetscErrorCode ierr;
388b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
389b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
390bcc973b7SBarry Smith 
391bcc973b7SBarry Smith   PetscFunctionBegin;
392f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3931ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3941ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
395bcc973b7SBarry Smith   idx  = a->j;
396bcc973b7SBarry Smith   v    = a->a;
397bcc973b7SBarry Smith   ii   = a->i;
398bcc973b7SBarry Smith 
399bcc973b7SBarry Smith   for (i=0; i<m; i++) {
400bcc973b7SBarry Smith     jrow = ii[i];
401bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
402bcc973b7SBarry Smith     sum1  = 0.0;
403bcc973b7SBarry Smith     sum2  = 0.0;
404bcc973b7SBarry Smith     sum3  = 0.0;
405bcc973b7SBarry Smith     for (j=0; j<n; j++) {
406bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
407bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
408bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
409bcc973b7SBarry Smith       jrow++;
410bcc973b7SBarry Smith      }
411bcc973b7SBarry Smith     y[3*i]   += sum1;
412bcc973b7SBarry Smith     y[3*i+1] += sum2;
413bcc973b7SBarry Smith     y[3*i+2] += sum3;
414bcc973b7SBarry Smith   }
415bcc973b7SBarry Smith 
416b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
4171ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4181ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
419bcc973b7SBarry Smith   PetscFunctionReturn(0);
420bcc973b7SBarry Smith }
4214a2ae208SSatish Balay #undef __FUNCT__
422b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
423dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
424bcc973b7SBarry Smith {
4254cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
426bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
42787828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
428dfbe8321SBarry Smith   PetscErrorCode ierr;
429b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
430bcc973b7SBarry Smith 
431bcc973b7SBarry Smith   PetscFunctionBegin;
432f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4331ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4341ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
435bcc973b7SBarry Smith   for (i=0; i<m; i++) {
436bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
437bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
438bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
439bcc973b7SBarry Smith     alpha1 = x[3*i];
440bcc973b7SBarry Smith     alpha2 = x[3*i+1];
441bcc973b7SBarry Smith     alpha3 = x[3*i+2];
442bcc973b7SBarry Smith     while (n-->0) {
443bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
444bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
445bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
446bcc973b7SBarry Smith       idx++; v++;
447bcc973b7SBarry Smith     }
448bcc973b7SBarry Smith   }
449b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
4501ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4511ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
452bcc973b7SBarry Smith   PetscFunctionReturn(0);
453bcc973b7SBarry Smith }
454bcc973b7SBarry Smith 
455bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4564a2ae208SSatish Balay #undef __FUNCT__
457b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
458dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
459bcc973b7SBarry Smith {
4604cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
461bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
46287828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
463dfbe8321SBarry Smith   PetscErrorCode ierr;
464b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
465b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
466bcc973b7SBarry Smith 
467bcc973b7SBarry Smith   PetscFunctionBegin;
4681ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4691ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
470bcc973b7SBarry Smith   idx  = a->j;
471bcc973b7SBarry Smith   v    = a->a;
472bcc973b7SBarry Smith   ii   = a->i;
473bcc973b7SBarry Smith 
474bcc973b7SBarry Smith   for (i=0; i<m; i++) {
475bcc973b7SBarry Smith     jrow = ii[i];
476bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
477bcc973b7SBarry Smith     sum1  = 0.0;
478bcc973b7SBarry Smith     sum2  = 0.0;
479bcc973b7SBarry Smith     sum3  = 0.0;
480bcc973b7SBarry Smith     sum4  = 0.0;
481bcc973b7SBarry Smith     for (j=0; j<n; j++) {
482bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
483bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
484bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
485bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
486bcc973b7SBarry Smith       jrow++;
487bcc973b7SBarry Smith      }
488bcc973b7SBarry Smith     y[4*i]   = sum1;
489bcc973b7SBarry Smith     y[4*i+1] = sum2;
490bcc973b7SBarry Smith     y[4*i+2] = sum3;
491bcc973b7SBarry Smith     y[4*i+3] = sum4;
492bcc973b7SBarry Smith   }
493bcc973b7SBarry Smith 
494b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
4951ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4961ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
497bcc973b7SBarry Smith   PetscFunctionReturn(0);
498bcc973b7SBarry Smith }
499bcc973b7SBarry Smith 
5004a2ae208SSatish Balay #undef __FUNCT__
501b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
502dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
503bcc973b7SBarry Smith {
5044cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
505bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
50687828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
507dfbe8321SBarry Smith   PetscErrorCode ierr;
508b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
509bcc973b7SBarry Smith 
510bcc973b7SBarry Smith   PetscFunctionBegin;
511bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
5121ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5131ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
514bcc973b7SBarry Smith   for (i=0; i<m; i++) {
515bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
516bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
517bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
518bcc973b7SBarry Smith     alpha1 = x[4*i];
519bcc973b7SBarry Smith     alpha2 = x[4*i+1];
520bcc973b7SBarry Smith     alpha3 = x[4*i+2];
521bcc973b7SBarry Smith     alpha4 = x[4*i+3];
522bcc973b7SBarry Smith     while (n-->0) {
523bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
524bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
525bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
526bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
527bcc973b7SBarry Smith       idx++; v++;
528bcc973b7SBarry Smith     }
529bcc973b7SBarry Smith   }
530b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
5311ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5321ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
533bcc973b7SBarry Smith   PetscFunctionReturn(0);
534bcc973b7SBarry Smith }
535bcc973b7SBarry Smith 
5364a2ae208SSatish Balay #undef __FUNCT__
537b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
538dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
539bcc973b7SBarry Smith {
5404cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
541f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
54287828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
543dfbe8321SBarry Smith   PetscErrorCode ierr;
544b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
545b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
546f9fae5adSBarry Smith 
547f9fae5adSBarry Smith   PetscFunctionBegin;
548f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5491ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5501ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
551f9fae5adSBarry Smith   idx  = a->j;
552f9fae5adSBarry Smith   v    = a->a;
553f9fae5adSBarry Smith   ii   = a->i;
554f9fae5adSBarry Smith 
555f9fae5adSBarry Smith   for (i=0; i<m; i++) {
556f9fae5adSBarry Smith     jrow = ii[i];
557f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
558f9fae5adSBarry Smith     sum1  = 0.0;
559f9fae5adSBarry Smith     sum2  = 0.0;
560f9fae5adSBarry Smith     sum3  = 0.0;
561f9fae5adSBarry Smith     sum4  = 0.0;
562f9fae5adSBarry Smith     for (j=0; j<n; j++) {
563f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
564f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
565f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
566f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
567f9fae5adSBarry Smith       jrow++;
568f9fae5adSBarry Smith      }
569f9fae5adSBarry Smith     y[4*i]   += sum1;
570f9fae5adSBarry Smith     y[4*i+1] += sum2;
571f9fae5adSBarry Smith     y[4*i+2] += sum3;
572f9fae5adSBarry Smith     y[4*i+3] += sum4;
573f9fae5adSBarry Smith   }
574f9fae5adSBarry Smith 
575b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
5761ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5771ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
578f9fae5adSBarry Smith   PetscFunctionReturn(0);
579bcc973b7SBarry Smith }
5804a2ae208SSatish Balay #undef __FUNCT__
581b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
582dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
583bcc973b7SBarry Smith {
5844cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
585f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
58687828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
587dfbe8321SBarry Smith   PetscErrorCode ierr;
588b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
589f9fae5adSBarry Smith 
590f9fae5adSBarry Smith   PetscFunctionBegin;
591f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
594bfec09a0SHong Zhang 
595f9fae5adSBarry Smith   for (i=0; i<m; i++) {
596bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
597bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
598f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
599f9fae5adSBarry Smith     alpha1 = x[4*i];
600f9fae5adSBarry Smith     alpha2 = x[4*i+1];
601f9fae5adSBarry Smith     alpha3 = x[4*i+2];
602f9fae5adSBarry Smith     alpha4 = x[4*i+3];
603f9fae5adSBarry Smith     while (n-->0) {
604f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
605f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
606f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
607f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
608f9fae5adSBarry Smith       idx++; v++;
609f9fae5adSBarry Smith     }
610f9fae5adSBarry Smith   }
611b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
6121ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6131ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
614f9fae5adSBarry Smith   PetscFunctionReturn(0);
615f9fae5adSBarry Smith }
616f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6176cd98798SBarry Smith 
6184a2ae208SSatish Balay #undef __FUNCT__
619b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
620dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
621f9fae5adSBarry Smith {
6224cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
623f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
62487828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
625dfbe8321SBarry Smith   PetscErrorCode ierr;
626b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
627b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
628f9fae5adSBarry Smith 
629f9fae5adSBarry Smith   PetscFunctionBegin;
6301ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6311ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
632f9fae5adSBarry Smith   idx  = a->j;
633f9fae5adSBarry Smith   v    = a->a;
634f9fae5adSBarry Smith   ii   = a->i;
635f9fae5adSBarry Smith 
636f9fae5adSBarry Smith   for (i=0; i<m; i++) {
637f9fae5adSBarry Smith     jrow = ii[i];
638f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
639f9fae5adSBarry Smith     sum1  = 0.0;
640f9fae5adSBarry Smith     sum2  = 0.0;
641f9fae5adSBarry Smith     sum3  = 0.0;
642f9fae5adSBarry Smith     sum4  = 0.0;
643f9fae5adSBarry Smith     sum5  = 0.0;
644f9fae5adSBarry Smith     for (j=0; j<n; j++) {
645f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
646f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
647f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
648f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
649f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
650f9fae5adSBarry Smith       jrow++;
651f9fae5adSBarry Smith      }
652f9fae5adSBarry Smith     y[5*i]   = sum1;
653f9fae5adSBarry Smith     y[5*i+1] = sum2;
654f9fae5adSBarry Smith     y[5*i+2] = sum3;
655f9fae5adSBarry Smith     y[5*i+3] = sum4;
656f9fae5adSBarry Smith     y[5*i+4] = sum5;
657f9fae5adSBarry Smith   }
658f9fae5adSBarry Smith 
659b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*m);
6601ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6611ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
662f9fae5adSBarry Smith   PetscFunctionReturn(0);
663f9fae5adSBarry Smith }
664f9fae5adSBarry Smith 
6654a2ae208SSatish Balay #undef __FUNCT__
666b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
667dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
668f9fae5adSBarry Smith {
6694cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
670f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
67187828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
672dfbe8321SBarry Smith   PetscErrorCode ierr;
673b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
674f9fae5adSBarry Smith 
675f9fae5adSBarry Smith   PetscFunctionBegin;
676f9fae5adSBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
6771ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6781ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
679bfec09a0SHong Zhang 
680f9fae5adSBarry Smith   for (i=0; i<m; i++) {
681bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
682bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
683f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
684f9fae5adSBarry Smith     alpha1 = x[5*i];
685f9fae5adSBarry Smith     alpha2 = x[5*i+1];
686f9fae5adSBarry Smith     alpha3 = x[5*i+2];
687f9fae5adSBarry Smith     alpha4 = x[5*i+3];
688f9fae5adSBarry Smith     alpha5 = x[5*i+4];
689f9fae5adSBarry Smith     while (n-->0) {
690f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
691f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
692f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
693f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
694f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
695f9fae5adSBarry Smith       idx++; v++;
696f9fae5adSBarry Smith     }
697f9fae5adSBarry Smith   }
698b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
6991ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7001ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
701f9fae5adSBarry Smith   PetscFunctionReturn(0);
702f9fae5adSBarry Smith }
703f9fae5adSBarry Smith 
7044a2ae208SSatish Balay #undef __FUNCT__
705b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
706dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
707f9fae5adSBarry Smith {
7084cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
709f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
71087828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
711dfbe8321SBarry Smith   PetscErrorCode ierr;
712b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
713b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
714f9fae5adSBarry Smith 
715f9fae5adSBarry Smith   PetscFunctionBegin;
716f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7171ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7181ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
719f9fae5adSBarry Smith   idx  = a->j;
720f9fae5adSBarry Smith   v    = a->a;
721f9fae5adSBarry Smith   ii   = a->i;
722f9fae5adSBarry Smith 
723f9fae5adSBarry Smith   for (i=0; i<m; i++) {
724f9fae5adSBarry Smith     jrow = ii[i];
725f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
726f9fae5adSBarry Smith     sum1  = 0.0;
727f9fae5adSBarry Smith     sum2  = 0.0;
728f9fae5adSBarry Smith     sum3  = 0.0;
729f9fae5adSBarry Smith     sum4  = 0.0;
730f9fae5adSBarry Smith     sum5  = 0.0;
731f9fae5adSBarry Smith     for (j=0; j<n; j++) {
732f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
733f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
734f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
735f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
736f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
737f9fae5adSBarry Smith       jrow++;
738f9fae5adSBarry Smith      }
739f9fae5adSBarry Smith     y[5*i]   += sum1;
740f9fae5adSBarry Smith     y[5*i+1] += sum2;
741f9fae5adSBarry Smith     y[5*i+2] += sum3;
742f9fae5adSBarry Smith     y[5*i+3] += sum4;
743f9fae5adSBarry Smith     y[5*i+4] += sum5;
744f9fae5adSBarry Smith   }
745f9fae5adSBarry Smith 
746b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
7471ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7481ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
749f9fae5adSBarry Smith   PetscFunctionReturn(0);
750f9fae5adSBarry Smith }
751f9fae5adSBarry Smith 
7524a2ae208SSatish Balay #undef __FUNCT__
753b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
754dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
755f9fae5adSBarry Smith {
7564cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
757f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
75887828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
759dfbe8321SBarry Smith   PetscErrorCode ierr;
760b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
761f9fae5adSBarry Smith 
762f9fae5adSBarry Smith   PetscFunctionBegin;
763f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7641ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7651ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
766bfec09a0SHong Zhang 
767f9fae5adSBarry Smith   for (i=0; i<m; i++) {
768bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
769bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
770f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
771f9fae5adSBarry Smith     alpha1 = x[5*i];
772f9fae5adSBarry Smith     alpha2 = x[5*i+1];
773f9fae5adSBarry Smith     alpha3 = x[5*i+2];
774f9fae5adSBarry Smith     alpha4 = x[5*i+3];
775f9fae5adSBarry Smith     alpha5 = x[5*i+4];
776f9fae5adSBarry Smith     while (n-->0) {
777f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
778f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
779f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
780f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
781f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
782f9fae5adSBarry Smith       idx++; v++;
783f9fae5adSBarry Smith     }
784f9fae5adSBarry Smith   }
785b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
7861ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7871ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
788f9fae5adSBarry Smith   PetscFunctionReturn(0);
789bcc973b7SBarry Smith }
790bcc973b7SBarry Smith 
7916cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
7924a2ae208SSatish Balay #undef __FUNCT__
793b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
794dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
7956cd98798SBarry Smith {
7966cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
7976cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
79887828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
799dfbe8321SBarry Smith   PetscErrorCode ierr;
800b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
801b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
8026cd98798SBarry Smith 
8036cd98798SBarry Smith   PetscFunctionBegin;
8041ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8051ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8066cd98798SBarry Smith   idx  = a->j;
8076cd98798SBarry Smith   v    = a->a;
8086cd98798SBarry Smith   ii   = a->i;
8096cd98798SBarry Smith 
8106cd98798SBarry Smith   for (i=0; i<m; i++) {
8116cd98798SBarry Smith     jrow = ii[i];
8126cd98798SBarry Smith     n    = ii[i+1] - jrow;
8136cd98798SBarry Smith     sum1  = 0.0;
8146cd98798SBarry Smith     sum2  = 0.0;
8156cd98798SBarry Smith     sum3  = 0.0;
8166cd98798SBarry Smith     sum4  = 0.0;
8176cd98798SBarry Smith     sum5  = 0.0;
8186cd98798SBarry Smith     sum6  = 0.0;
8196cd98798SBarry Smith     for (j=0; j<n; j++) {
8206cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8216cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8226cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8236cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8246cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8256cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8266cd98798SBarry Smith       jrow++;
8276cd98798SBarry Smith      }
8286cd98798SBarry Smith     y[6*i]   = sum1;
8296cd98798SBarry Smith     y[6*i+1] = sum2;
8306cd98798SBarry Smith     y[6*i+2] = sum3;
8316cd98798SBarry Smith     y[6*i+3] = sum4;
8326cd98798SBarry Smith     y[6*i+4] = sum5;
8336cd98798SBarry Smith     y[6*i+5] = sum6;
8346cd98798SBarry Smith   }
8356cd98798SBarry Smith 
836b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*m);
8371ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8381ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8396cd98798SBarry Smith   PetscFunctionReturn(0);
8406cd98798SBarry Smith }
8416cd98798SBarry Smith 
8424a2ae208SSatish Balay #undef __FUNCT__
843b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
844dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8456cd98798SBarry Smith {
8466cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
8476cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
84887828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
849dfbe8321SBarry Smith   PetscErrorCode ierr;
850b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
8516cd98798SBarry Smith 
8526cd98798SBarry Smith   PetscFunctionBegin;
8536cd98798SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
8541ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8551ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
856bfec09a0SHong Zhang 
8576cd98798SBarry Smith   for (i=0; i<m; i++) {
858bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
859bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
8606cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8616cd98798SBarry Smith     alpha1 = x[6*i];
8626cd98798SBarry Smith     alpha2 = x[6*i+1];
8636cd98798SBarry Smith     alpha3 = x[6*i+2];
8646cd98798SBarry Smith     alpha4 = x[6*i+3];
8656cd98798SBarry Smith     alpha5 = x[6*i+4];
8666cd98798SBarry Smith     alpha6 = x[6*i+5];
8676cd98798SBarry Smith     while (n-->0) {
8686cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8696cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8706cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
8716cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
8726cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
8736cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
8746cd98798SBarry Smith       idx++; v++;
8756cd98798SBarry Smith     }
8766cd98798SBarry Smith   }
877b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
8781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8791ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8806cd98798SBarry Smith   PetscFunctionReturn(0);
8816cd98798SBarry Smith }
8826cd98798SBarry Smith 
8834a2ae208SSatish Balay #undef __FUNCT__
884b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
885dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8866cd98798SBarry Smith {
8876cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
8886cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
88987828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
890dfbe8321SBarry Smith   PetscErrorCode ierr;
891b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
892b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
8936cd98798SBarry Smith 
8946cd98798SBarry Smith   PetscFunctionBegin;
8956cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8961ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8971ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
8986cd98798SBarry Smith   idx  = a->j;
8996cd98798SBarry Smith   v    = a->a;
9006cd98798SBarry Smith   ii   = a->i;
9016cd98798SBarry Smith 
9026cd98798SBarry Smith   for (i=0; i<m; i++) {
9036cd98798SBarry Smith     jrow = ii[i];
9046cd98798SBarry Smith     n    = ii[i+1] - jrow;
9056cd98798SBarry Smith     sum1  = 0.0;
9066cd98798SBarry Smith     sum2  = 0.0;
9076cd98798SBarry Smith     sum3  = 0.0;
9086cd98798SBarry Smith     sum4  = 0.0;
9096cd98798SBarry Smith     sum5  = 0.0;
9106cd98798SBarry Smith     sum6  = 0.0;
9116cd98798SBarry Smith     for (j=0; j<n; j++) {
9126cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9136cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9146cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9156cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9166cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9176cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9186cd98798SBarry Smith       jrow++;
9196cd98798SBarry Smith      }
9206cd98798SBarry Smith     y[6*i]   += sum1;
9216cd98798SBarry Smith     y[6*i+1] += sum2;
9226cd98798SBarry Smith     y[6*i+2] += sum3;
9236cd98798SBarry Smith     y[6*i+3] += sum4;
9246cd98798SBarry Smith     y[6*i+4] += sum5;
9256cd98798SBarry Smith     y[6*i+5] += sum6;
9266cd98798SBarry Smith   }
9276cd98798SBarry Smith 
928b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
9291ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
9301ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9316cd98798SBarry Smith   PetscFunctionReturn(0);
9326cd98798SBarry Smith }
9336cd98798SBarry Smith 
9344a2ae208SSatish Balay #undef __FUNCT__
935b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
936dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9376cd98798SBarry Smith {
9386cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9396cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
94087828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
941dfbe8321SBarry Smith   PetscErrorCode ierr;
942b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
9436cd98798SBarry Smith 
9446cd98798SBarry Smith   PetscFunctionBegin;
9456cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9461ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9471ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
948bfec09a0SHong Zhang 
9496cd98798SBarry Smith   for (i=0; i<m; i++) {
950bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
951bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9526cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9536cd98798SBarry Smith     alpha1 = x[6*i];
9546cd98798SBarry Smith     alpha2 = x[6*i+1];
9556cd98798SBarry Smith     alpha3 = x[6*i+2];
9566cd98798SBarry Smith     alpha4 = x[6*i+3];
9576cd98798SBarry Smith     alpha5 = x[6*i+4];
9586cd98798SBarry Smith     alpha6 = x[6*i+5];
9596cd98798SBarry Smith     while (n-->0) {
9606cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9616cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9626cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9636cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9646cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9656cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9666cd98798SBarry Smith       idx++; v++;
9676cd98798SBarry Smith     }
9686cd98798SBarry Smith   }
969b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
9701ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
9711ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9726cd98798SBarry Smith   PetscFunctionReturn(0);
9736cd98798SBarry Smith }
9746cd98798SBarry Smith 
97566ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
97666ed3db0SBarry Smith #undef __FUNCT__
97766ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
978dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
97966ed3db0SBarry Smith {
98066ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
98166ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
98287828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
983dfbe8321SBarry Smith   PetscErrorCode ierr;
984b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
985b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
98666ed3db0SBarry Smith 
98766ed3db0SBarry Smith   PetscFunctionBegin;
9881ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9891ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
99066ed3db0SBarry Smith   idx  = a->j;
99166ed3db0SBarry Smith   v    = a->a;
99266ed3db0SBarry Smith   ii   = a->i;
99366ed3db0SBarry Smith 
99466ed3db0SBarry Smith   for (i=0; i<m; i++) {
99566ed3db0SBarry Smith     jrow = ii[i];
99666ed3db0SBarry Smith     n    = ii[i+1] - jrow;
99766ed3db0SBarry Smith     sum1  = 0.0;
99866ed3db0SBarry Smith     sum2  = 0.0;
99966ed3db0SBarry Smith     sum3  = 0.0;
100066ed3db0SBarry Smith     sum4  = 0.0;
100166ed3db0SBarry Smith     sum5  = 0.0;
100266ed3db0SBarry Smith     sum6  = 0.0;
100366ed3db0SBarry Smith     sum7  = 0.0;
100466ed3db0SBarry Smith     sum8  = 0.0;
100566ed3db0SBarry Smith     for (j=0; j<n; j++) {
100666ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
100766ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
100866ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
100966ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
101066ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
101166ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
101266ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
101366ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
101466ed3db0SBarry Smith       jrow++;
101566ed3db0SBarry Smith      }
101666ed3db0SBarry Smith     y[8*i]   = sum1;
101766ed3db0SBarry Smith     y[8*i+1] = sum2;
101866ed3db0SBarry Smith     y[8*i+2] = sum3;
101966ed3db0SBarry Smith     y[8*i+3] = sum4;
102066ed3db0SBarry Smith     y[8*i+4] = sum5;
102166ed3db0SBarry Smith     y[8*i+5] = sum6;
102266ed3db0SBarry Smith     y[8*i+6] = sum7;
102366ed3db0SBarry Smith     y[8*i+7] = sum8;
102466ed3db0SBarry Smith   }
102566ed3db0SBarry Smith 
102666ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*m);
10271ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
10281ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
102966ed3db0SBarry Smith   PetscFunctionReturn(0);
103066ed3db0SBarry Smith }
103166ed3db0SBarry Smith 
103266ed3db0SBarry Smith #undef __FUNCT__
103366ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1034dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
103566ed3db0SBarry Smith {
103666ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
103766ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
103887828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1039dfbe8321SBarry Smith   PetscErrorCode ierr;
1040b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
104166ed3db0SBarry Smith 
104266ed3db0SBarry Smith   PetscFunctionBegin;
104366ed3db0SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
10441ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
10451ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1046bfec09a0SHong Zhang 
104766ed3db0SBarry Smith   for (i=0; i<m; i++) {
1048bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1049bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
105066ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
105166ed3db0SBarry Smith     alpha1 = x[8*i];
105266ed3db0SBarry Smith     alpha2 = x[8*i+1];
105366ed3db0SBarry Smith     alpha3 = x[8*i+2];
105466ed3db0SBarry Smith     alpha4 = x[8*i+3];
105566ed3db0SBarry Smith     alpha5 = x[8*i+4];
105666ed3db0SBarry Smith     alpha6 = x[8*i+5];
105766ed3db0SBarry Smith     alpha7 = x[8*i+6];
105866ed3db0SBarry Smith     alpha8 = x[8*i+7];
105966ed3db0SBarry Smith     while (n-->0) {
106066ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
106166ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
106266ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
106366ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
106466ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
106566ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
106666ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
106766ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
106866ed3db0SBarry Smith       idx++; v++;
106966ed3db0SBarry Smith     }
107066ed3db0SBarry Smith   }
107166ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
10721ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
10731ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
107466ed3db0SBarry Smith   PetscFunctionReturn(0);
107566ed3db0SBarry Smith }
107666ed3db0SBarry Smith 
107766ed3db0SBarry Smith #undef __FUNCT__
107866ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1079dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
108066ed3db0SBarry Smith {
108166ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
108266ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
108387828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1084dfbe8321SBarry Smith   PetscErrorCode ierr;
1085b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1086b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
108766ed3db0SBarry Smith 
108866ed3db0SBarry Smith   PetscFunctionBegin;
108966ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
10901ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
10911ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
109266ed3db0SBarry Smith   idx  = a->j;
109366ed3db0SBarry Smith   v    = a->a;
109466ed3db0SBarry Smith   ii   = a->i;
109566ed3db0SBarry Smith 
109666ed3db0SBarry Smith   for (i=0; i<m; i++) {
109766ed3db0SBarry Smith     jrow = ii[i];
109866ed3db0SBarry Smith     n    = ii[i+1] - jrow;
109966ed3db0SBarry Smith     sum1  = 0.0;
110066ed3db0SBarry Smith     sum2  = 0.0;
110166ed3db0SBarry Smith     sum3  = 0.0;
110266ed3db0SBarry Smith     sum4  = 0.0;
110366ed3db0SBarry Smith     sum5  = 0.0;
110466ed3db0SBarry Smith     sum6  = 0.0;
110566ed3db0SBarry Smith     sum7  = 0.0;
110666ed3db0SBarry Smith     sum8  = 0.0;
110766ed3db0SBarry Smith     for (j=0; j<n; j++) {
110866ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
110966ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
111066ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
111166ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
111266ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
111366ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
111466ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
111566ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
111666ed3db0SBarry Smith       jrow++;
111766ed3db0SBarry Smith      }
111866ed3db0SBarry Smith     y[8*i]   += sum1;
111966ed3db0SBarry Smith     y[8*i+1] += sum2;
112066ed3db0SBarry Smith     y[8*i+2] += sum3;
112166ed3db0SBarry Smith     y[8*i+3] += sum4;
112266ed3db0SBarry Smith     y[8*i+4] += sum5;
112366ed3db0SBarry Smith     y[8*i+5] += sum6;
112466ed3db0SBarry Smith     y[8*i+6] += sum7;
112566ed3db0SBarry Smith     y[8*i+7] += sum8;
112666ed3db0SBarry Smith   }
112766ed3db0SBarry Smith 
112866ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
11291ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
11301ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
113166ed3db0SBarry Smith   PetscFunctionReturn(0);
113266ed3db0SBarry Smith }
113366ed3db0SBarry Smith 
113466ed3db0SBarry Smith #undef __FUNCT__
113566ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1136dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
113766ed3db0SBarry Smith {
113866ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
113966ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
114087828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1141dfbe8321SBarry Smith   PetscErrorCode ierr;
1142b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
114366ed3db0SBarry Smith 
114466ed3db0SBarry Smith   PetscFunctionBegin;
114566ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
11461ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
11471ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
114866ed3db0SBarry Smith   for (i=0; i<m; i++) {
1149bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1150bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
115166ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
115266ed3db0SBarry Smith     alpha1 = x[8*i];
115366ed3db0SBarry Smith     alpha2 = x[8*i+1];
115466ed3db0SBarry Smith     alpha3 = x[8*i+2];
115566ed3db0SBarry Smith     alpha4 = x[8*i+3];
115666ed3db0SBarry Smith     alpha5 = x[8*i+4];
115766ed3db0SBarry Smith     alpha6 = x[8*i+5];
115866ed3db0SBarry Smith     alpha7 = x[8*i+6];
115966ed3db0SBarry Smith     alpha8 = x[8*i+7];
116066ed3db0SBarry Smith     while (n-->0) {
116166ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
116266ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
116366ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
116466ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
116566ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
116666ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
116766ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
116866ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
116966ed3db0SBarry Smith       idx++; v++;
117066ed3db0SBarry Smith     }
117166ed3db0SBarry Smith   }
117266ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
11731ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
11741ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
11752f7816d4SBarry Smith   PetscFunctionReturn(0);
11762f7816d4SBarry Smith }
11772f7816d4SBarry Smith 
11782b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
11792b692628SMatthew Knepley #undef __FUNCT__
11802b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1181dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
11822b692628SMatthew Knepley {
11832b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
11842b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
11852b692628SMatthew Knepley   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1186dfbe8321SBarry Smith   PetscErrorCode ierr;
1187b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1188b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
11892b692628SMatthew Knepley 
11902b692628SMatthew Knepley   PetscFunctionBegin;
11911ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
11921ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
11932b692628SMatthew Knepley   idx  = a->j;
11942b692628SMatthew Knepley   v    = a->a;
11952b692628SMatthew Knepley   ii   = a->i;
11962b692628SMatthew Knepley 
11972b692628SMatthew Knepley   for (i=0; i<m; i++) {
11982b692628SMatthew Knepley     jrow = ii[i];
11992b692628SMatthew Knepley     n    = ii[i+1] - jrow;
12002b692628SMatthew Knepley     sum1  = 0.0;
12012b692628SMatthew Knepley     sum2  = 0.0;
12022b692628SMatthew Knepley     sum3  = 0.0;
12032b692628SMatthew Knepley     sum4  = 0.0;
12042b692628SMatthew Knepley     sum5  = 0.0;
12052b692628SMatthew Knepley     sum6  = 0.0;
12062b692628SMatthew Knepley     sum7  = 0.0;
12072b692628SMatthew Knepley     sum8  = 0.0;
12082b692628SMatthew Knepley     sum9  = 0.0;
12092b692628SMatthew Knepley     for (j=0; j<n; j++) {
12102b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
12112b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
12122b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
12132b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
12142b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
12152b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
12162b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
12172b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
12182b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
12192b692628SMatthew Knepley       jrow++;
12202b692628SMatthew Knepley      }
12212b692628SMatthew Knepley     y[9*i]   = sum1;
12222b692628SMatthew Knepley     y[9*i+1] = sum2;
12232b692628SMatthew Knepley     y[9*i+2] = sum3;
12242b692628SMatthew Knepley     y[9*i+3] = sum4;
12252b692628SMatthew Knepley     y[9*i+4] = sum5;
12262b692628SMatthew Knepley     y[9*i+5] = sum6;
12272b692628SMatthew Knepley     y[9*i+6] = sum7;
12282b692628SMatthew Knepley     y[9*i+7] = sum8;
12292b692628SMatthew Knepley     y[9*i+8] = sum9;
12302b692628SMatthew Knepley   }
12312b692628SMatthew Knepley 
12322b692628SMatthew Knepley   PetscLogFlops(18*a->nz - 9*m);
12331ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
12341ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
12352b692628SMatthew Knepley   PetscFunctionReturn(0);
12362b692628SMatthew Knepley }
12372b692628SMatthew Knepley 
12382b692628SMatthew Knepley #undef __FUNCT__
12392b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1240dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
12412b692628SMatthew Knepley {
12422b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
12432b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
12442b692628SMatthew Knepley   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1245dfbe8321SBarry Smith   PetscErrorCode ierr;
1246b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
12472b692628SMatthew Knepley 
12482b692628SMatthew Knepley   PetscFunctionBegin;
12492b692628SMatthew Knepley   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
12501ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12511ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
12522b692628SMatthew Knepley 
12532b692628SMatthew Knepley   for (i=0; i<m; i++) {
12542b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
12552b692628SMatthew Knepley     v      = a->a + a->i[i] ;
12562b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
12572b692628SMatthew Knepley     alpha1 = x[9*i];
12582b692628SMatthew Knepley     alpha2 = x[9*i+1];
12592b692628SMatthew Knepley     alpha3 = x[9*i+2];
12602b692628SMatthew Knepley     alpha4 = x[9*i+3];
12612b692628SMatthew Knepley     alpha5 = x[9*i+4];
12622b692628SMatthew Knepley     alpha6 = x[9*i+5];
12632b692628SMatthew Knepley     alpha7 = x[9*i+6];
12642b692628SMatthew Knepley     alpha8 = x[9*i+7];
12652f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
12662b692628SMatthew Knepley     while (n-->0) {
12672b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
12682b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
12692b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
12702b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
12712b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
12722b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
12732b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
12742b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
12752b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
12762b692628SMatthew Knepley       idx++; v++;
12772b692628SMatthew Knepley     }
12782b692628SMatthew Knepley   }
12792b692628SMatthew Knepley   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
12801ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
12811ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
12822b692628SMatthew Knepley   PetscFunctionReturn(0);
12832b692628SMatthew Knepley }
12842b692628SMatthew Knepley 
12852b692628SMatthew Knepley #undef __FUNCT__
12862b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1287dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
12882b692628SMatthew Knepley {
12892b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
12902b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
12912b692628SMatthew Knepley   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1292dfbe8321SBarry Smith   PetscErrorCode ierr;
1293b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1294b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
12952b692628SMatthew Knepley 
12962b692628SMatthew Knepley   PetscFunctionBegin;
12972b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12991ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
13002b692628SMatthew Knepley   idx  = a->j;
13012b692628SMatthew Knepley   v    = a->a;
13022b692628SMatthew Knepley   ii   = a->i;
13032b692628SMatthew Knepley 
13042b692628SMatthew Knepley   for (i=0; i<m; i++) {
13052b692628SMatthew Knepley     jrow = ii[i];
13062b692628SMatthew Knepley     n    = ii[i+1] - jrow;
13072b692628SMatthew Knepley     sum1  = 0.0;
13082b692628SMatthew Knepley     sum2  = 0.0;
13092b692628SMatthew Knepley     sum3  = 0.0;
13102b692628SMatthew Knepley     sum4  = 0.0;
13112b692628SMatthew Knepley     sum5  = 0.0;
13122b692628SMatthew Knepley     sum6  = 0.0;
13132b692628SMatthew Knepley     sum7  = 0.0;
13142b692628SMatthew Knepley     sum8  = 0.0;
13152b692628SMatthew Knepley     sum9  = 0.0;
13162b692628SMatthew Knepley     for (j=0; j<n; j++) {
13172b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
13182b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
13192b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
13202b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
13212b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
13222b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
13232b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
13242b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
13252b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
13262b692628SMatthew Knepley       jrow++;
13272b692628SMatthew Knepley      }
13282b692628SMatthew Knepley     y[9*i]   += sum1;
13292b692628SMatthew Knepley     y[9*i+1] += sum2;
13302b692628SMatthew Knepley     y[9*i+2] += sum3;
13312b692628SMatthew Knepley     y[9*i+3] += sum4;
13322b692628SMatthew Knepley     y[9*i+4] += sum5;
13332b692628SMatthew Knepley     y[9*i+5] += sum6;
13342b692628SMatthew Knepley     y[9*i+6] += sum7;
13352b692628SMatthew Knepley     y[9*i+7] += sum8;
13362b692628SMatthew Knepley     y[9*i+8] += sum9;
13372b692628SMatthew Knepley   }
13382b692628SMatthew Knepley 
13392b692628SMatthew Knepley   PetscLogFlops(18*a->nz);
13401ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
13411ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
13422b692628SMatthew Knepley   PetscFunctionReturn(0);
13432b692628SMatthew Knepley }
13442b692628SMatthew Knepley 
13452b692628SMatthew Knepley #undef __FUNCT__
13462b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1347dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
13482b692628SMatthew Knepley {
13492b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
13502b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
13512b692628SMatthew Knepley   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1352dfbe8321SBarry Smith   PetscErrorCode ierr;
1353b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
13542b692628SMatthew Knepley 
13552b692628SMatthew Knepley   PetscFunctionBegin;
13562b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13571ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
13581ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
13592b692628SMatthew Knepley   for (i=0; i<m; i++) {
13602b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
13612b692628SMatthew Knepley     v      = a->a + a->i[i] ;
13622b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
13632b692628SMatthew Knepley     alpha1 = x[9*i];
13642b692628SMatthew Knepley     alpha2 = x[9*i+1];
13652b692628SMatthew Knepley     alpha3 = x[9*i+2];
13662b692628SMatthew Knepley     alpha4 = x[9*i+3];
13672b692628SMatthew Knepley     alpha5 = x[9*i+4];
13682b692628SMatthew Knepley     alpha6 = x[9*i+5];
13692b692628SMatthew Knepley     alpha7 = x[9*i+6];
13702b692628SMatthew Knepley     alpha8 = x[9*i+7];
13712b692628SMatthew Knepley     alpha9 = x[9*i+8];
13722b692628SMatthew Knepley     while (n-->0) {
13732b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
13742b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
13752b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
13762b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
13772b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
13782b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
13792b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
13802b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
13812b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
13822b692628SMatthew Knepley       idx++; v++;
13832b692628SMatthew Knepley     }
13842b692628SMatthew Knepley   }
13852b692628SMatthew Knepley   PetscLogFlops(18*a->nz);
13861ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
13871ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
13882b692628SMatthew Knepley   PetscFunctionReturn(0);
13892b692628SMatthew Knepley }
13902b692628SMatthew Knepley 
13912f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
13922f7816d4SBarry Smith #undef __FUNCT__
13932f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
1394dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
13952f7816d4SBarry Smith {
13962f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
13972f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
13982f7816d4SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
13992f7816d4SBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1400dfbe8321SBarry Smith   PetscErrorCode ierr;
1401b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1402b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
14032f7816d4SBarry Smith 
14042f7816d4SBarry Smith   PetscFunctionBegin;
14051ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14061ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14072f7816d4SBarry Smith   idx  = a->j;
14082f7816d4SBarry Smith   v    = a->a;
14092f7816d4SBarry Smith   ii   = a->i;
14102f7816d4SBarry Smith 
14112f7816d4SBarry Smith   for (i=0; i<m; i++) {
14122f7816d4SBarry Smith     jrow = ii[i];
14132f7816d4SBarry Smith     n    = ii[i+1] - jrow;
14142f7816d4SBarry Smith     sum1  = 0.0;
14152f7816d4SBarry Smith     sum2  = 0.0;
14162f7816d4SBarry Smith     sum3  = 0.0;
14172f7816d4SBarry Smith     sum4  = 0.0;
14182f7816d4SBarry Smith     sum5  = 0.0;
14192f7816d4SBarry Smith     sum6  = 0.0;
14202f7816d4SBarry Smith     sum7  = 0.0;
14212f7816d4SBarry Smith     sum8  = 0.0;
14222f7816d4SBarry Smith     sum9  = 0.0;
14232f7816d4SBarry Smith     sum10 = 0.0;
14242f7816d4SBarry Smith     sum11 = 0.0;
14252f7816d4SBarry Smith     sum12 = 0.0;
14262f7816d4SBarry Smith     sum13 = 0.0;
14272f7816d4SBarry Smith     sum14 = 0.0;
14282f7816d4SBarry Smith     sum15 = 0.0;
14292f7816d4SBarry Smith     sum16 = 0.0;
14302f7816d4SBarry Smith     for (j=0; j<n; j++) {
14312f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
14322f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
14332f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
14342f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
14352f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
14362f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
14372f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
14382f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
14392f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
14402f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
14412f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
14422f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
14432f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
14442f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
14452f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
14462f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
14472f7816d4SBarry Smith       jrow++;
14482f7816d4SBarry Smith      }
14492f7816d4SBarry Smith     y[16*i]    = sum1;
14502f7816d4SBarry Smith     y[16*i+1]  = sum2;
14512f7816d4SBarry Smith     y[16*i+2]  = sum3;
14522f7816d4SBarry Smith     y[16*i+3]  = sum4;
14532f7816d4SBarry Smith     y[16*i+4]  = sum5;
14542f7816d4SBarry Smith     y[16*i+5]  = sum6;
14552f7816d4SBarry Smith     y[16*i+6]  = sum7;
14562f7816d4SBarry Smith     y[16*i+7]  = sum8;
14572f7816d4SBarry Smith     y[16*i+8]  = sum9;
14582f7816d4SBarry Smith     y[16*i+9]  = sum10;
14592f7816d4SBarry Smith     y[16*i+10] = sum11;
14602f7816d4SBarry Smith     y[16*i+11] = sum12;
14612f7816d4SBarry Smith     y[16*i+12] = sum13;
14622f7816d4SBarry Smith     y[16*i+13] = sum14;
14632f7816d4SBarry Smith     y[16*i+14] = sum15;
14642f7816d4SBarry Smith     y[16*i+15] = sum16;
14652f7816d4SBarry Smith   }
14662f7816d4SBarry Smith 
14672f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*m);
14681ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
14691ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14702f7816d4SBarry Smith   PetscFunctionReturn(0);
14712f7816d4SBarry Smith }
14722f7816d4SBarry Smith 
14732f7816d4SBarry Smith #undef __FUNCT__
14742f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1475dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
14762f7816d4SBarry Smith {
14772f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
14782f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
14792f7816d4SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
14802f7816d4SBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1481dfbe8321SBarry Smith   PetscErrorCode ierr;
1482b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
14832f7816d4SBarry Smith 
14842f7816d4SBarry Smith   PetscFunctionBegin;
14852f7816d4SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
14861ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14871ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1488bfec09a0SHong Zhang 
14892f7816d4SBarry Smith   for (i=0; i<m; i++) {
1490bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1491bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
14922f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
14932f7816d4SBarry Smith     alpha1  = x[16*i];
14942f7816d4SBarry Smith     alpha2  = x[16*i+1];
14952f7816d4SBarry Smith     alpha3  = x[16*i+2];
14962f7816d4SBarry Smith     alpha4  = x[16*i+3];
14972f7816d4SBarry Smith     alpha5  = x[16*i+4];
14982f7816d4SBarry Smith     alpha6  = x[16*i+5];
14992f7816d4SBarry Smith     alpha7  = x[16*i+6];
15002f7816d4SBarry Smith     alpha8  = x[16*i+7];
15012f7816d4SBarry Smith     alpha9  = x[16*i+8];
15022f7816d4SBarry Smith     alpha10 = x[16*i+9];
15032f7816d4SBarry Smith     alpha11 = x[16*i+10];
15042f7816d4SBarry Smith     alpha12 = x[16*i+11];
15052f7816d4SBarry Smith     alpha13 = x[16*i+12];
15062f7816d4SBarry Smith     alpha14 = x[16*i+13];
15072f7816d4SBarry Smith     alpha15 = x[16*i+14];
15082f7816d4SBarry Smith     alpha16 = x[16*i+15];
15092f7816d4SBarry Smith     while (n-->0) {
15102f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
15112f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
15122f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
15132f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
15142f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
15152f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
15162f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
15172f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
15182f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
15192f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
15202f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
15212f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
15222f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
15232f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
15242f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
15252f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
15262f7816d4SBarry Smith       idx++; v++;
15272f7816d4SBarry Smith     }
15282f7816d4SBarry Smith   }
15292f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
15301ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
15311ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
15322f7816d4SBarry Smith   PetscFunctionReturn(0);
15332f7816d4SBarry Smith }
15342f7816d4SBarry Smith 
15352f7816d4SBarry Smith #undef __FUNCT__
15362f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1537dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
15382f7816d4SBarry Smith {
15392f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
15402f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
15412f7816d4SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
15422f7816d4SBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1543dfbe8321SBarry Smith   PetscErrorCode ierr;
1544b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1545b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
15462f7816d4SBarry Smith 
15472f7816d4SBarry Smith   PetscFunctionBegin;
15482f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15491ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
15501ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15512f7816d4SBarry Smith   idx  = a->j;
15522f7816d4SBarry Smith   v    = a->a;
15532f7816d4SBarry Smith   ii   = a->i;
15542f7816d4SBarry Smith 
15552f7816d4SBarry Smith   for (i=0; i<m; i++) {
15562f7816d4SBarry Smith     jrow = ii[i];
15572f7816d4SBarry Smith     n    = ii[i+1] - jrow;
15582f7816d4SBarry Smith     sum1  = 0.0;
15592f7816d4SBarry Smith     sum2  = 0.0;
15602f7816d4SBarry Smith     sum3  = 0.0;
15612f7816d4SBarry Smith     sum4  = 0.0;
15622f7816d4SBarry Smith     sum5  = 0.0;
15632f7816d4SBarry Smith     sum6  = 0.0;
15642f7816d4SBarry Smith     sum7  = 0.0;
15652f7816d4SBarry Smith     sum8  = 0.0;
15662f7816d4SBarry Smith     sum9  = 0.0;
15672f7816d4SBarry Smith     sum10 = 0.0;
15682f7816d4SBarry Smith     sum11 = 0.0;
15692f7816d4SBarry Smith     sum12 = 0.0;
15702f7816d4SBarry Smith     sum13 = 0.0;
15712f7816d4SBarry Smith     sum14 = 0.0;
15722f7816d4SBarry Smith     sum15 = 0.0;
15732f7816d4SBarry Smith     sum16 = 0.0;
15742f7816d4SBarry Smith     for (j=0; j<n; j++) {
15752f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
15762f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
15772f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
15782f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
15792f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
15802f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
15812f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
15822f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
15832f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
15842f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
15852f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
15862f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
15872f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
15882f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
15892f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
15902f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
15912f7816d4SBarry Smith       jrow++;
15922f7816d4SBarry Smith      }
15932f7816d4SBarry Smith     y[16*i]    += sum1;
15942f7816d4SBarry Smith     y[16*i+1]  += sum2;
15952f7816d4SBarry Smith     y[16*i+2]  += sum3;
15962f7816d4SBarry Smith     y[16*i+3]  += sum4;
15972f7816d4SBarry Smith     y[16*i+4]  += sum5;
15982f7816d4SBarry Smith     y[16*i+5]  += sum6;
15992f7816d4SBarry Smith     y[16*i+6]  += sum7;
16002f7816d4SBarry Smith     y[16*i+7]  += sum8;
16012f7816d4SBarry Smith     y[16*i+8]  += sum9;
16022f7816d4SBarry Smith     y[16*i+9]  += sum10;
16032f7816d4SBarry Smith     y[16*i+10] += sum11;
16042f7816d4SBarry Smith     y[16*i+11] += sum12;
16052f7816d4SBarry Smith     y[16*i+12] += sum13;
16062f7816d4SBarry Smith     y[16*i+13] += sum14;
16072f7816d4SBarry Smith     y[16*i+14] += sum15;
16082f7816d4SBarry Smith     y[16*i+15] += sum16;
16092f7816d4SBarry Smith   }
16102f7816d4SBarry Smith 
16112f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
16121ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
16131ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
16142f7816d4SBarry Smith   PetscFunctionReturn(0);
16152f7816d4SBarry Smith }
16162f7816d4SBarry Smith 
16172f7816d4SBarry Smith #undef __FUNCT__
16182f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1619dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
16202f7816d4SBarry Smith {
16212f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
16222f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
16232f7816d4SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
16242f7816d4SBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1625dfbe8321SBarry Smith   PetscErrorCode ierr;
1626b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
16272f7816d4SBarry Smith 
16282f7816d4SBarry Smith   PetscFunctionBegin;
16292f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
16301ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
16311ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
16322f7816d4SBarry Smith   for (i=0; i<m; i++) {
1633bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1634bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
16352f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
16362f7816d4SBarry Smith     alpha1 = x[16*i];
16372f7816d4SBarry Smith     alpha2 = x[16*i+1];
16382f7816d4SBarry Smith     alpha3 = x[16*i+2];
16392f7816d4SBarry Smith     alpha4 = x[16*i+3];
16402f7816d4SBarry Smith     alpha5 = x[16*i+4];
16412f7816d4SBarry Smith     alpha6 = x[16*i+5];
16422f7816d4SBarry Smith     alpha7 = x[16*i+6];
16432f7816d4SBarry Smith     alpha8 = x[16*i+7];
16442f7816d4SBarry Smith     alpha9  = x[16*i+8];
16452f7816d4SBarry Smith     alpha10 = x[16*i+9];
16462f7816d4SBarry Smith     alpha11 = x[16*i+10];
16472f7816d4SBarry Smith     alpha12 = x[16*i+11];
16482f7816d4SBarry Smith     alpha13 = x[16*i+12];
16492f7816d4SBarry Smith     alpha14 = x[16*i+13];
16502f7816d4SBarry Smith     alpha15 = x[16*i+14];
16512f7816d4SBarry Smith     alpha16 = x[16*i+15];
16522f7816d4SBarry Smith     while (n-->0) {
16532f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
16542f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
16552f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
16562f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
16572f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
16582f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
16592f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
16602f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
16612f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
16622f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
16632f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
16642f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
16652f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
16662f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
16672f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
16682f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
16692f7816d4SBarry Smith       idx++; v++;
16702f7816d4SBarry Smith     }
16712f7816d4SBarry Smith   }
16722f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
16731ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
16741ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
167566ed3db0SBarry Smith   PetscFunctionReturn(0);
167666ed3db0SBarry Smith }
167766ed3db0SBarry Smith 
1678f4a53059SBarry Smith /*===================================================================================*/
16794a2ae208SSatish Balay #undef __FUNCT__
16804a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1681dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1682f4a53059SBarry Smith {
16834cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1684dfbe8321SBarry Smith   PetscErrorCode ierr;
1685f4a53059SBarry Smith 
1686b24ad042SBarry Smith   PetscFunctionBegin;
1687f4a53059SBarry Smith   /* start the scatter */
1688f4a53059SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
16894cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1690f4a53059SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
16914cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1692f4a53059SBarry Smith   PetscFunctionReturn(0);
1693f4a53059SBarry Smith }
1694f4a53059SBarry Smith 
16954a2ae208SSatish Balay #undef __FUNCT__
16964a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1697dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
16984cb9d9b8SBarry Smith {
16994cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1700dfbe8321SBarry Smith   PetscErrorCode ierr;
1701b24ad042SBarry Smith 
17024cb9d9b8SBarry Smith   PetscFunctionBegin;
17034cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
17044cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1705a5ff213dSBarry Smith   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
17064cb9d9b8SBarry Smith   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
17074cb9d9b8SBarry Smith   PetscFunctionReturn(0);
17084cb9d9b8SBarry Smith }
17094cb9d9b8SBarry Smith 
17104a2ae208SSatish Balay #undef __FUNCT__
17114a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1712dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
17134cb9d9b8SBarry Smith {
17144cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1715dfbe8321SBarry Smith   PetscErrorCode ierr;
17164cb9d9b8SBarry Smith 
1717b24ad042SBarry Smith   PetscFunctionBegin;
17184cb9d9b8SBarry Smith   /* start the scatter */
17194cb9d9b8SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1720d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
17214cb9d9b8SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1722717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
17234cb9d9b8SBarry Smith   PetscFunctionReturn(0);
17244cb9d9b8SBarry Smith }
17254cb9d9b8SBarry Smith 
17264a2ae208SSatish Balay #undef __FUNCT__
17274a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1728dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
17294cb9d9b8SBarry Smith {
17304cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1731dfbe8321SBarry Smith   PetscErrorCode ierr;
1732b24ad042SBarry Smith 
17334cb9d9b8SBarry Smith   PetscFunctionBegin;
17344cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1735d72c5c08SBarry Smith   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1736d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1737d72c5c08SBarry Smith   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
17384cb9d9b8SBarry Smith   PetscFunctionReturn(0);
17394cb9d9b8SBarry Smith }
17404cb9d9b8SBarry Smith 
17419c4fc4c7SBarry Smith #undef __FUNCT__
17427ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
17437ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
17447ba1a0bfSKris Buschelman {
17457ba1a0bfSKris Buschelman   /* This routine requires testing -- but it's getting better. */
17467ba1a0bfSKris Buschelman   PetscErrorCode ierr;
17477ba1a0bfSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
17487ba1a0bfSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
17497ba1a0bfSKris Buschelman   Mat            P=pp->AIJ;
17507ba1a0bfSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
17517ba1a0bfSKris Buschelman   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
17527ba1a0bfSKris Buschelman   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
17537ba1a0bfSKris Buschelman   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn;
17547ba1a0bfSKris Buschelman   PetscInt       i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
17557ba1a0bfSKris Buschelman   MatScalar      *ca;
17567ba1a0bfSKris Buschelman 
17577ba1a0bfSKris Buschelman   PetscFunctionBegin;
17587ba1a0bfSKris Buschelman   /* Start timer */
17597ba1a0bfSKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
17607ba1a0bfSKris Buschelman 
17617ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
17627ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
17637ba1a0bfSKris Buschelman 
17647ba1a0bfSKris Buschelman   cn = pn*ppdof;
17657ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
17667ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
17677ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
17687ba1a0bfSKris Buschelman   ci[0] = 0;
17697ba1a0bfSKris Buschelman 
17707ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
17717ba1a0bfSKris Buschelman   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
17727ba1a0bfSKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
17737ba1a0bfSKris Buschelman   ptasparserow = ptadenserow  + an;
17747ba1a0bfSKris Buschelman   denserow     = ptasparserow + an;
17757ba1a0bfSKris Buschelman   sparserow    = denserow     + cn;
17767ba1a0bfSKris Buschelman 
17777ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
17787ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
17797ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
17807ba1a0bfSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
17817ba1a0bfSKris Buschelman   current_space = free_space;
17827ba1a0bfSKris Buschelman 
17837ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
17847ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
17857ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
17867ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
17877ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
17887ba1a0bfSKris Buschelman       ptanzi = 0;
17897ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
17907ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
17917ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
17927ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
17937ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
17947ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
17957ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
17967ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
17977ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
17987ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
17997ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
18007ba1a0bfSKris Buschelman           }
18017ba1a0bfSKris Buschelman         }
18027ba1a0bfSKris Buschelman       }
18037ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
18047ba1a0bfSKris Buschelman       ptaj = ptasparserow;
18057ba1a0bfSKris Buschelman       cnzi   = 0;
18067ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
18077ba1a0bfSKris Buschelman         /* Get offset within block of P */
18087ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
18097ba1a0bfSKris Buschelman         /* Get block row of P */
18107ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
18117ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
18127ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
18137ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
18147ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
18157ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
18167ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
18177ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
18187ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
18197ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
18207ba1a0bfSKris Buschelman           }
18217ba1a0bfSKris Buschelman         }
18227ba1a0bfSKris Buschelman       }
18237ba1a0bfSKris Buschelman 
18247ba1a0bfSKris Buschelman       /* sort sparserow */
18257ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
18267ba1a0bfSKris Buschelman 
18277ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
18287ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
18297ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
18307ba1a0bfSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
18317ba1a0bfSKris Buschelman       }
18327ba1a0bfSKris Buschelman 
18337ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
18347ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
18357ba1a0bfSKris Buschelman       current_space->array           += cnzi;
18367ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
18377ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
18387ba1a0bfSKris Buschelman 
18397ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
18407ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
18417ba1a0bfSKris Buschelman       }
18427ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
18437ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
18447ba1a0bfSKris Buschelman       }
18457ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
18467ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
18477ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
18487ba1a0bfSKris Buschelman     }
18497ba1a0bfSKris Buschelman   }
18507ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
18517ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
18527ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
18537ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
18547ba1a0bfSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
18557ba1a0bfSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
18567ba1a0bfSKris Buschelman 
18577ba1a0bfSKris Buschelman   /* Allocate space for ca */
18587ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
18597ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
18607ba1a0bfSKris Buschelman 
18617ba1a0bfSKris Buschelman   /* put together the new matrix */
18627ba1a0bfSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
18637ba1a0bfSKris Buschelman 
18647ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
18657ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
18667ba1a0bfSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
18677ba1a0bfSKris Buschelman   c->freedata = PETSC_TRUE;
18687ba1a0bfSKris Buschelman   c->nonew    = 0;
18697ba1a0bfSKris Buschelman 
18707ba1a0bfSKris Buschelman   /* Clean up. */
18717ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
18727ba1a0bfSKris Buschelman 
18737ba1a0bfSKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
18747ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
18757ba1a0bfSKris Buschelman }
18767ba1a0bfSKris Buschelman 
18777ba1a0bfSKris Buschelman #undef __FUNCT__
18787ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
18797ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
18807ba1a0bfSKris Buschelman {
18817ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
18827ba1a0bfSKris Buschelman   PetscErrorCode ierr;
18837ba1a0bfSKris Buschelman   PetscInt       flops=0;
18847ba1a0bfSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
18857ba1a0bfSKris Buschelman   Mat            P=pp->AIJ;
18867ba1a0bfSKris Buschelman   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
18877ba1a0bfSKris Buschelman   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
18887ba1a0bfSKris Buschelman   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
18897ba1a0bfSKris Buschelman   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
18907ba1a0bfSKris Buschelman   PetscInt       *ci=c->i,*cj=c->j,*cjj;
18917ba1a0bfSKris Buschelman   PetscInt       am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof;
18927ba1a0bfSKris Buschelman   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
18937ba1a0bfSKris Buschelman   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
18947ba1a0bfSKris Buschelman 
18957ba1a0bfSKris Buschelman   PetscFunctionBegin;
18967ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
18977ba1a0bfSKris Buschelman   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
18987ba1a0bfSKris Buschelman   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
18997ba1a0bfSKris Buschelman 
19007ba1a0bfSKris Buschelman   apj      = (PetscInt *)(apa + cn);
19017ba1a0bfSKris Buschelman   apjdense = apj + cn;
19027ba1a0bfSKris Buschelman 
19037ba1a0bfSKris Buschelman   /* Clear old values in C */
19047ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
19057ba1a0bfSKris Buschelman 
19067ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
19077ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
19087ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
19097ba1a0bfSKris Buschelman     apnzj = 0;
19107ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
19117ba1a0bfSKris Buschelman       /* Get offset within block of P */
19127ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
19137ba1a0bfSKris Buschelman       /* Get block row of P */
19147ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
19157ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
19167ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
19177ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
19187ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
19197ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
19207ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
19217ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
19227ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
19237ba1a0bfSKris Buschelman         }
19247ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
19257ba1a0bfSKris Buschelman       }
19267ba1a0bfSKris Buschelman       flops += 2*pnzj;
19277ba1a0bfSKris Buschelman       aa++;
19287ba1a0bfSKris Buschelman     }
19297ba1a0bfSKris Buschelman 
19307ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
19317ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
19327ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
19337ba1a0bfSKris Buschelman 
19347ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
19357ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
19367ba1a0bfSKris Buschelman     pshift  = i%ppdof;
19377ba1a0bfSKris Buschelman     poffset = pi[prow];
19387ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
19397ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
19407ba1a0bfSKris Buschelman     pJ   = pj+poffset;
19417ba1a0bfSKris Buschelman     pA   = pa+poffset;
19427ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
19437ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
19447ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
19457ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
19467ba1a0bfSKris Buschelman       pJ++;
19477ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
19487ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
19497ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
19507ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
19517ba1a0bfSKris Buschelman         }
19527ba1a0bfSKris Buschelman       }
19537ba1a0bfSKris Buschelman       flops += 2*apnzj;
19547ba1a0bfSKris Buschelman       pA++;
19557ba1a0bfSKris Buschelman     }
19567ba1a0bfSKris Buschelman 
19577ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
19587ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
19597ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
19607ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
19617ba1a0bfSKris Buschelman     }
19627ba1a0bfSKris Buschelman   }
19637ba1a0bfSKris Buschelman 
19647ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
19657ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
19667ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
19677ba1a0bfSKris Buschelman   ierr = PetscFree(apa);CHKERRQ(ierr);
19687ba1a0bfSKris Buschelman   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
19697ba1a0bfSKris Buschelman 
19707ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
19717ba1a0bfSKris Buschelman }
19727ba1a0bfSKris Buschelman 
19737ba1a0bfSKris Buschelman #undef __FUNCT__
19740fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
1975ceb03754SKris Buschelman PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
19769c4fc4c7SBarry Smith {
19779c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1978ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
19799c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
19809c4fc4c7SBarry Smith   PetscErrorCode    ierr;
19810fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
1982ba8c8a56SBarry Smith   PetscInt          *cols;
1983ba8c8a56SBarry Smith   PetscScalar       *vals;
19849c4fc4c7SBarry Smith 
19859c4fc4c7SBarry Smith   PetscFunctionBegin;
19869c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
19870fd73130SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(int),&ilen);CHKERRQ(ierr);
19889c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
19899c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
19900fd73130SBarry Smith     for (j=0; j<dof; j++) {
19910fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
19929c4fc4c7SBarry Smith     }
19939c4fc4c7SBarry Smith   }
1994ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
1995ceb03754SKris Buschelman   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
19969c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
19979c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
19989c4fc4c7SBarry Smith   ii   = 0;
19999c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
2000ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
20010fd73130SBarry Smith     for (j=0; j<dof; j++) {
20029c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
20030fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
20049c4fc4c7SBarry Smith       }
2005ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
20069c4fc4c7SBarry Smith       ii++;
20079c4fc4c7SBarry Smith     }
2008ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
20099c4fc4c7SBarry Smith   }
20109c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
2011ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2012ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013ceb03754SKris Buschelman 
2014ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
2015*c3d102feSKris Buschelman     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
2016*c3d102feSKris Buschelman   } else {
2017ceb03754SKris Buschelman     *newmat = B;
2018*c3d102feSKris Buschelman   }
20199c4fc4c7SBarry Smith   PetscFunctionReturn(0);
20209c4fc4c7SBarry Smith }
20219c4fc4c7SBarry Smith 
20220fd73130SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
20230fd73130SBarry Smith #undef __FUNCT__
20240fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2025ceb03754SKris Buschelman PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
20260fd73130SBarry Smith {
20270fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2028ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
20290fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
20300fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
20310fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
20320fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
20337a1a7badSBarry Smith   PetscInt          dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols;
20340fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
20350fd73130SBarry Smith   PetscErrorCode    ierr;
20360fd73130SBarry Smith   PetscScalar       *vals,*ovals;
20370fd73130SBarry Smith 
20380fd73130SBarry Smith   PetscFunctionBegin;
20397a1a7badSBarry Smith   ierr = PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);CHKERRQ(ierr);
20400fd73130SBarry Smith   for (i=0; i<A->m/dof; i++) {
20410fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
20420fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
20430fd73130SBarry Smith     for (j=0; j<dof; j++) {
20440fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
20450fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
20460fd73130SBarry Smith     }
20470fd73130SBarry Smith   }
2048ceb03754SKris Buschelman   ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2049ceb03754SKris Buschelman   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
20500fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
20510fd73130SBarry Smith 
20527a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
20530fd73130SBarry Smith   rstart = dof*mpiaij->rstart;
20540fd73130SBarry Smith   cstart = dof*mpiaij->cstart;
20550fd73130SBarry Smith   garray = mpiaij->garray;
20560fd73130SBarry Smith 
20570fd73130SBarry Smith   ii = rstart;
20580fd73130SBarry Smith   for (i=0; i<A->m/dof; i++) {
20590fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
20600fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
20610fd73130SBarry Smith     for (j=0; j<dof; j++) {
20620fd73130SBarry Smith       for (k=0; k<ncols; k++) {
20630fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
20640fd73130SBarry Smith       }
20650fd73130SBarry Smith       for (k=0; k<oncols; k++) {
20660fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
20670fd73130SBarry Smith       }
2068ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2069ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
20700fd73130SBarry Smith       ii++;
20710fd73130SBarry Smith     }
20720fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
20730fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
20740fd73130SBarry Smith   }
20750fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
20760fd73130SBarry Smith 
2077ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079ceb03754SKris Buschelman 
2080ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
2081*c3d102feSKris Buschelman     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
2082*c3d102feSKris Buschelman   } else {
2083ceb03754SKris Buschelman     *newmat = B;
2084*c3d102feSKris Buschelman   }
20850fd73130SBarry Smith   PetscFunctionReturn(0);
20860fd73130SBarry Smith }
20870fd73130SBarry Smith 
20880fd73130SBarry Smith 
20890fd73130SBarry Smith 
2090bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
20915983afb6SSatish Balay /*MC
20920bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
20930bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
20940bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
20950bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
20960bad9183SKris Buschelman 
20970bad9183SKris Buschelman   Operations provided:
20980fd73130SBarry Smith + MatMult
20990bad9183SKris Buschelman . MatMultTranspose
21000bad9183SKris Buschelman . MatMultAdd
21010bad9183SKris Buschelman . MatMultTransposeAdd
21020fd73130SBarry Smith - MatView
21030bad9183SKris Buschelman 
21040bad9183SKris Buschelman   Level: advanced
21050bad9183SKris Buschelman 
21060bad9183SKris Buschelman M*/
21074a2ae208SSatish Balay #undef __FUNCT__
21084a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ"
2109b24ad042SBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
211082b1193eSBarry Smith {
2111dfbe8321SBarry Smith   PetscErrorCode ierr;
2112b24ad042SBarry Smith   PetscMPIInt    size;
2113b24ad042SBarry Smith   PetscInt       n;
21144cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
211582b1193eSBarry Smith   Mat            B;
211682b1193eSBarry Smith 
211782b1193eSBarry Smith   PetscFunctionBegin;
2118d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2119d72c5c08SBarry Smith 
2120d72c5c08SBarry Smith   if (dof == 1) {
2121d72c5c08SBarry Smith     *maij = A;
2122d72c5c08SBarry Smith   } else {
212383903688SBarry Smith     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
2124cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
2125d72c5c08SBarry Smith 
2126f4a53059SBarry Smith     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2127f4a53059SBarry Smith     if (size == 1) {
2128b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
21294cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
21300fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
2131b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
2132b9b97703SBarry Smith       b->dof = dof;
21334cb9d9b8SBarry Smith       b->AIJ = A;
2134d72c5c08SBarry Smith       if (dof == 2) {
2135cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
2136cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2137cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2138cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2139bcc973b7SBarry Smith       } else if (dof == 3) {
2140bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
2141bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2142bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2143bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2144bcc973b7SBarry Smith       } else if (dof == 4) {
2145bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
2146bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2147bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2148bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2149f9fae5adSBarry Smith       } else if (dof == 5) {
2150f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
2151f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2152f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2153f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
21546cd98798SBarry Smith       } else if (dof == 6) {
21556cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
21566cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
21576cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
21586cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
215966ed3db0SBarry Smith       } else if (dof == 8) {
216066ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
216166ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
216266ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
216366ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
21642b692628SMatthew Knepley       } else if (dof == 9) {
21652b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
21662b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
21672b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
21682b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
21692f7816d4SBarry Smith       } else if (dof == 16) {
21702f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
21712f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
21722f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
21732f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
217482b1193eSBarry Smith       } else {
217577431f27SBarry Smith         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
217682b1193eSBarry Smith       }
21777ba1a0bfSKris Buschelman       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
21787ba1a0bfSKris Buschelman       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
21799c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2180f4a53059SBarry Smith     } else {
2181f4a53059SBarry Smith       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2182f4a53059SBarry Smith       IS         from,to;
2183f4a53059SBarry Smith       Vec        gvec;
2184b24ad042SBarry Smith       PetscInt   *garray,i;
2185f4a53059SBarry Smith 
2186b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2187d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
21880fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
2189b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
2190b9b97703SBarry Smith       b->dof = dof;
2191b9b97703SBarry Smith       b->A   = A;
21924cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
21934cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
21944cb9d9b8SBarry Smith 
2195f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2196f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
219713288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2198f4a53059SBarry Smith 
2199f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
2200b24ad042SBarry Smith       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2201f4a53059SBarry Smith       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2202f4a53059SBarry Smith       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
2203f4a53059SBarry Smith       ierr = PetscFree(garray);CHKERRQ(ierr);
2204f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2205f4a53059SBarry Smith 
2206f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
2207f4a53059SBarry Smith       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
220813288a74SBarry Smith       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2209f4a53059SBarry Smith 
2210f4a53059SBarry Smith       /* generate the scatter context */
2211f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
2212f4a53059SBarry Smith 
2213f4a53059SBarry Smith       ierr = ISDestroy(from);CHKERRQ(ierr);
2214f4a53059SBarry Smith       ierr = ISDestroy(to);CHKERRQ(ierr);
2215f4a53059SBarry Smith       ierr = VecDestroy(gvec);CHKERRQ(ierr);
2216f4a53059SBarry Smith 
2217f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
22184cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
22194cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
22204cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
22210fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
2222f4a53059SBarry Smith     }
2223cd3bbe55SBarry Smith     *maij = B;
22240fd73130SBarry Smith     ierr = MatView_Private(B);CHKERRQ(ierr);
2225d72c5c08SBarry Smith   }
222682b1193eSBarry Smith   PetscFunctionReturn(0);
222782b1193eSBarry Smith }
222882b1193eSBarry Smith 
222982b1193eSBarry Smith 
223082b1193eSBarry Smith 
223182b1193eSBarry Smith 
223282b1193eSBarry Smith 
223382b1193eSBarry Smith 
223482b1193eSBarry Smith 
223582b1193eSBarry Smith 
223682b1193eSBarry Smith 
223782b1193eSBarry Smith 
223882b1193eSBarry Smith 
223982b1193eSBarry Smith 
2240