xref: /petsc/src/mat/impls/maij/maij.c (revision ed8eea031df87cac88b16b7fd78c5dd018903d3f)
182b1193eSBarry Smith /*
2cd3bbe55SBarry Smith     Defines the basic matrix operations for the MAIJ  matrix storage format.
3cd3bbe55SBarry Smith   This format is used for restriction and interpolation operations for
4cd3bbe55SBarry Smith   multicomponent problems. It interpolates each component the same way
5cd3bbe55SBarry Smith   independently.
6cd3bbe55SBarry Smith 
7cd3bbe55SBarry Smith      We provide:
8cd3bbe55SBarry Smith          MatMult()
9cd3bbe55SBarry Smith          MatMultTranspose()
10cd3bbe55SBarry Smith          MatMultTransposeAdd()
11cd3bbe55SBarry Smith          MatMultAdd()
12cd3bbe55SBarry Smith           and
13cd3bbe55SBarry Smith          MatCreateMAIJ(Mat,dof,Mat*)
14f4a53059SBarry Smith 
15f4a53059SBarry Smith      This single directory handles both the sequential and parallel codes
1682b1193eSBarry Smith */
1782b1193eSBarry Smith 
18be911618SKris Buschelman #include "src/mat/impls/maij/maij.h"
197ba1a0bfSKris Buschelman #include "src/mat/utils/freespace.h"
203c94ec11SBarry Smith #include "vecimpl.h"
2182b1193eSBarry Smith 
224a2ae208SSatish Balay #undef __FUNCT__
234a2ae208SSatish Balay #define __FUNCT__ "MatMAIJGetAIJ"
24dfbe8321SBarry Smith PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
25b9b97703SBarry Smith {
26dfbe8321SBarry Smith   PetscErrorCode ierr;
27b9b97703SBarry Smith   PetscTruth     ismpimaij,isseqmaij;
28b9b97703SBarry Smith 
29b9b97703SBarry Smith   PetscFunctionBegin;
30b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31b9b97703SBarry Smith   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32b9b97703SBarry Smith   if (ismpimaij) {
33b9b97703SBarry Smith     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34b9b97703SBarry Smith 
35b9b97703SBarry Smith     *B = b->A;
36b9b97703SBarry Smith   } else if (isseqmaij) {
37b9b97703SBarry Smith     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38b9b97703SBarry Smith 
39b9b97703SBarry Smith     *B = b->AIJ;
40b9b97703SBarry Smith   } else {
41b9b97703SBarry Smith     *B = A;
42b9b97703SBarry Smith   }
43b9b97703SBarry Smith   PetscFunctionReturn(0);
44b9b97703SBarry Smith }
45b9b97703SBarry Smith 
464a2ae208SSatish Balay #undef __FUNCT__
474a2ae208SSatish Balay #define __FUNCT__ "MatMAIJRedimension"
48b24ad042SBarry Smith PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
49b9b97703SBarry Smith {
50dfbe8321SBarry Smith   PetscErrorCode ierr;
51b9b97703SBarry Smith   Mat            Aij;
52b9b97703SBarry Smith 
53b9b97703SBarry Smith   PetscFunctionBegin;
54b9b97703SBarry Smith   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55b9b97703SBarry Smith   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56b9b97703SBarry Smith   PetscFunctionReturn(0);
57b9b97703SBarry Smith }
58b9b97703SBarry Smith 
594a2ae208SSatish Balay #undef __FUNCT__
604a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqMAIJ"
61dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
6282b1193eSBarry Smith {
63dfbe8321SBarry Smith   PetscErrorCode ierr;
644cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
6582b1193eSBarry Smith 
6682b1193eSBarry Smith   PetscFunctionBegin;
67cd3bbe55SBarry Smith   if (b->AIJ) {
68cd3bbe55SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
6982b1193eSBarry Smith   }
704cb9d9b8SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
714cb9d9b8SBarry Smith   PetscFunctionReturn(0);
724cb9d9b8SBarry Smith }
734cb9d9b8SBarry Smith 
744a2ae208SSatish Balay #undef __FUNCT__
750fd73130SBarry Smith #define __FUNCT__ "MatView_SeqMAIJ"
760fd73130SBarry Smith PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
770fd73130SBarry Smith {
780fd73130SBarry Smith   PetscErrorCode ierr;
790fd73130SBarry Smith   Mat            B;
800fd73130SBarry Smith 
810fd73130SBarry Smith   PetscFunctionBegin;
82ceb03754SKris Buschelman   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
830fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
840fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
850fd73130SBarry Smith   PetscFunctionReturn(0);
860fd73130SBarry Smith }
870fd73130SBarry Smith 
880fd73130SBarry Smith #undef __FUNCT__
890fd73130SBarry Smith #define __FUNCT__ "MatView_MPIMAIJ"
900fd73130SBarry Smith PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
910fd73130SBarry Smith {
920fd73130SBarry Smith   PetscErrorCode ierr;
930fd73130SBarry Smith   Mat            B;
940fd73130SBarry Smith 
950fd73130SBarry Smith   PetscFunctionBegin;
96ceb03754SKris Buschelman   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
970fd73130SBarry Smith   ierr = MatView(B,viewer);CHKERRQ(ierr);
980fd73130SBarry Smith   ierr = MatDestroy(B);CHKERRQ(ierr);
990fd73130SBarry Smith   PetscFunctionReturn(0);
1000fd73130SBarry Smith }
1010fd73130SBarry Smith 
1020fd73130SBarry Smith #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIMAIJ"
104dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
1054cb9d9b8SBarry Smith {
106dfbe8321SBarry Smith   PetscErrorCode ierr;
1074cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1084cb9d9b8SBarry Smith 
1094cb9d9b8SBarry Smith   PetscFunctionBegin;
1104cb9d9b8SBarry Smith   if (b->AIJ) {
1114cb9d9b8SBarry Smith     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
1124cb9d9b8SBarry Smith   }
113f4a53059SBarry Smith   if (b->OAIJ) {
114f4a53059SBarry Smith     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
115f4a53059SBarry Smith   }
116b9b97703SBarry Smith   if (b->A) {
117b9b97703SBarry Smith     ierr = MatDestroy(b->A);CHKERRQ(ierr);
118b9b97703SBarry Smith   }
119f4a53059SBarry Smith   if (b->ctx) {
120f4a53059SBarry Smith     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
121f4a53059SBarry Smith   }
122f4a53059SBarry Smith   if (b->w) {
123f4a53059SBarry Smith     ierr = VecDestroy(b->w);CHKERRQ(ierr);
124f4a53059SBarry Smith   }
125cd3bbe55SBarry Smith   ierr = PetscFree(b);CHKERRQ(ierr);
126b9b97703SBarry Smith   PetscFunctionReturn(0);
127b9b97703SBarry Smith }
128b9b97703SBarry Smith 
1290bad9183SKris Buschelman /*MC
130fafad747SKris Buschelman   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
1310bad9183SKris Buschelman   multicomponent problems, interpolating or restricting each component the same way independently.
1320bad9183SKris Buschelman   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
1330bad9183SKris Buschelman 
1340bad9183SKris Buschelman   Operations provided:
1350bad9183SKris Buschelman . MatMult
1360bad9183SKris Buschelman . MatMultTranspose
1370bad9183SKris Buschelman . MatMultAdd
1380bad9183SKris Buschelman . MatMultTransposeAdd
1390bad9183SKris Buschelman 
1400bad9183SKris Buschelman   Level: advanced
1410bad9183SKris Buschelman 
1420bad9183SKris Buschelman .seealso: MatCreateSeqDense
1430bad9183SKris Buschelman M*/
1440bad9183SKris Buschelman 
14582b1193eSBarry Smith EXTERN_C_BEGIN
1464a2ae208SSatish Balay #undef __FUNCT__
1474a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MAIJ"
148dfbe8321SBarry Smith PetscErrorCode MatCreate_MAIJ(Mat A)
14982b1193eSBarry Smith {
150dfbe8321SBarry Smith   PetscErrorCode ierr;
1514cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
15282b1193eSBarry Smith 
15382b1193eSBarry Smith   PetscFunctionBegin;
154b0a32e0cSBarry Smith   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
155b0a32e0cSBarry Smith   A->data  = (void*)b;
156cd3bbe55SBarry Smith   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
157cd3bbe55SBarry Smith   A->factor           = 0;
158cd3bbe55SBarry Smith   A->mapping          = 0;
159f4a53059SBarry Smith 
160cd3bbe55SBarry Smith   b->AIJ  = 0;
161cd3bbe55SBarry Smith   b->dof  = 0;
162f4a53059SBarry Smith   b->OAIJ = 0;
163f4a53059SBarry Smith   b->ctx  = 0;
164f4a53059SBarry Smith   b->w    = 0;
16582b1193eSBarry Smith   PetscFunctionReturn(0);
16682b1193eSBarry Smith }
16782b1193eSBarry Smith EXTERN_C_END
16882b1193eSBarry Smith 
169cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
1704a2ae208SSatish Balay #undef __FUNCT__
171b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_2"
172dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
17382b1193eSBarry Smith {
1744cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
175bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
17687828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2;
177dfbe8321SBarry Smith   PetscErrorCode ierr;
178b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
179b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
18082b1193eSBarry Smith 
181bcc973b7SBarry Smith   PetscFunctionBegin;
1821ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1831ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
184bcc973b7SBarry Smith   idx  = a->j;
185bcc973b7SBarry Smith   v    = a->a;
186bcc973b7SBarry Smith   ii   = a->i;
187bcc973b7SBarry Smith 
188bcc973b7SBarry Smith   for (i=0; i<m; i++) {
189bcc973b7SBarry Smith     jrow = ii[i];
190bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
191bcc973b7SBarry Smith     sum1  = 0.0;
192bcc973b7SBarry Smith     sum2  = 0.0;
193bcc973b7SBarry Smith     for (j=0; j<n; j++) {
194bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
195bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
196bcc973b7SBarry Smith       jrow++;
197bcc973b7SBarry Smith      }
198bcc973b7SBarry Smith     y[2*i]   = sum1;
199bcc973b7SBarry Smith     y[2*i+1] = sum2;
200bcc973b7SBarry Smith   }
201bcc973b7SBarry Smith 
202b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
2031ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2041ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
20582b1193eSBarry Smith   PetscFunctionReturn(0);
20682b1193eSBarry Smith }
207bcc973b7SBarry Smith 
2084a2ae208SSatish Balay #undef __FUNCT__
209b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
210dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
21182b1193eSBarry Smith {
2124cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
213bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
21487828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
215dfbe8321SBarry Smith   PetscErrorCode ierr;
216b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
21782b1193eSBarry Smith 
218bcc973b7SBarry Smith   PetscFunctionBegin;
219bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
2201ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2211ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
222bfec09a0SHong Zhang 
223bcc973b7SBarry Smith   for (i=0; i<m; i++) {
224bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
225bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
226bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
227bcc973b7SBarry Smith     alpha1 = x[2*i];
228bcc973b7SBarry Smith     alpha2 = x[2*i+1];
229bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
230bcc973b7SBarry Smith   }
231b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
2321ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2331ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
23482b1193eSBarry Smith   PetscFunctionReturn(0);
23582b1193eSBarry Smith }
236bcc973b7SBarry Smith 
2374a2ae208SSatish Balay #undef __FUNCT__
238b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
239dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
24082b1193eSBarry Smith {
2414cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
242bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
24387828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2;
244dfbe8321SBarry Smith   PetscErrorCode ierr;
245b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
246b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
24782b1193eSBarry Smith 
248bcc973b7SBarry Smith   PetscFunctionBegin;
249f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2501ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2511ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
252bcc973b7SBarry Smith   idx  = a->j;
253bcc973b7SBarry Smith   v    = a->a;
254bcc973b7SBarry Smith   ii   = a->i;
255bcc973b7SBarry Smith 
256bcc973b7SBarry Smith   for (i=0; i<m; i++) {
257bcc973b7SBarry Smith     jrow = ii[i];
258bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
259bcc973b7SBarry Smith     sum1  = 0.0;
260bcc973b7SBarry Smith     sum2  = 0.0;
261bcc973b7SBarry Smith     for (j=0; j<n; j++) {
262bcc973b7SBarry Smith       sum1 += v[jrow]*x[2*idx[jrow]];
263bcc973b7SBarry Smith       sum2 += v[jrow]*x[2*idx[jrow]+1];
264bcc973b7SBarry Smith       jrow++;
265bcc973b7SBarry Smith      }
266bcc973b7SBarry Smith     y[2*i]   += sum1;
267bcc973b7SBarry Smith     y[2*i+1] += sum2;
268bcc973b7SBarry Smith   }
269bcc973b7SBarry Smith 
270b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*m);
2711ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2721ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
273bcc973b7SBarry Smith   PetscFunctionReturn(0);
27482b1193eSBarry Smith }
2754a2ae208SSatish Balay #undef __FUNCT__
276b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
277dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
27882b1193eSBarry Smith {
2794cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
280bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
28187828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2;
282dfbe8321SBarry Smith   PetscErrorCode ierr;
283b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
28482b1193eSBarry Smith 
285bcc973b7SBarry Smith   PetscFunctionBegin;
286f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2871ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2881ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
289bfec09a0SHong Zhang 
290bcc973b7SBarry Smith   for (i=0; i<m; i++) {
291bfec09a0SHong Zhang     idx   = a->j + a->i[i] ;
292bfec09a0SHong Zhang     v     = a->a + a->i[i] ;
293bcc973b7SBarry Smith     n     = a->i[i+1] - a->i[i];
294bcc973b7SBarry Smith     alpha1 = x[2*i];
295bcc973b7SBarry Smith     alpha2 = x[2*i+1];
296bcc973b7SBarry Smith     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
297bcc973b7SBarry Smith   }
298b0a32e0cSBarry Smith   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
2991ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3001ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
301bcc973b7SBarry Smith   PetscFunctionReturn(0);
30282b1193eSBarry Smith }
303cd3bbe55SBarry Smith /* --------------------------------------------------------------------------------------*/
3044a2ae208SSatish Balay #undef __FUNCT__
305b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_3"
306dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
307bcc973b7SBarry Smith {
3084cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
309bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
31087828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
311dfbe8321SBarry Smith   PetscErrorCode ierr;
312b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
313b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
31482b1193eSBarry Smith 
315bcc973b7SBarry Smith   PetscFunctionBegin;
3161ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3171ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
318bcc973b7SBarry Smith   idx  = a->j;
319bcc973b7SBarry Smith   v    = a->a;
320bcc973b7SBarry Smith   ii   = a->i;
321bcc973b7SBarry Smith 
322bcc973b7SBarry Smith   for (i=0; i<m; i++) {
323bcc973b7SBarry Smith     jrow = ii[i];
324bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
325bcc973b7SBarry Smith     sum1  = 0.0;
326bcc973b7SBarry Smith     sum2  = 0.0;
327bcc973b7SBarry Smith     sum3  = 0.0;
328bcc973b7SBarry Smith     for (j=0; j<n; j++) {
329bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
330bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
331bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
332bcc973b7SBarry Smith       jrow++;
333bcc973b7SBarry Smith      }
334bcc973b7SBarry Smith     y[3*i]   = sum1;
335bcc973b7SBarry Smith     y[3*i+1] = sum2;
336bcc973b7SBarry Smith     y[3*i+2] = sum3;
337bcc973b7SBarry Smith   }
338bcc973b7SBarry Smith 
339b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*m);
3401ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3411ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
342bcc973b7SBarry Smith   PetscFunctionReturn(0);
343bcc973b7SBarry Smith }
344bcc973b7SBarry Smith 
3454a2ae208SSatish Balay #undef __FUNCT__
346b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
347dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
348bcc973b7SBarry Smith {
3494cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
350bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
35187828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
352dfbe8321SBarry Smith   PetscErrorCode ierr;
353b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
354bcc973b7SBarry Smith 
355bcc973b7SBarry Smith   PetscFunctionBegin;
356bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
3571ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3581ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
359bfec09a0SHong Zhang 
360bcc973b7SBarry Smith   for (i=0; i<m; i++) {
361bfec09a0SHong Zhang     idx    = a->j + a->i[i];
362bfec09a0SHong Zhang     v      = a->a + a->i[i];
363bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
364bcc973b7SBarry Smith     alpha1 = x[3*i];
365bcc973b7SBarry Smith     alpha2 = x[3*i+1];
366bcc973b7SBarry Smith     alpha3 = x[3*i+2];
367bcc973b7SBarry Smith     while (n-->0) {
368bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
369bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
370bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
371bcc973b7SBarry Smith       idx++; v++;
372bcc973b7SBarry Smith     }
373bcc973b7SBarry Smith   }
374b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
3751ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3761ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
377bcc973b7SBarry Smith   PetscFunctionReturn(0);
378bcc973b7SBarry Smith }
379bcc973b7SBarry Smith 
3804a2ae208SSatish Balay #undef __FUNCT__
381b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
382dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
383bcc973b7SBarry Smith {
3844cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
385bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
38687828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
387dfbe8321SBarry Smith   PetscErrorCode ierr;
388b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
389b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
390bcc973b7SBarry Smith 
391bcc973b7SBarry Smith   PetscFunctionBegin;
392f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
3931ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3941ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
395bcc973b7SBarry Smith   idx  = a->j;
396bcc973b7SBarry Smith   v    = a->a;
397bcc973b7SBarry Smith   ii   = a->i;
398bcc973b7SBarry Smith 
399bcc973b7SBarry Smith   for (i=0; i<m; i++) {
400bcc973b7SBarry Smith     jrow = ii[i];
401bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
402bcc973b7SBarry Smith     sum1  = 0.0;
403bcc973b7SBarry Smith     sum2  = 0.0;
404bcc973b7SBarry Smith     sum3  = 0.0;
405bcc973b7SBarry Smith     for (j=0; j<n; j++) {
406bcc973b7SBarry Smith       sum1 += v[jrow]*x[3*idx[jrow]];
407bcc973b7SBarry Smith       sum2 += v[jrow]*x[3*idx[jrow]+1];
408bcc973b7SBarry Smith       sum3 += v[jrow]*x[3*idx[jrow]+2];
409bcc973b7SBarry Smith       jrow++;
410bcc973b7SBarry Smith      }
411bcc973b7SBarry Smith     y[3*i]   += sum1;
412bcc973b7SBarry Smith     y[3*i+1] += sum2;
413bcc973b7SBarry Smith     y[3*i+2] += sum3;
414bcc973b7SBarry Smith   }
415bcc973b7SBarry Smith 
416b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
4171ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4181ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
419bcc973b7SBarry Smith   PetscFunctionReturn(0);
420bcc973b7SBarry Smith }
4214a2ae208SSatish Balay #undef __FUNCT__
422b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
423dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
424bcc973b7SBarry Smith {
4254cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
426bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
42787828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
428dfbe8321SBarry Smith   PetscErrorCode ierr;
429b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
430bcc973b7SBarry Smith 
431bcc973b7SBarry Smith   PetscFunctionBegin;
432f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
4331ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4341ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
435bcc973b7SBarry Smith   for (i=0; i<m; i++) {
436bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
437bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
438bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
439bcc973b7SBarry Smith     alpha1 = x[3*i];
440bcc973b7SBarry Smith     alpha2 = x[3*i+1];
441bcc973b7SBarry Smith     alpha3 = x[3*i+2];
442bcc973b7SBarry Smith     while (n-->0) {
443bcc973b7SBarry Smith       y[3*(*idx)]   += alpha1*(*v);
444bcc973b7SBarry Smith       y[3*(*idx)+1] += alpha2*(*v);
445bcc973b7SBarry Smith       y[3*(*idx)+2] += alpha3*(*v);
446bcc973b7SBarry Smith       idx++; v++;
447bcc973b7SBarry Smith     }
448bcc973b7SBarry Smith   }
449b0a32e0cSBarry Smith   PetscLogFlops(6*a->nz);
4501ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4511ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
452bcc973b7SBarry Smith   PetscFunctionReturn(0);
453bcc973b7SBarry Smith }
454bcc973b7SBarry Smith 
455bcc973b7SBarry Smith /* ------------------------------------------------------------------------------*/
4564a2ae208SSatish Balay #undef __FUNCT__
457b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_4"
458dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
459bcc973b7SBarry Smith {
4604cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
461bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
46287828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
463dfbe8321SBarry Smith   PetscErrorCode ierr;
464b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
465b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
466bcc973b7SBarry Smith 
467bcc973b7SBarry Smith   PetscFunctionBegin;
4681ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
4691ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
470bcc973b7SBarry Smith   idx  = a->j;
471bcc973b7SBarry Smith   v    = a->a;
472bcc973b7SBarry Smith   ii   = a->i;
473bcc973b7SBarry Smith 
474bcc973b7SBarry Smith   for (i=0; i<m; i++) {
475bcc973b7SBarry Smith     jrow = ii[i];
476bcc973b7SBarry Smith     n    = ii[i+1] - jrow;
477bcc973b7SBarry Smith     sum1  = 0.0;
478bcc973b7SBarry Smith     sum2  = 0.0;
479bcc973b7SBarry Smith     sum3  = 0.0;
480bcc973b7SBarry Smith     sum4  = 0.0;
481bcc973b7SBarry Smith     for (j=0; j<n; j++) {
482bcc973b7SBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
483bcc973b7SBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
484bcc973b7SBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
485bcc973b7SBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
486bcc973b7SBarry Smith       jrow++;
487bcc973b7SBarry Smith      }
488bcc973b7SBarry Smith     y[4*i]   = sum1;
489bcc973b7SBarry Smith     y[4*i+1] = sum2;
490bcc973b7SBarry Smith     y[4*i+2] = sum3;
491bcc973b7SBarry Smith     y[4*i+3] = sum4;
492bcc973b7SBarry Smith   }
493bcc973b7SBarry Smith 
494b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
4951ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
4961ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
497bcc973b7SBarry Smith   PetscFunctionReturn(0);
498bcc973b7SBarry Smith }
499bcc973b7SBarry Smith 
5004a2ae208SSatish Balay #undef __FUNCT__
501b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
502dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
503bcc973b7SBarry Smith {
5044cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
505bcc973b7SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
50687828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
507dfbe8321SBarry Smith   PetscErrorCode ierr;
508b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
509bcc973b7SBarry Smith 
510bcc973b7SBarry Smith   PetscFunctionBegin;
511bcc973b7SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
5121ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5131ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
514bcc973b7SBarry Smith   for (i=0; i<m; i++) {
515bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
516bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
517bcc973b7SBarry Smith     n      = a->i[i+1] - a->i[i];
518bcc973b7SBarry Smith     alpha1 = x[4*i];
519bcc973b7SBarry Smith     alpha2 = x[4*i+1];
520bcc973b7SBarry Smith     alpha3 = x[4*i+2];
521bcc973b7SBarry Smith     alpha4 = x[4*i+3];
522bcc973b7SBarry Smith     while (n-->0) {
523bcc973b7SBarry Smith       y[4*(*idx)]   += alpha1*(*v);
524bcc973b7SBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
525bcc973b7SBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
526bcc973b7SBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
527bcc973b7SBarry Smith       idx++; v++;
528bcc973b7SBarry Smith     }
529bcc973b7SBarry Smith   }
530b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
5311ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5321ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
533bcc973b7SBarry Smith   PetscFunctionReturn(0);
534bcc973b7SBarry Smith }
535bcc973b7SBarry Smith 
5364a2ae208SSatish Balay #undef __FUNCT__
537b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
538dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
539bcc973b7SBarry Smith {
5404cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
541f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
54287828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
543dfbe8321SBarry Smith   PetscErrorCode ierr;
544b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
545b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
546f9fae5adSBarry Smith 
547f9fae5adSBarry Smith   PetscFunctionBegin;
548f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5491ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5501ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
551f9fae5adSBarry Smith   idx  = a->j;
552f9fae5adSBarry Smith   v    = a->a;
553f9fae5adSBarry Smith   ii   = a->i;
554f9fae5adSBarry Smith 
555f9fae5adSBarry Smith   for (i=0; i<m; i++) {
556f9fae5adSBarry Smith     jrow = ii[i];
557f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
558f9fae5adSBarry Smith     sum1  = 0.0;
559f9fae5adSBarry Smith     sum2  = 0.0;
560f9fae5adSBarry Smith     sum3  = 0.0;
561f9fae5adSBarry Smith     sum4  = 0.0;
562f9fae5adSBarry Smith     for (j=0; j<n; j++) {
563f9fae5adSBarry Smith       sum1 += v[jrow]*x[4*idx[jrow]];
564f9fae5adSBarry Smith       sum2 += v[jrow]*x[4*idx[jrow]+1];
565f9fae5adSBarry Smith       sum3 += v[jrow]*x[4*idx[jrow]+2];
566f9fae5adSBarry Smith       sum4 += v[jrow]*x[4*idx[jrow]+3];
567f9fae5adSBarry Smith       jrow++;
568f9fae5adSBarry Smith      }
569f9fae5adSBarry Smith     y[4*i]   += sum1;
570f9fae5adSBarry Smith     y[4*i+1] += sum2;
571f9fae5adSBarry Smith     y[4*i+2] += sum3;
572f9fae5adSBarry Smith     y[4*i+3] += sum4;
573f9fae5adSBarry Smith   }
574f9fae5adSBarry Smith 
575b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*m);
5761ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
5771ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
578f9fae5adSBarry Smith   PetscFunctionReturn(0);
579bcc973b7SBarry Smith }
5804a2ae208SSatish Balay #undef __FUNCT__
581b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
582dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
583bcc973b7SBarry Smith {
5844cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
585f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
58687828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
587dfbe8321SBarry Smith   PetscErrorCode ierr;
588b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
589f9fae5adSBarry Smith 
590f9fae5adSBarry Smith   PetscFunctionBegin;
591f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
5921ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
5931ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
594bfec09a0SHong Zhang 
595f9fae5adSBarry Smith   for (i=0; i<m; i++) {
596bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
597bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
598f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
599f9fae5adSBarry Smith     alpha1 = x[4*i];
600f9fae5adSBarry Smith     alpha2 = x[4*i+1];
601f9fae5adSBarry Smith     alpha3 = x[4*i+2];
602f9fae5adSBarry Smith     alpha4 = x[4*i+3];
603f9fae5adSBarry Smith     while (n-->0) {
604f9fae5adSBarry Smith       y[4*(*idx)]   += alpha1*(*v);
605f9fae5adSBarry Smith       y[4*(*idx)+1] += alpha2*(*v);
606f9fae5adSBarry Smith       y[4*(*idx)+2] += alpha3*(*v);
607f9fae5adSBarry Smith       y[4*(*idx)+3] += alpha4*(*v);
608f9fae5adSBarry Smith       idx++; v++;
609f9fae5adSBarry Smith     }
610f9fae5adSBarry Smith   }
611b0a32e0cSBarry Smith   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
6121ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6131ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
614f9fae5adSBarry Smith   PetscFunctionReturn(0);
615f9fae5adSBarry Smith }
616f9fae5adSBarry Smith /* ------------------------------------------------------------------------------*/
6176cd98798SBarry Smith 
6184a2ae208SSatish Balay #undef __FUNCT__
619b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_5"
620dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
621f9fae5adSBarry Smith {
6224cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
623f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
62487828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
625dfbe8321SBarry Smith   PetscErrorCode ierr;
626b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
627b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
628f9fae5adSBarry Smith 
629f9fae5adSBarry Smith   PetscFunctionBegin;
6301ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6311ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
632f9fae5adSBarry Smith   idx  = a->j;
633f9fae5adSBarry Smith   v    = a->a;
634f9fae5adSBarry Smith   ii   = a->i;
635f9fae5adSBarry Smith 
636f9fae5adSBarry Smith   for (i=0; i<m; i++) {
637f9fae5adSBarry Smith     jrow = ii[i];
638f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
639f9fae5adSBarry Smith     sum1  = 0.0;
640f9fae5adSBarry Smith     sum2  = 0.0;
641f9fae5adSBarry Smith     sum3  = 0.0;
642f9fae5adSBarry Smith     sum4  = 0.0;
643f9fae5adSBarry Smith     sum5  = 0.0;
644f9fae5adSBarry Smith     for (j=0; j<n; j++) {
645f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
646f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
647f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
648f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
649f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
650f9fae5adSBarry Smith       jrow++;
651f9fae5adSBarry Smith      }
652f9fae5adSBarry Smith     y[5*i]   = sum1;
653f9fae5adSBarry Smith     y[5*i+1] = sum2;
654f9fae5adSBarry Smith     y[5*i+2] = sum3;
655f9fae5adSBarry Smith     y[5*i+3] = sum4;
656f9fae5adSBarry Smith     y[5*i+4] = sum5;
657f9fae5adSBarry Smith   }
658f9fae5adSBarry Smith 
659b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*m);
6601ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
6611ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
662f9fae5adSBarry Smith   PetscFunctionReturn(0);
663f9fae5adSBarry Smith }
664f9fae5adSBarry Smith 
6654a2ae208SSatish Balay #undef __FUNCT__
666b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
667dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
668f9fae5adSBarry Smith {
6694cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
670f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
67187828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
672dfbe8321SBarry Smith   PetscErrorCode ierr;
673b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
674f9fae5adSBarry Smith 
675f9fae5adSBarry Smith   PetscFunctionBegin;
676f9fae5adSBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
6771ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
6781ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
679bfec09a0SHong Zhang 
680f9fae5adSBarry Smith   for (i=0; i<m; i++) {
681bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
682bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
683f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
684f9fae5adSBarry Smith     alpha1 = x[5*i];
685f9fae5adSBarry Smith     alpha2 = x[5*i+1];
686f9fae5adSBarry Smith     alpha3 = x[5*i+2];
687f9fae5adSBarry Smith     alpha4 = x[5*i+3];
688f9fae5adSBarry Smith     alpha5 = x[5*i+4];
689f9fae5adSBarry Smith     while (n-->0) {
690f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
691f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
692f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
693f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
694f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
695f9fae5adSBarry Smith       idx++; v++;
696f9fae5adSBarry Smith     }
697f9fae5adSBarry Smith   }
698b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
6991ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7001ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
701f9fae5adSBarry Smith   PetscFunctionReturn(0);
702f9fae5adSBarry Smith }
703f9fae5adSBarry Smith 
7044a2ae208SSatish Balay #undef __FUNCT__
705b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
706dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
707f9fae5adSBarry Smith {
7084cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
709f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
71087828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
711dfbe8321SBarry Smith   PetscErrorCode ierr;
712b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
713b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
714f9fae5adSBarry Smith 
715f9fae5adSBarry Smith   PetscFunctionBegin;
716f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7171ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7181ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
719f9fae5adSBarry Smith   idx  = a->j;
720f9fae5adSBarry Smith   v    = a->a;
721f9fae5adSBarry Smith   ii   = a->i;
722f9fae5adSBarry Smith 
723f9fae5adSBarry Smith   for (i=0; i<m; i++) {
724f9fae5adSBarry Smith     jrow = ii[i];
725f9fae5adSBarry Smith     n    = ii[i+1] - jrow;
726f9fae5adSBarry Smith     sum1  = 0.0;
727f9fae5adSBarry Smith     sum2  = 0.0;
728f9fae5adSBarry Smith     sum3  = 0.0;
729f9fae5adSBarry Smith     sum4  = 0.0;
730f9fae5adSBarry Smith     sum5  = 0.0;
731f9fae5adSBarry Smith     for (j=0; j<n; j++) {
732f9fae5adSBarry Smith       sum1 += v[jrow]*x[5*idx[jrow]];
733f9fae5adSBarry Smith       sum2 += v[jrow]*x[5*idx[jrow]+1];
734f9fae5adSBarry Smith       sum3 += v[jrow]*x[5*idx[jrow]+2];
735f9fae5adSBarry Smith       sum4 += v[jrow]*x[5*idx[jrow]+3];
736f9fae5adSBarry Smith       sum5 += v[jrow]*x[5*idx[jrow]+4];
737f9fae5adSBarry Smith       jrow++;
738f9fae5adSBarry Smith      }
739f9fae5adSBarry Smith     y[5*i]   += sum1;
740f9fae5adSBarry Smith     y[5*i+1] += sum2;
741f9fae5adSBarry Smith     y[5*i+2] += sum3;
742f9fae5adSBarry Smith     y[5*i+3] += sum4;
743f9fae5adSBarry Smith     y[5*i+4] += sum5;
744f9fae5adSBarry Smith   }
745f9fae5adSBarry Smith 
746b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
7471ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7481ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
749f9fae5adSBarry Smith   PetscFunctionReturn(0);
750f9fae5adSBarry Smith }
751f9fae5adSBarry Smith 
7524a2ae208SSatish Balay #undef __FUNCT__
753b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
754dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
755f9fae5adSBarry Smith {
7564cb9d9b8SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
757f9fae5adSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
75887828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
759dfbe8321SBarry Smith   PetscErrorCode ierr;
760b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
761f9fae5adSBarry Smith 
762f9fae5adSBarry Smith   PetscFunctionBegin;
763f9fae5adSBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
7641ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
7651ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
766bfec09a0SHong Zhang 
767f9fae5adSBarry Smith   for (i=0; i<m; i++) {
768bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
769bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
770f9fae5adSBarry Smith     n      = a->i[i+1] - a->i[i];
771f9fae5adSBarry Smith     alpha1 = x[5*i];
772f9fae5adSBarry Smith     alpha2 = x[5*i+1];
773f9fae5adSBarry Smith     alpha3 = x[5*i+2];
774f9fae5adSBarry Smith     alpha4 = x[5*i+3];
775f9fae5adSBarry Smith     alpha5 = x[5*i+4];
776f9fae5adSBarry Smith     while (n-->0) {
777f9fae5adSBarry Smith       y[5*(*idx)]   += alpha1*(*v);
778f9fae5adSBarry Smith       y[5*(*idx)+1] += alpha2*(*v);
779f9fae5adSBarry Smith       y[5*(*idx)+2] += alpha3*(*v);
780f9fae5adSBarry Smith       y[5*(*idx)+3] += alpha4*(*v);
781f9fae5adSBarry Smith       y[5*(*idx)+4] += alpha5*(*v);
782f9fae5adSBarry Smith       idx++; v++;
783f9fae5adSBarry Smith     }
784f9fae5adSBarry Smith   }
785b0a32e0cSBarry Smith   PetscLogFlops(10*a->nz);
7861ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
7871ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
788f9fae5adSBarry Smith   PetscFunctionReturn(0);
789bcc973b7SBarry Smith }
790bcc973b7SBarry Smith 
7916cd98798SBarry Smith /* ------------------------------------------------------------------------------*/
7924a2ae208SSatish Balay #undef __FUNCT__
793b9617806SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_6"
794dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
7956cd98798SBarry Smith {
7966cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
7976cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
79887828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
799dfbe8321SBarry Smith   PetscErrorCode ierr;
800b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
801b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
8026cd98798SBarry Smith 
8036cd98798SBarry Smith   PetscFunctionBegin;
8041ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8051ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
8066cd98798SBarry Smith   idx  = a->j;
8076cd98798SBarry Smith   v    = a->a;
8086cd98798SBarry Smith   ii   = a->i;
8096cd98798SBarry Smith 
8106cd98798SBarry Smith   for (i=0; i<m; i++) {
8116cd98798SBarry Smith     jrow = ii[i];
8126cd98798SBarry Smith     n    = ii[i+1] - jrow;
8136cd98798SBarry Smith     sum1  = 0.0;
8146cd98798SBarry Smith     sum2  = 0.0;
8156cd98798SBarry Smith     sum3  = 0.0;
8166cd98798SBarry Smith     sum4  = 0.0;
8176cd98798SBarry Smith     sum5  = 0.0;
8186cd98798SBarry Smith     sum6  = 0.0;
8196cd98798SBarry Smith     for (j=0; j<n; j++) {
8206cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
8216cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
8226cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
8236cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
8246cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
8256cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
8266cd98798SBarry Smith       jrow++;
8276cd98798SBarry Smith      }
8286cd98798SBarry Smith     y[6*i]   = sum1;
8296cd98798SBarry Smith     y[6*i+1] = sum2;
8306cd98798SBarry Smith     y[6*i+2] = sum3;
8316cd98798SBarry Smith     y[6*i+3] = sum4;
8326cd98798SBarry Smith     y[6*i+4] = sum5;
8336cd98798SBarry Smith     y[6*i+5] = sum6;
8346cd98798SBarry Smith   }
8356cd98798SBarry Smith 
836b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*m);
8371ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8381ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8396cd98798SBarry Smith   PetscFunctionReturn(0);
8406cd98798SBarry Smith }
8416cd98798SBarry Smith 
8424a2ae208SSatish Balay #undef __FUNCT__
843b9617806SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
844dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
8456cd98798SBarry Smith {
8466cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
8476cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
84887828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
849dfbe8321SBarry Smith   PetscErrorCode ierr;
850b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
8516cd98798SBarry Smith 
8526cd98798SBarry Smith   PetscFunctionBegin;
8536cd98798SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
8541ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8551ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
856bfec09a0SHong Zhang 
8576cd98798SBarry Smith   for (i=0; i<m; i++) {
858bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
859bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
8606cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
8616cd98798SBarry Smith     alpha1 = x[6*i];
8626cd98798SBarry Smith     alpha2 = x[6*i+1];
8636cd98798SBarry Smith     alpha3 = x[6*i+2];
8646cd98798SBarry Smith     alpha4 = x[6*i+3];
8656cd98798SBarry Smith     alpha5 = x[6*i+4];
8666cd98798SBarry Smith     alpha6 = x[6*i+5];
8676cd98798SBarry Smith     while (n-->0) {
8686cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
8696cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
8706cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
8716cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
8726cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
8736cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
8746cd98798SBarry Smith       idx++; v++;
8756cd98798SBarry Smith     }
8766cd98798SBarry Smith   }
877b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
8781ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
8791ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
8806cd98798SBarry Smith   PetscFunctionReturn(0);
8816cd98798SBarry Smith }
8826cd98798SBarry Smith 
8834a2ae208SSatish Balay #undef __FUNCT__
884b9617806SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
885dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
8866cd98798SBarry Smith {
8876cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
8886cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
88987828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
890dfbe8321SBarry Smith   PetscErrorCode ierr;
891b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
892b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
8936cd98798SBarry Smith 
8946cd98798SBarry Smith   PetscFunctionBegin;
8956cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
8961ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
8971ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
8986cd98798SBarry Smith   idx  = a->j;
8996cd98798SBarry Smith   v    = a->a;
9006cd98798SBarry Smith   ii   = a->i;
9016cd98798SBarry Smith 
9026cd98798SBarry Smith   for (i=0; i<m; i++) {
9036cd98798SBarry Smith     jrow = ii[i];
9046cd98798SBarry Smith     n    = ii[i+1] - jrow;
9056cd98798SBarry Smith     sum1  = 0.0;
9066cd98798SBarry Smith     sum2  = 0.0;
9076cd98798SBarry Smith     sum3  = 0.0;
9086cd98798SBarry Smith     sum4  = 0.0;
9096cd98798SBarry Smith     sum5  = 0.0;
9106cd98798SBarry Smith     sum6  = 0.0;
9116cd98798SBarry Smith     for (j=0; j<n; j++) {
9126cd98798SBarry Smith       sum1 += v[jrow]*x[6*idx[jrow]];
9136cd98798SBarry Smith       sum2 += v[jrow]*x[6*idx[jrow]+1];
9146cd98798SBarry Smith       sum3 += v[jrow]*x[6*idx[jrow]+2];
9156cd98798SBarry Smith       sum4 += v[jrow]*x[6*idx[jrow]+3];
9166cd98798SBarry Smith       sum5 += v[jrow]*x[6*idx[jrow]+4];
9176cd98798SBarry Smith       sum6 += v[jrow]*x[6*idx[jrow]+5];
9186cd98798SBarry Smith       jrow++;
9196cd98798SBarry Smith      }
9206cd98798SBarry Smith     y[6*i]   += sum1;
9216cd98798SBarry Smith     y[6*i+1] += sum2;
9226cd98798SBarry Smith     y[6*i+2] += sum3;
9236cd98798SBarry Smith     y[6*i+3] += sum4;
9246cd98798SBarry Smith     y[6*i+4] += sum5;
9256cd98798SBarry Smith     y[6*i+5] += sum6;
9266cd98798SBarry Smith   }
9276cd98798SBarry Smith 
928b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
9291ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
9301ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9316cd98798SBarry Smith   PetscFunctionReturn(0);
9326cd98798SBarry Smith }
9336cd98798SBarry Smith 
9344a2ae208SSatish Balay #undef __FUNCT__
935b9617806SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
936dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
9376cd98798SBarry Smith {
9386cd98798SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
9396cd98798SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
94087828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
941dfbe8321SBarry Smith   PetscErrorCode ierr;
942b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
9436cd98798SBarry Smith 
9446cd98798SBarry Smith   PetscFunctionBegin;
9456cd98798SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
9461ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
9471ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
948bfec09a0SHong Zhang 
9496cd98798SBarry Smith   for (i=0; i<m; i++) {
950bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
951bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
9526cd98798SBarry Smith     n      = a->i[i+1] - a->i[i];
9536cd98798SBarry Smith     alpha1 = x[6*i];
9546cd98798SBarry Smith     alpha2 = x[6*i+1];
9556cd98798SBarry Smith     alpha3 = x[6*i+2];
9566cd98798SBarry Smith     alpha4 = x[6*i+3];
9576cd98798SBarry Smith     alpha5 = x[6*i+4];
9586cd98798SBarry Smith     alpha6 = x[6*i+5];
9596cd98798SBarry Smith     while (n-->0) {
9606cd98798SBarry Smith       y[6*(*idx)]   += alpha1*(*v);
9616cd98798SBarry Smith       y[6*(*idx)+1] += alpha2*(*v);
9626cd98798SBarry Smith       y[6*(*idx)+2] += alpha3*(*v);
9636cd98798SBarry Smith       y[6*(*idx)+3] += alpha4*(*v);
9646cd98798SBarry Smith       y[6*(*idx)+4] += alpha5*(*v);
9656cd98798SBarry Smith       y[6*(*idx)+5] += alpha6*(*v);
9666cd98798SBarry Smith       idx++; v++;
9676cd98798SBarry Smith     }
9686cd98798SBarry Smith   }
969b0a32e0cSBarry Smith   PetscLogFlops(12*a->nz);
9701ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
9711ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
9726cd98798SBarry Smith   PetscFunctionReturn(0);
9736cd98798SBarry Smith }
9746cd98798SBarry Smith 
97566ed3db0SBarry Smith /* ------------------------------------------------------------------------------*/
97666ed3db0SBarry Smith #undef __FUNCT__
977*ed8eea03SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_7"
978*ed8eea03SBarry Smith PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
979*ed8eea03SBarry Smith {
980*ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
981*ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
982*ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
983*ed8eea03SBarry Smith   PetscErrorCode ierr;
984*ed8eea03SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
985*ed8eea03SBarry Smith   PetscInt       n,i,jrow,j;
986*ed8eea03SBarry Smith 
987*ed8eea03SBarry Smith   PetscFunctionBegin;
988*ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
989*ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
990*ed8eea03SBarry Smith   idx  = a->j;
991*ed8eea03SBarry Smith   v    = a->a;
992*ed8eea03SBarry Smith   ii   = a->i;
993*ed8eea03SBarry Smith 
994*ed8eea03SBarry Smith   for (i=0; i<m; i++) {
995*ed8eea03SBarry Smith     jrow = ii[i];
996*ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
997*ed8eea03SBarry Smith     sum1  = 0.0;
998*ed8eea03SBarry Smith     sum2  = 0.0;
999*ed8eea03SBarry Smith     sum3  = 0.0;
1000*ed8eea03SBarry Smith     sum4  = 0.0;
1001*ed8eea03SBarry Smith     sum5  = 0.0;
1002*ed8eea03SBarry Smith     sum6  = 0.0;
1003*ed8eea03SBarry Smith     sum7  = 0.0;
1004*ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1005*ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1006*ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1007*ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1008*ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1009*ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1010*ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1011*ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1012*ed8eea03SBarry Smith       jrow++;
1013*ed8eea03SBarry Smith      }
1014*ed8eea03SBarry Smith     y[7*i]   = sum1;
1015*ed8eea03SBarry Smith     y[7*i+1] = sum2;
1016*ed8eea03SBarry Smith     y[7*i+2] = sum3;
1017*ed8eea03SBarry Smith     y[7*i+3] = sum4;
1018*ed8eea03SBarry Smith     y[7*i+4] = sum5;
1019*ed8eea03SBarry Smith     y[7*i+5] = sum6;
1020*ed8eea03SBarry Smith     y[7*i+6] = sum7;
1021*ed8eea03SBarry Smith   }
1022*ed8eea03SBarry Smith 
1023*ed8eea03SBarry Smith   PetscLogFlops(14*a->nz - 7*m);
1024*ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1025*ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1026*ed8eea03SBarry Smith   PetscFunctionReturn(0);
1027*ed8eea03SBarry Smith }
1028*ed8eea03SBarry Smith 
1029*ed8eea03SBarry Smith #undef __FUNCT__
1030*ed8eea03SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1031*ed8eea03SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1032*ed8eea03SBarry Smith {
1033*ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1034*ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1035*ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1036*ed8eea03SBarry Smith   PetscErrorCode ierr;
1037*ed8eea03SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
1038*ed8eea03SBarry Smith 
1039*ed8eea03SBarry Smith   PetscFunctionBegin;
1040*ed8eea03SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1041*ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1042*ed8eea03SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1043*ed8eea03SBarry Smith 
1044*ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1045*ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1046*ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1047*ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1048*ed8eea03SBarry Smith     alpha1 = x[7*i];
1049*ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1050*ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1051*ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1052*ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1053*ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1054*ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1055*ed8eea03SBarry Smith     while (n-->0) {
1056*ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1057*ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1058*ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1059*ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1060*ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1061*ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1062*ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1063*ed8eea03SBarry Smith       idx++; v++;
1064*ed8eea03SBarry Smith     }
1065*ed8eea03SBarry Smith   }
1066*ed8eea03SBarry Smith   PetscLogFlops(14*a->nz - 7*b->AIJ->n);
1067*ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1068*ed8eea03SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1069*ed8eea03SBarry Smith   PetscFunctionReturn(0);
1070*ed8eea03SBarry Smith }
1071*ed8eea03SBarry Smith 
1072*ed8eea03SBarry Smith #undef __FUNCT__
1073*ed8eea03SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1074*ed8eea03SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1075*ed8eea03SBarry Smith {
1076*ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1077*ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1078*ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1079*ed8eea03SBarry Smith   PetscErrorCode ierr;
1080*ed8eea03SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1081*ed8eea03SBarry Smith   PetscInt       n,i,jrow,j;
1082*ed8eea03SBarry Smith 
1083*ed8eea03SBarry Smith   PetscFunctionBegin;
1084*ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1085*ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1086*ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1087*ed8eea03SBarry Smith   idx  = a->j;
1088*ed8eea03SBarry Smith   v    = a->a;
1089*ed8eea03SBarry Smith   ii   = a->i;
1090*ed8eea03SBarry Smith 
1091*ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1092*ed8eea03SBarry Smith     jrow = ii[i];
1093*ed8eea03SBarry Smith     n    = ii[i+1] - jrow;
1094*ed8eea03SBarry Smith     sum1  = 0.0;
1095*ed8eea03SBarry Smith     sum2  = 0.0;
1096*ed8eea03SBarry Smith     sum3  = 0.0;
1097*ed8eea03SBarry Smith     sum4  = 0.0;
1098*ed8eea03SBarry Smith     sum5  = 0.0;
1099*ed8eea03SBarry Smith     sum6  = 0.0;
1100*ed8eea03SBarry Smith     sum7  = 0.0;
1101*ed8eea03SBarry Smith     for (j=0; j<n; j++) {
1102*ed8eea03SBarry Smith       sum1 += v[jrow]*x[7*idx[jrow]];
1103*ed8eea03SBarry Smith       sum2 += v[jrow]*x[7*idx[jrow]+1];
1104*ed8eea03SBarry Smith       sum3 += v[jrow]*x[7*idx[jrow]+2];
1105*ed8eea03SBarry Smith       sum4 += v[jrow]*x[7*idx[jrow]+3];
1106*ed8eea03SBarry Smith       sum5 += v[jrow]*x[7*idx[jrow]+4];
1107*ed8eea03SBarry Smith       sum6 += v[jrow]*x[7*idx[jrow]+5];
1108*ed8eea03SBarry Smith       sum7 += v[jrow]*x[7*idx[jrow]+6];
1109*ed8eea03SBarry Smith       jrow++;
1110*ed8eea03SBarry Smith      }
1111*ed8eea03SBarry Smith     y[7*i]   += sum1;
1112*ed8eea03SBarry Smith     y[7*i+1] += sum2;
1113*ed8eea03SBarry Smith     y[7*i+2] += sum3;
1114*ed8eea03SBarry Smith     y[7*i+3] += sum4;
1115*ed8eea03SBarry Smith     y[7*i+4] += sum5;
1116*ed8eea03SBarry Smith     y[7*i+5] += sum6;
1117*ed8eea03SBarry Smith     y[7*i+6] += sum7;
1118*ed8eea03SBarry Smith   }
1119*ed8eea03SBarry Smith 
1120*ed8eea03SBarry Smith   PetscLogFlops(14*a->nz);
1121*ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1122*ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1123*ed8eea03SBarry Smith   PetscFunctionReturn(0);
1124*ed8eea03SBarry Smith }
1125*ed8eea03SBarry Smith 
1126*ed8eea03SBarry Smith #undef __FUNCT__
1127*ed8eea03SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1128*ed8eea03SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1129*ed8eea03SBarry Smith {
1130*ed8eea03SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1131*ed8eea03SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1132*ed8eea03SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1133*ed8eea03SBarry Smith   PetscErrorCode ierr;
1134*ed8eea03SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
1135*ed8eea03SBarry Smith 
1136*ed8eea03SBarry Smith   PetscFunctionBegin;
1137*ed8eea03SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1138*ed8eea03SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1139*ed8eea03SBarry Smith   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1140*ed8eea03SBarry Smith   for (i=0; i<m; i++) {
1141*ed8eea03SBarry Smith     idx    = a->j + a->i[i] ;
1142*ed8eea03SBarry Smith     v      = a->a + a->i[i] ;
1143*ed8eea03SBarry Smith     n      = a->i[i+1] - a->i[i];
1144*ed8eea03SBarry Smith     alpha1 = x[7*i];
1145*ed8eea03SBarry Smith     alpha2 = x[7*i+1];
1146*ed8eea03SBarry Smith     alpha3 = x[7*i+2];
1147*ed8eea03SBarry Smith     alpha4 = x[7*i+3];
1148*ed8eea03SBarry Smith     alpha5 = x[7*i+4];
1149*ed8eea03SBarry Smith     alpha6 = x[7*i+5];
1150*ed8eea03SBarry Smith     alpha7 = x[7*i+6];
1151*ed8eea03SBarry Smith     while (n-->0) {
1152*ed8eea03SBarry Smith       y[7*(*idx)]   += alpha1*(*v);
1153*ed8eea03SBarry Smith       y[7*(*idx)+1] += alpha2*(*v);
1154*ed8eea03SBarry Smith       y[7*(*idx)+2] += alpha3*(*v);
1155*ed8eea03SBarry Smith       y[7*(*idx)+3] += alpha4*(*v);
1156*ed8eea03SBarry Smith       y[7*(*idx)+4] += alpha5*(*v);
1157*ed8eea03SBarry Smith       y[7*(*idx)+5] += alpha6*(*v);
1158*ed8eea03SBarry Smith       y[7*(*idx)+6] += alpha7*(*v);
1159*ed8eea03SBarry Smith       idx++; v++;
1160*ed8eea03SBarry Smith     }
1161*ed8eea03SBarry Smith   }
1162*ed8eea03SBarry Smith   PetscLogFlops(14*a->nz);
1163*ed8eea03SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1164*ed8eea03SBarry Smith   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1165*ed8eea03SBarry Smith   PetscFunctionReturn(0);
1166*ed8eea03SBarry Smith }
1167*ed8eea03SBarry Smith 
1168*ed8eea03SBarry Smith #undef __FUNCT__
116966ed3db0SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_8"
1170dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
117166ed3db0SBarry Smith {
117266ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
117366ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
117487828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1175dfbe8321SBarry Smith   PetscErrorCode ierr;
1176b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1177b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
117866ed3db0SBarry Smith 
117966ed3db0SBarry Smith   PetscFunctionBegin;
11801ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
11811ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
118266ed3db0SBarry Smith   idx  = a->j;
118366ed3db0SBarry Smith   v    = a->a;
118466ed3db0SBarry Smith   ii   = a->i;
118566ed3db0SBarry Smith 
118666ed3db0SBarry Smith   for (i=0; i<m; i++) {
118766ed3db0SBarry Smith     jrow = ii[i];
118866ed3db0SBarry Smith     n    = ii[i+1] - jrow;
118966ed3db0SBarry Smith     sum1  = 0.0;
119066ed3db0SBarry Smith     sum2  = 0.0;
119166ed3db0SBarry Smith     sum3  = 0.0;
119266ed3db0SBarry Smith     sum4  = 0.0;
119366ed3db0SBarry Smith     sum5  = 0.0;
119466ed3db0SBarry Smith     sum6  = 0.0;
119566ed3db0SBarry Smith     sum7  = 0.0;
119666ed3db0SBarry Smith     sum8  = 0.0;
119766ed3db0SBarry Smith     for (j=0; j<n; j++) {
119866ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
119966ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
120066ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
120166ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
120266ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
120366ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
120466ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
120566ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
120666ed3db0SBarry Smith       jrow++;
120766ed3db0SBarry Smith      }
120866ed3db0SBarry Smith     y[8*i]   = sum1;
120966ed3db0SBarry Smith     y[8*i+1] = sum2;
121066ed3db0SBarry Smith     y[8*i+2] = sum3;
121166ed3db0SBarry Smith     y[8*i+3] = sum4;
121266ed3db0SBarry Smith     y[8*i+4] = sum5;
121366ed3db0SBarry Smith     y[8*i+5] = sum6;
121466ed3db0SBarry Smith     y[8*i+6] = sum7;
121566ed3db0SBarry Smith     y[8*i+7] = sum8;
121666ed3db0SBarry Smith   }
121766ed3db0SBarry Smith 
121866ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*m);
12191ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
12201ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
122166ed3db0SBarry Smith   PetscFunctionReturn(0);
122266ed3db0SBarry Smith }
122366ed3db0SBarry Smith 
122466ed3db0SBarry Smith #undef __FUNCT__
122566ed3db0SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1226dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
122766ed3db0SBarry Smith {
122866ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
122966ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
123087828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1231dfbe8321SBarry Smith   PetscErrorCode ierr;
1232b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
123366ed3db0SBarry Smith 
123466ed3db0SBarry Smith   PetscFunctionBegin;
123566ed3db0SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
12361ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12371ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1238bfec09a0SHong Zhang 
123966ed3db0SBarry Smith   for (i=0; i<m; i++) {
1240bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1241bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
124266ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
124366ed3db0SBarry Smith     alpha1 = x[8*i];
124466ed3db0SBarry Smith     alpha2 = x[8*i+1];
124566ed3db0SBarry Smith     alpha3 = x[8*i+2];
124666ed3db0SBarry Smith     alpha4 = x[8*i+3];
124766ed3db0SBarry Smith     alpha5 = x[8*i+4];
124866ed3db0SBarry Smith     alpha6 = x[8*i+5];
124966ed3db0SBarry Smith     alpha7 = x[8*i+6];
125066ed3db0SBarry Smith     alpha8 = x[8*i+7];
125166ed3db0SBarry Smith     while (n-->0) {
125266ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
125366ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
125466ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
125566ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
125666ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
125766ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
125866ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
125966ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
126066ed3db0SBarry Smith       idx++; v++;
126166ed3db0SBarry Smith     }
126266ed3db0SBarry Smith   }
126366ed3db0SBarry Smith   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
12641ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
12651ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
126666ed3db0SBarry Smith   PetscFunctionReturn(0);
126766ed3db0SBarry Smith }
126866ed3db0SBarry Smith 
126966ed3db0SBarry Smith #undef __FUNCT__
127066ed3db0SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1271dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
127266ed3db0SBarry Smith {
127366ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
127466ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
127587828ca2SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1276dfbe8321SBarry Smith   PetscErrorCode ierr;
1277b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1278b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
127966ed3db0SBarry Smith 
128066ed3db0SBarry Smith   PetscFunctionBegin;
128166ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
12821ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
12831ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
128466ed3db0SBarry Smith   idx  = a->j;
128566ed3db0SBarry Smith   v    = a->a;
128666ed3db0SBarry Smith   ii   = a->i;
128766ed3db0SBarry Smith 
128866ed3db0SBarry Smith   for (i=0; i<m; i++) {
128966ed3db0SBarry Smith     jrow = ii[i];
129066ed3db0SBarry Smith     n    = ii[i+1] - jrow;
129166ed3db0SBarry Smith     sum1  = 0.0;
129266ed3db0SBarry Smith     sum2  = 0.0;
129366ed3db0SBarry Smith     sum3  = 0.0;
129466ed3db0SBarry Smith     sum4  = 0.0;
129566ed3db0SBarry Smith     sum5  = 0.0;
129666ed3db0SBarry Smith     sum6  = 0.0;
129766ed3db0SBarry Smith     sum7  = 0.0;
129866ed3db0SBarry Smith     sum8  = 0.0;
129966ed3db0SBarry Smith     for (j=0; j<n; j++) {
130066ed3db0SBarry Smith       sum1 += v[jrow]*x[8*idx[jrow]];
130166ed3db0SBarry Smith       sum2 += v[jrow]*x[8*idx[jrow]+1];
130266ed3db0SBarry Smith       sum3 += v[jrow]*x[8*idx[jrow]+2];
130366ed3db0SBarry Smith       sum4 += v[jrow]*x[8*idx[jrow]+3];
130466ed3db0SBarry Smith       sum5 += v[jrow]*x[8*idx[jrow]+4];
130566ed3db0SBarry Smith       sum6 += v[jrow]*x[8*idx[jrow]+5];
130666ed3db0SBarry Smith       sum7 += v[jrow]*x[8*idx[jrow]+6];
130766ed3db0SBarry Smith       sum8 += v[jrow]*x[8*idx[jrow]+7];
130866ed3db0SBarry Smith       jrow++;
130966ed3db0SBarry Smith      }
131066ed3db0SBarry Smith     y[8*i]   += sum1;
131166ed3db0SBarry Smith     y[8*i+1] += sum2;
131266ed3db0SBarry Smith     y[8*i+2] += sum3;
131366ed3db0SBarry Smith     y[8*i+3] += sum4;
131466ed3db0SBarry Smith     y[8*i+4] += sum5;
131566ed3db0SBarry Smith     y[8*i+5] += sum6;
131666ed3db0SBarry Smith     y[8*i+6] += sum7;
131766ed3db0SBarry Smith     y[8*i+7] += sum8;
131866ed3db0SBarry Smith   }
131966ed3db0SBarry Smith 
132066ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
13211ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
13221ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
132366ed3db0SBarry Smith   PetscFunctionReturn(0);
132466ed3db0SBarry Smith }
132566ed3db0SBarry Smith 
132666ed3db0SBarry Smith #undef __FUNCT__
132766ed3db0SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1328dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
132966ed3db0SBarry Smith {
133066ed3db0SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
133166ed3db0SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
133287828ca2SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1333dfbe8321SBarry Smith   PetscErrorCode ierr;
1334b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
133566ed3db0SBarry Smith 
133666ed3db0SBarry Smith   PetscFunctionBegin;
133766ed3db0SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
13381ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
13391ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
134066ed3db0SBarry Smith   for (i=0; i<m; i++) {
1341bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1342bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
134366ed3db0SBarry Smith     n      = a->i[i+1] - a->i[i];
134466ed3db0SBarry Smith     alpha1 = x[8*i];
134566ed3db0SBarry Smith     alpha2 = x[8*i+1];
134666ed3db0SBarry Smith     alpha3 = x[8*i+2];
134766ed3db0SBarry Smith     alpha4 = x[8*i+3];
134866ed3db0SBarry Smith     alpha5 = x[8*i+4];
134966ed3db0SBarry Smith     alpha6 = x[8*i+5];
135066ed3db0SBarry Smith     alpha7 = x[8*i+6];
135166ed3db0SBarry Smith     alpha8 = x[8*i+7];
135266ed3db0SBarry Smith     while (n-->0) {
135366ed3db0SBarry Smith       y[8*(*idx)]   += alpha1*(*v);
135466ed3db0SBarry Smith       y[8*(*idx)+1] += alpha2*(*v);
135566ed3db0SBarry Smith       y[8*(*idx)+2] += alpha3*(*v);
135666ed3db0SBarry Smith       y[8*(*idx)+3] += alpha4*(*v);
135766ed3db0SBarry Smith       y[8*(*idx)+4] += alpha5*(*v);
135866ed3db0SBarry Smith       y[8*(*idx)+5] += alpha6*(*v);
135966ed3db0SBarry Smith       y[8*(*idx)+6] += alpha7*(*v);
136066ed3db0SBarry Smith       y[8*(*idx)+7] += alpha8*(*v);
136166ed3db0SBarry Smith       idx++; v++;
136266ed3db0SBarry Smith     }
136366ed3db0SBarry Smith   }
136466ed3db0SBarry Smith   PetscLogFlops(16*a->nz);
13651ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
13661ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
13672f7816d4SBarry Smith   PetscFunctionReturn(0);
13682f7816d4SBarry Smith }
13692f7816d4SBarry Smith 
1370*ed8eea03SBarry Smith 
13712b692628SMatthew Knepley /* ------------------------------------------------------------------------------*/
13722b692628SMatthew Knepley #undef __FUNCT__
13732b692628SMatthew Knepley #define __FUNCT__ "MatMult_SeqMAIJ_9"
1374dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
13752b692628SMatthew Knepley {
13762b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
13772b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
13782b692628SMatthew Knepley   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1379dfbe8321SBarry Smith   PetscErrorCode ierr;
1380b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1381b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
13822b692628SMatthew Knepley 
13832b692628SMatthew Knepley   PetscFunctionBegin;
13841ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
13851ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
13862b692628SMatthew Knepley   idx  = a->j;
13872b692628SMatthew Knepley   v    = a->a;
13882b692628SMatthew Knepley   ii   = a->i;
13892b692628SMatthew Knepley 
13902b692628SMatthew Knepley   for (i=0; i<m; i++) {
13912b692628SMatthew Knepley     jrow = ii[i];
13922b692628SMatthew Knepley     n    = ii[i+1] - jrow;
13932b692628SMatthew Knepley     sum1  = 0.0;
13942b692628SMatthew Knepley     sum2  = 0.0;
13952b692628SMatthew Knepley     sum3  = 0.0;
13962b692628SMatthew Knepley     sum4  = 0.0;
13972b692628SMatthew Knepley     sum5  = 0.0;
13982b692628SMatthew Knepley     sum6  = 0.0;
13992b692628SMatthew Knepley     sum7  = 0.0;
14002b692628SMatthew Knepley     sum8  = 0.0;
14012b692628SMatthew Knepley     sum9  = 0.0;
14022b692628SMatthew Knepley     for (j=0; j<n; j++) {
14032b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
14042b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
14052b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
14062b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
14072b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
14082b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
14092b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
14102b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
14112b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
14122b692628SMatthew Knepley       jrow++;
14132b692628SMatthew Knepley      }
14142b692628SMatthew Knepley     y[9*i]   = sum1;
14152b692628SMatthew Knepley     y[9*i+1] = sum2;
14162b692628SMatthew Knepley     y[9*i+2] = sum3;
14172b692628SMatthew Knepley     y[9*i+3] = sum4;
14182b692628SMatthew Knepley     y[9*i+4] = sum5;
14192b692628SMatthew Knepley     y[9*i+5] = sum6;
14202b692628SMatthew Knepley     y[9*i+6] = sum7;
14212b692628SMatthew Knepley     y[9*i+7] = sum8;
14222b692628SMatthew Knepley     y[9*i+8] = sum9;
14232b692628SMatthew Knepley   }
14242b692628SMatthew Knepley 
14252b692628SMatthew Knepley   PetscLogFlops(18*a->nz - 9*m);
14261ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
14271ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14282b692628SMatthew Knepley   PetscFunctionReturn(0);
14292b692628SMatthew Knepley }
14302b692628SMatthew Knepley 
14312b692628SMatthew Knepley #undef __FUNCT__
14322b692628SMatthew Knepley #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1433dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
14342b692628SMatthew Knepley {
14352b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
14362b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
14372b692628SMatthew Knepley   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1438dfbe8321SBarry Smith   PetscErrorCode ierr;
1439b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
14402b692628SMatthew Knepley 
14412b692628SMatthew Knepley   PetscFunctionBegin;
14422b692628SMatthew Knepley   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
14431ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14441ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14452b692628SMatthew Knepley 
14462b692628SMatthew Knepley   for (i=0; i<m; i++) {
14472b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
14482b692628SMatthew Knepley     v      = a->a + a->i[i] ;
14492b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
14502b692628SMatthew Knepley     alpha1 = x[9*i];
14512b692628SMatthew Knepley     alpha2 = x[9*i+1];
14522b692628SMatthew Knepley     alpha3 = x[9*i+2];
14532b692628SMatthew Knepley     alpha4 = x[9*i+3];
14542b692628SMatthew Knepley     alpha5 = x[9*i+4];
14552b692628SMatthew Knepley     alpha6 = x[9*i+5];
14562b692628SMatthew Knepley     alpha7 = x[9*i+6];
14572b692628SMatthew Knepley     alpha8 = x[9*i+7];
14582f6bd0c6SMatthew Knepley     alpha9 = x[9*i+8];
14592b692628SMatthew Knepley     while (n-->0) {
14602b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
14612b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
14622b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
14632b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
14642b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
14652b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
14662b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
14672b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
14682b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
14692b692628SMatthew Knepley       idx++; v++;
14702b692628SMatthew Knepley     }
14712b692628SMatthew Knepley   }
14722b692628SMatthew Knepley   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
14731ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
14741ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14752b692628SMatthew Knepley   PetscFunctionReturn(0);
14762b692628SMatthew Knepley }
14772b692628SMatthew Knepley 
14782b692628SMatthew Knepley #undef __FUNCT__
14792b692628SMatthew Knepley #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1480dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
14812b692628SMatthew Knepley {
14822b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
14832b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
14842b692628SMatthew Knepley   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1485dfbe8321SBarry Smith   PetscErrorCode ierr;
1486b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1487b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
14882b692628SMatthew Knepley 
14892b692628SMatthew Knepley   PetscFunctionBegin;
14902b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
14911ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14921ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
14932b692628SMatthew Knepley   idx  = a->j;
14942b692628SMatthew Knepley   v    = a->a;
14952b692628SMatthew Knepley   ii   = a->i;
14962b692628SMatthew Knepley 
14972b692628SMatthew Knepley   for (i=0; i<m; i++) {
14982b692628SMatthew Knepley     jrow = ii[i];
14992b692628SMatthew Knepley     n    = ii[i+1] - jrow;
15002b692628SMatthew Knepley     sum1  = 0.0;
15012b692628SMatthew Knepley     sum2  = 0.0;
15022b692628SMatthew Knepley     sum3  = 0.0;
15032b692628SMatthew Knepley     sum4  = 0.0;
15042b692628SMatthew Knepley     sum5  = 0.0;
15052b692628SMatthew Knepley     sum6  = 0.0;
15062b692628SMatthew Knepley     sum7  = 0.0;
15072b692628SMatthew Knepley     sum8  = 0.0;
15082b692628SMatthew Knepley     sum9  = 0.0;
15092b692628SMatthew Knepley     for (j=0; j<n; j++) {
15102b692628SMatthew Knepley       sum1 += v[jrow]*x[9*idx[jrow]];
15112b692628SMatthew Knepley       sum2 += v[jrow]*x[9*idx[jrow]+1];
15122b692628SMatthew Knepley       sum3 += v[jrow]*x[9*idx[jrow]+2];
15132b692628SMatthew Knepley       sum4 += v[jrow]*x[9*idx[jrow]+3];
15142b692628SMatthew Knepley       sum5 += v[jrow]*x[9*idx[jrow]+4];
15152b692628SMatthew Knepley       sum6 += v[jrow]*x[9*idx[jrow]+5];
15162b692628SMatthew Knepley       sum7 += v[jrow]*x[9*idx[jrow]+6];
15172b692628SMatthew Knepley       sum8 += v[jrow]*x[9*idx[jrow]+7];
15182b692628SMatthew Knepley       sum9 += v[jrow]*x[9*idx[jrow]+8];
15192b692628SMatthew Knepley       jrow++;
15202b692628SMatthew Knepley      }
15212b692628SMatthew Knepley     y[9*i]   += sum1;
15222b692628SMatthew Knepley     y[9*i+1] += sum2;
15232b692628SMatthew Knepley     y[9*i+2] += sum3;
15242b692628SMatthew Knepley     y[9*i+3] += sum4;
15252b692628SMatthew Knepley     y[9*i+4] += sum5;
15262b692628SMatthew Knepley     y[9*i+5] += sum6;
15272b692628SMatthew Knepley     y[9*i+6] += sum7;
15282b692628SMatthew Knepley     y[9*i+7] += sum8;
15292b692628SMatthew Knepley     y[9*i+8] += sum9;
15302b692628SMatthew Knepley   }
15312b692628SMatthew Knepley 
15322b692628SMatthew Knepley   PetscLogFlops(18*a->nz);
15331ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
15341ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15352b692628SMatthew Knepley   PetscFunctionReturn(0);
15362b692628SMatthew Knepley }
15372b692628SMatthew Knepley 
15382b692628SMatthew Knepley #undef __FUNCT__
15392b692628SMatthew Knepley #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1540dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
15412b692628SMatthew Knepley {
15422b692628SMatthew Knepley   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
15432b692628SMatthew Knepley   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
15442b692628SMatthew Knepley   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1545dfbe8321SBarry Smith   PetscErrorCode ierr;
1546b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
15472b692628SMatthew Knepley 
15482b692628SMatthew Knepley   PetscFunctionBegin;
15492b692628SMatthew Knepley   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
15501ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
15511ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
15522b692628SMatthew Knepley   for (i=0; i<m; i++) {
15532b692628SMatthew Knepley     idx    = a->j + a->i[i] ;
15542b692628SMatthew Knepley     v      = a->a + a->i[i] ;
15552b692628SMatthew Knepley     n      = a->i[i+1] - a->i[i];
15562b692628SMatthew Knepley     alpha1 = x[9*i];
15572b692628SMatthew Knepley     alpha2 = x[9*i+1];
15582b692628SMatthew Knepley     alpha3 = x[9*i+2];
15592b692628SMatthew Knepley     alpha4 = x[9*i+3];
15602b692628SMatthew Knepley     alpha5 = x[9*i+4];
15612b692628SMatthew Knepley     alpha6 = x[9*i+5];
15622b692628SMatthew Knepley     alpha7 = x[9*i+6];
15632b692628SMatthew Knepley     alpha8 = x[9*i+7];
15642b692628SMatthew Knepley     alpha9 = x[9*i+8];
15652b692628SMatthew Knepley     while (n-->0) {
15662b692628SMatthew Knepley       y[9*(*idx)]   += alpha1*(*v);
15672b692628SMatthew Knepley       y[9*(*idx)+1] += alpha2*(*v);
15682b692628SMatthew Knepley       y[9*(*idx)+2] += alpha3*(*v);
15692b692628SMatthew Knepley       y[9*(*idx)+3] += alpha4*(*v);
15702b692628SMatthew Knepley       y[9*(*idx)+4] += alpha5*(*v);
15712b692628SMatthew Knepley       y[9*(*idx)+5] += alpha6*(*v);
15722b692628SMatthew Knepley       y[9*(*idx)+6] += alpha7*(*v);
15732b692628SMatthew Knepley       y[9*(*idx)+7] += alpha8*(*v);
15742b692628SMatthew Knepley       y[9*(*idx)+8] += alpha9*(*v);
15752b692628SMatthew Knepley       idx++; v++;
15762b692628SMatthew Knepley     }
15772b692628SMatthew Knepley   }
15782b692628SMatthew Knepley   PetscLogFlops(18*a->nz);
15791ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
15801ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
15812b692628SMatthew Knepley   PetscFunctionReturn(0);
15822b692628SMatthew Knepley }
15832b692628SMatthew Knepley 
15842f7816d4SBarry Smith /*--------------------------------------------------------------------------------------------*/
15852f7816d4SBarry Smith #undef __FUNCT__
15862f7816d4SBarry Smith #define __FUNCT__ "MatMult_SeqMAIJ_16"
1587dfbe8321SBarry Smith PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
15882f7816d4SBarry Smith {
15892f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
15902f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
15912f7816d4SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
15922f7816d4SBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1593dfbe8321SBarry Smith   PetscErrorCode ierr;
1594b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1595b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
15962f7816d4SBarry Smith 
15972f7816d4SBarry Smith   PetscFunctionBegin;
15981ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
15991ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
16002f7816d4SBarry Smith   idx  = a->j;
16012f7816d4SBarry Smith   v    = a->a;
16022f7816d4SBarry Smith   ii   = a->i;
16032f7816d4SBarry Smith 
16042f7816d4SBarry Smith   for (i=0; i<m; i++) {
16052f7816d4SBarry Smith     jrow = ii[i];
16062f7816d4SBarry Smith     n    = ii[i+1] - jrow;
16072f7816d4SBarry Smith     sum1  = 0.0;
16082f7816d4SBarry Smith     sum2  = 0.0;
16092f7816d4SBarry Smith     sum3  = 0.0;
16102f7816d4SBarry Smith     sum4  = 0.0;
16112f7816d4SBarry Smith     sum5  = 0.0;
16122f7816d4SBarry Smith     sum6  = 0.0;
16132f7816d4SBarry Smith     sum7  = 0.0;
16142f7816d4SBarry Smith     sum8  = 0.0;
16152f7816d4SBarry Smith     sum9  = 0.0;
16162f7816d4SBarry Smith     sum10 = 0.0;
16172f7816d4SBarry Smith     sum11 = 0.0;
16182f7816d4SBarry Smith     sum12 = 0.0;
16192f7816d4SBarry Smith     sum13 = 0.0;
16202f7816d4SBarry Smith     sum14 = 0.0;
16212f7816d4SBarry Smith     sum15 = 0.0;
16222f7816d4SBarry Smith     sum16 = 0.0;
16232f7816d4SBarry Smith     for (j=0; j<n; j++) {
16242f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
16252f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
16262f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
16272f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
16282f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
16292f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
16302f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
16312f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
16322f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
16332f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
16342f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
16352f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
16362f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
16372f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
16382f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
16392f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
16402f7816d4SBarry Smith       jrow++;
16412f7816d4SBarry Smith      }
16422f7816d4SBarry Smith     y[16*i]    = sum1;
16432f7816d4SBarry Smith     y[16*i+1]  = sum2;
16442f7816d4SBarry Smith     y[16*i+2]  = sum3;
16452f7816d4SBarry Smith     y[16*i+3]  = sum4;
16462f7816d4SBarry Smith     y[16*i+4]  = sum5;
16472f7816d4SBarry Smith     y[16*i+5]  = sum6;
16482f7816d4SBarry Smith     y[16*i+6]  = sum7;
16492f7816d4SBarry Smith     y[16*i+7]  = sum8;
16502f7816d4SBarry Smith     y[16*i+8]  = sum9;
16512f7816d4SBarry Smith     y[16*i+9]  = sum10;
16522f7816d4SBarry Smith     y[16*i+10] = sum11;
16532f7816d4SBarry Smith     y[16*i+11] = sum12;
16542f7816d4SBarry Smith     y[16*i+12] = sum13;
16552f7816d4SBarry Smith     y[16*i+13] = sum14;
16562f7816d4SBarry Smith     y[16*i+14] = sum15;
16572f7816d4SBarry Smith     y[16*i+15] = sum16;
16582f7816d4SBarry Smith   }
16592f7816d4SBarry Smith 
16602f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*m);
16611ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
16621ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
16632f7816d4SBarry Smith   PetscFunctionReturn(0);
16642f7816d4SBarry Smith }
16652f7816d4SBarry Smith 
16662f7816d4SBarry Smith #undef __FUNCT__
16672f7816d4SBarry Smith #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1668dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
16692f7816d4SBarry Smith {
16702f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
16712f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
16722f7816d4SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
16732f7816d4SBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1674dfbe8321SBarry Smith   PetscErrorCode ierr;
1675b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
16762f7816d4SBarry Smith 
16772f7816d4SBarry Smith   PetscFunctionBegin;
16782f7816d4SBarry Smith   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
16791ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
16801ebc52fbSHong Zhang   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1681bfec09a0SHong Zhang 
16822f7816d4SBarry Smith   for (i=0; i<m; i++) {
1683bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1684bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
16852f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
16862f7816d4SBarry Smith     alpha1  = x[16*i];
16872f7816d4SBarry Smith     alpha2  = x[16*i+1];
16882f7816d4SBarry Smith     alpha3  = x[16*i+2];
16892f7816d4SBarry Smith     alpha4  = x[16*i+3];
16902f7816d4SBarry Smith     alpha5  = x[16*i+4];
16912f7816d4SBarry Smith     alpha6  = x[16*i+5];
16922f7816d4SBarry Smith     alpha7  = x[16*i+6];
16932f7816d4SBarry Smith     alpha8  = x[16*i+7];
16942f7816d4SBarry Smith     alpha9  = x[16*i+8];
16952f7816d4SBarry Smith     alpha10 = x[16*i+9];
16962f7816d4SBarry Smith     alpha11 = x[16*i+10];
16972f7816d4SBarry Smith     alpha12 = x[16*i+11];
16982f7816d4SBarry Smith     alpha13 = x[16*i+12];
16992f7816d4SBarry Smith     alpha14 = x[16*i+13];
17002f7816d4SBarry Smith     alpha15 = x[16*i+14];
17012f7816d4SBarry Smith     alpha16 = x[16*i+15];
17022f7816d4SBarry Smith     while (n-->0) {
17032f7816d4SBarry Smith       y[16*(*idx)]    += alpha1*(*v);
17042f7816d4SBarry Smith       y[16*(*idx)+1]  += alpha2*(*v);
17052f7816d4SBarry Smith       y[16*(*idx)+2]  += alpha3*(*v);
17062f7816d4SBarry Smith       y[16*(*idx)+3]  += alpha4*(*v);
17072f7816d4SBarry Smith       y[16*(*idx)+4]  += alpha5*(*v);
17082f7816d4SBarry Smith       y[16*(*idx)+5]  += alpha6*(*v);
17092f7816d4SBarry Smith       y[16*(*idx)+6]  += alpha7*(*v);
17102f7816d4SBarry Smith       y[16*(*idx)+7]  += alpha8*(*v);
17112f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
17122f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
17132f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
17142f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
17152f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
17162f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
17172f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
17182f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
17192f7816d4SBarry Smith       idx++; v++;
17202f7816d4SBarry Smith     }
17212f7816d4SBarry Smith   }
17222f7816d4SBarry Smith   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
17231ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
17241ebc52fbSHong Zhang   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
17252f7816d4SBarry Smith   PetscFunctionReturn(0);
17262f7816d4SBarry Smith }
17272f7816d4SBarry Smith 
17282f7816d4SBarry Smith #undef __FUNCT__
17292f7816d4SBarry Smith #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1730dfbe8321SBarry Smith PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
17312f7816d4SBarry Smith {
17322f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
17332f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
17342f7816d4SBarry Smith   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
17352f7816d4SBarry Smith   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1736dfbe8321SBarry Smith   PetscErrorCode ierr;
1737b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,*idx,*ii;
1738b24ad042SBarry Smith   PetscInt       n,i,jrow,j;
17392f7816d4SBarry Smith 
17402f7816d4SBarry Smith   PetscFunctionBegin;
17412f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
17421ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
17431ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
17442f7816d4SBarry Smith   idx  = a->j;
17452f7816d4SBarry Smith   v    = a->a;
17462f7816d4SBarry Smith   ii   = a->i;
17472f7816d4SBarry Smith 
17482f7816d4SBarry Smith   for (i=0; i<m; i++) {
17492f7816d4SBarry Smith     jrow = ii[i];
17502f7816d4SBarry Smith     n    = ii[i+1] - jrow;
17512f7816d4SBarry Smith     sum1  = 0.0;
17522f7816d4SBarry Smith     sum2  = 0.0;
17532f7816d4SBarry Smith     sum3  = 0.0;
17542f7816d4SBarry Smith     sum4  = 0.0;
17552f7816d4SBarry Smith     sum5  = 0.0;
17562f7816d4SBarry Smith     sum6  = 0.0;
17572f7816d4SBarry Smith     sum7  = 0.0;
17582f7816d4SBarry Smith     sum8  = 0.0;
17592f7816d4SBarry Smith     sum9  = 0.0;
17602f7816d4SBarry Smith     sum10 = 0.0;
17612f7816d4SBarry Smith     sum11 = 0.0;
17622f7816d4SBarry Smith     sum12 = 0.0;
17632f7816d4SBarry Smith     sum13 = 0.0;
17642f7816d4SBarry Smith     sum14 = 0.0;
17652f7816d4SBarry Smith     sum15 = 0.0;
17662f7816d4SBarry Smith     sum16 = 0.0;
17672f7816d4SBarry Smith     for (j=0; j<n; j++) {
17682f7816d4SBarry Smith       sum1  += v[jrow]*x[16*idx[jrow]];
17692f7816d4SBarry Smith       sum2  += v[jrow]*x[16*idx[jrow]+1];
17702f7816d4SBarry Smith       sum3  += v[jrow]*x[16*idx[jrow]+2];
17712f7816d4SBarry Smith       sum4  += v[jrow]*x[16*idx[jrow]+3];
17722f7816d4SBarry Smith       sum5  += v[jrow]*x[16*idx[jrow]+4];
17732f7816d4SBarry Smith       sum6  += v[jrow]*x[16*idx[jrow]+5];
17742f7816d4SBarry Smith       sum7  += v[jrow]*x[16*idx[jrow]+6];
17752f7816d4SBarry Smith       sum8  += v[jrow]*x[16*idx[jrow]+7];
17762f7816d4SBarry Smith       sum9  += v[jrow]*x[16*idx[jrow]+8];
17772f7816d4SBarry Smith       sum10 += v[jrow]*x[16*idx[jrow]+9];
17782f7816d4SBarry Smith       sum11 += v[jrow]*x[16*idx[jrow]+10];
17792f7816d4SBarry Smith       sum12 += v[jrow]*x[16*idx[jrow]+11];
17802f7816d4SBarry Smith       sum13 += v[jrow]*x[16*idx[jrow]+12];
17812f7816d4SBarry Smith       sum14 += v[jrow]*x[16*idx[jrow]+13];
17822f7816d4SBarry Smith       sum15 += v[jrow]*x[16*idx[jrow]+14];
17832f7816d4SBarry Smith       sum16 += v[jrow]*x[16*idx[jrow]+15];
17842f7816d4SBarry Smith       jrow++;
17852f7816d4SBarry Smith      }
17862f7816d4SBarry Smith     y[16*i]    += sum1;
17872f7816d4SBarry Smith     y[16*i+1]  += sum2;
17882f7816d4SBarry Smith     y[16*i+2]  += sum3;
17892f7816d4SBarry Smith     y[16*i+3]  += sum4;
17902f7816d4SBarry Smith     y[16*i+4]  += sum5;
17912f7816d4SBarry Smith     y[16*i+5]  += sum6;
17922f7816d4SBarry Smith     y[16*i+6]  += sum7;
17932f7816d4SBarry Smith     y[16*i+7]  += sum8;
17942f7816d4SBarry Smith     y[16*i+8]  += sum9;
17952f7816d4SBarry Smith     y[16*i+9]  += sum10;
17962f7816d4SBarry Smith     y[16*i+10] += sum11;
17972f7816d4SBarry Smith     y[16*i+11] += sum12;
17982f7816d4SBarry Smith     y[16*i+12] += sum13;
17992f7816d4SBarry Smith     y[16*i+13] += sum14;
18002f7816d4SBarry Smith     y[16*i+14] += sum15;
18012f7816d4SBarry Smith     y[16*i+15] += sum16;
18022f7816d4SBarry Smith   }
18032f7816d4SBarry Smith 
18042f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
18051ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
18061ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
18072f7816d4SBarry Smith   PetscFunctionReturn(0);
18082f7816d4SBarry Smith }
18092f7816d4SBarry Smith 
18102f7816d4SBarry Smith #undef __FUNCT__
18112f7816d4SBarry Smith #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1812dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
18132f7816d4SBarry Smith {
18142f7816d4SBarry Smith   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
18152f7816d4SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
18162f7816d4SBarry Smith   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
18172f7816d4SBarry Smith   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1818dfbe8321SBarry Smith   PetscErrorCode ierr;
1819b24ad042SBarry Smith   PetscInt       m = b->AIJ->m,n,i,*idx;
18202f7816d4SBarry Smith 
18212f7816d4SBarry Smith   PetscFunctionBegin;
18222f7816d4SBarry Smith   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
18231ebc52fbSHong Zhang   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
18241ebc52fbSHong Zhang   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
18252f7816d4SBarry Smith   for (i=0; i<m; i++) {
1826bfec09a0SHong Zhang     idx    = a->j + a->i[i] ;
1827bfec09a0SHong Zhang     v      = a->a + a->i[i] ;
18282f7816d4SBarry Smith     n      = a->i[i+1] - a->i[i];
18292f7816d4SBarry Smith     alpha1 = x[16*i];
18302f7816d4SBarry Smith     alpha2 = x[16*i+1];
18312f7816d4SBarry Smith     alpha3 = x[16*i+2];
18322f7816d4SBarry Smith     alpha4 = x[16*i+3];
18332f7816d4SBarry Smith     alpha5 = x[16*i+4];
18342f7816d4SBarry Smith     alpha6 = x[16*i+5];
18352f7816d4SBarry Smith     alpha7 = x[16*i+6];
18362f7816d4SBarry Smith     alpha8 = x[16*i+7];
18372f7816d4SBarry Smith     alpha9  = x[16*i+8];
18382f7816d4SBarry Smith     alpha10 = x[16*i+9];
18392f7816d4SBarry Smith     alpha11 = x[16*i+10];
18402f7816d4SBarry Smith     alpha12 = x[16*i+11];
18412f7816d4SBarry Smith     alpha13 = x[16*i+12];
18422f7816d4SBarry Smith     alpha14 = x[16*i+13];
18432f7816d4SBarry Smith     alpha15 = x[16*i+14];
18442f7816d4SBarry Smith     alpha16 = x[16*i+15];
18452f7816d4SBarry Smith     while (n-->0) {
18462f7816d4SBarry Smith       y[16*(*idx)]   += alpha1*(*v);
18472f7816d4SBarry Smith       y[16*(*idx)+1] += alpha2*(*v);
18482f7816d4SBarry Smith       y[16*(*idx)+2] += alpha3*(*v);
18492f7816d4SBarry Smith       y[16*(*idx)+3] += alpha4*(*v);
18502f7816d4SBarry Smith       y[16*(*idx)+4] += alpha5*(*v);
18512f7816d4SBarry Smith       y[16*(*idx)+5] += alpha6*(*v);
18522f7816d4SBarry Smith       y[16*(*idx)+6] += alpha7*(*v);
18532f7816d4SBarry Smith       y[16*(*idx)+7] += alpha8*(*v);
18542f7816d4SBarry Smith       y[16*(*idx)+8]  += alpha9*(*v);
18552f7816d4SBarry Smith       y[16*(*idx)+9]  += alpha10*(*v);
18562f7816d4SBarry Smith       y[16*(*idx)+10] += alpha11*(*v);
18572f7816d4SBarry Smith       y[16*(*idx)+11] += alpha12*(*v);
18582f7816d4SBarry Smith       y[16*(*idx)+12] += alpha13*(*v);
18592f7816d4SBarry Smith       y[16*(*idx)+13] += alpha14*(*v);
18602f7816d4SBarry Smith       y[16*(*idx)+14] += alpha15*(*v);
18612f7816d4SBarry Smith       y[16*(*idx)+15] += alpha16*(*v);
18622f7816d4SBarry Smith       idx++; v++;
18632f7816d4SBarry Smith     }
18642f7816d4SBarry Smith   }
18652f7816d4SBarry Smith   PetscLogFlops(32*a->nz);
18661ebc52fbSHong Zhang   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
18671ebc52fbSHong Zhang   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
186866ed3db0SBarry Smith   PetscFunctionReturn(0);
186966ed3db0SBarry Smith }
187066ed3db0SBarry Smith 
1871f4a53059SBarry Smith /*===================================================================================*/
18724a2ae208SSatish Balay #undef __FUNCT__
18734a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1874dfbe8321SBarry Smith PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1875f4a53059SBarry Smith {
18764cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1877dfbe8321SBarry Smith   PetscErrorCode ierr;
1878f4a53059SBarry Smith 
1879b24ad042SBarry Smith   PetscFunctionBegin;
1880f4a53059SBarry Smith   /* start the scatter */
1881f4a53059SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
18824cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1883f4a53059SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
18844cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1885f4a53059SBarry Smith   PetscFunctionReturn(0);
1886f4a53059SBarry Smith }
1887f4a53059SBarry Smith 
18884a2ae208SSatish Balay #undef __FUNCT__
18894a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1890dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
18914cb9d9b8SBarry Smith {
18924cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1893dfbe8321SBarry Smith   PetscErrorCode ierr;
1894b24ad042SBarry Smith 
18954cb9d9b8SBarry Smith   PetscFunctionBegin;
18964cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
18974cb9d9b8SBarry Smith   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1898a5ff213dSBarry Smith   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
18994cb9d9b8SBarry Smith   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
19004cb9d9b8SBarry Smith   PetscFunctionReturn(0);
19014cb9d9b8SBarry Smith }
19024cb9d9b8SBarry Smith 
19034a2ae208SSatish Balay #undef __FUNCT__
19044a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1905dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
19064cb9d9b8SBarry Smith {
19074cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1908dfbe8321SBarry Smith   PetscErrorCode ierr;
19094cb9d9b8SBarry Smith 
1910b24ad042SBarry Smith   PetscFunctionBegin;
19114cb9d9b8SBarry Smith   /* start the scatter */
19124cb9d9b8SBarry Smith   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1913d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
19144cb9d9b8SBarry Smith   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1915717f2ec8SHong Zhang   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
19164cb9d9b8SBarry Smith   PetscFunctionReturn(0);
19174cb9d9b8SBarry Smith }
19184cb9d9b8SBarry Smith 
19194a2ae208SSatish Balay #undef __FUNCT__
19204a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1921dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
19224cb9d9b8SBarry Smith {
19234cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
1924dfbe8321SBarry Smith   PetscErrorCode ierr;
1925b24ad042SBarry Smith 
19264cb9d9b8SBarry Smith   PetscFunctionBegin;
19274cb9d9b8SBarry Smith   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1928d72c5c08SBarry Smith   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1929d72c5c08SBarry Smith   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1930d72c5c08SBarry Smith   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
19314cb9d9b8SBarry Smith   PetscFunctionReturn(0);
19324cb9d9b8SBarry Smith }
19334cb9d9b8SBarry Smith 
19349c4fc4c7SBarry Smith #undef __FUNCT__
19357ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
19367ba1a0bfSKris Buschelman PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
19377ba1a0bfSKris Buschelman {
19387ba1a0bfSKris Buschelman   /* This routine requires testing -- but it's getting better. */
19397ba1a0bfSKris Buschelman   PetscErrorCode ierr;
19407ba1a0bfSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
19417ba1a0bfSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
19427ba1a0bfSKris Buschelman   Mat            P=pp->AIJ;
19437ba1a0bfSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
19447ba1a0bfSKris Buschelman   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
19457ba1a0bfSKris Buschelman   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
19467ba1a0bfSKris Buschelman   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn;
19477ba1a0bfSKris Buschelman   PetscInt       i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
19487ba1a0bfSKris Buschelman   MatScalar      *ca;
19497ba1a0bfSKris Buschelman 
19507ba1a0bfSKris Buschelman   PetscFunctionBegin;
19517ba1a0bfSKris Buschelman   /* Start timer */
19527ba1a0bfSKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
19537ba1a0bfSKris Buschelman 
19547ba1a0bfSKris Buschelman   /* Get ij structure of P^T */
19557ba1a0bfSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
19567ba1a0bfSKris Buschelman 
19577ba1a0bfSKris Buschelman   cn = pn*ppdof;
19587ba1a0bfSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
19597ba1a0bfSKris Buschelman   /* free space for accumulating nonzero column info */
19607ba1a0bfSKris Buschelman   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
19617ba1a0bfSKris Buschelman   ci[0] = 0;
19627ba1a0bfSKris Buschelman 
19637ba1a0bfSKris Buschelman   /* Work arrays for rows of P^T*A */
19647ba1a0bfSKris Buschelman   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
19657ba1a0bfSKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
19667ba1a0bfSKris Buschelman   ptasparserow = ptadenserow  + an;
19677ba1a0bfSKris Buschelman   denserow     = ptasparserow + an;
19687ba1a0bfSKris Buschelman   sparserow    = denserow     + cn;
19697ba1a0bfSKris Buschelman 
19707ba1a0bfSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
19717ba1a0bfSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
19727ba1a0bfSKris Buschelman   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
19737ba1a0bfSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
19747ba1a0bfSKris Buschelman   current_space = free_space;
19757ba1a0bfSKris Buschelman 
19767ba1a0bfSKris Buschelman   /* Determine symbolic info for each row of C: */
19777ba1a0bfSKris Buschelman   for (i=0;i<pn;i++) {
19787ba1a0bfSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
19797ba1a0bfSKris Buschelman     ptJ    = ptj + pti[i];
19807ba1a0bfSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
19817ba1a0bfSKris Buschelman       ptanzi = 0;
19827ba1a0bfSKris Buschelman       /* Determine symbolic row of PtA: */
19837ba1a0bfSKris Buschelman       for (j=0;j<ptnzi;j++) {
19847ba1a0bfSKris Buschelman         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
19857ba1a0bfSKris Buschelman         arow = ptJ[j]*ppdof + dof;
19867ba1a0bfSKris Buschelman         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
19877ba1a0bfSKris Buschelman         anzj = ai[arow+1] - ai[arow];
19887ba1a0bfSKris Buschelman         ajj  = aj + ai[arow];
19897ba1a0bfSKris Buschelman         for (k=0;k<anzj;k++) {
19907ba1a0bfSKris Buschelman           if (!ptadenserow[ajj[k]]) {
19917ba1a0bfSKris Buschelman             ptadenserow[ajj[k]]    = -1;
19927ba1a0bfSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
19937ba1a0bfSKris Buschelman           }
19947ba1a0bfSKris Buschelman         }
19957ba1a0bfSKris Buschelman       }
19967ba1a0bfSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
19977ba1a0bfSKris Buschelman       ptaj = ptasparserow;
19987ba1a0bfSKris Buschelman       cnzi   = 0;
19997ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
20007ba1a0bfSKris Buschelman         /* Get offset within block of P */
20017ba1a0bfSKris Buschelman         pshift = *ptaj%ppdof;
20027ba1a0bfSKris Buschelman         /* Get block row of P */
20037ba1a0bfSKris Buschelman         prow = (*ptaj++)/ppdof; /* integer division */
20047ba1a0bfSKris Buschelman         /* P has same number of nonzeros per row as the compressed form */
20057ba1a0bfSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
20067ba1a0bfSKris Buschelman         pjj  = pj + pi[prow];
20077ba1a0bfSKris Buschelman         for (k=0;k<pnzj;k++) {
20087ba1a0bfSKris Buschelman           /* Locations in C are shifted by the offset within the block */
20097ba1a0bfSKris Buschelman           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
20107ba1a0bfSKris Buschelman           if (!denserow[pjj[k]*ppdof+pshift]) {
20117ba1a0bfSKris Buschelman             denserow[pjj[k]*ppdof+pshift] = -1;
20127ba1a0bfSKris Buschelman             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
20137ba1a0bfSKris Buschelman           }
20147ba1a0bfSKris Buschelman         }
20157ba1a0bfSKris Buschelman       }
20167ba1a0bfSKris Buschelman 
20177ba1a0bfSKris Buschelman       /* sort sparserow */
20187ba1a0bfSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
20197ba1a0bfSKris Buschelman 
20207ba1a0bfSKris Buschelman       /* If free space is not available, make more free space */
20217ba1a0bfSKris Buschelman       /* Double the amount of total space in the list */
20227ba1a0bfSKris Buschelman       if (current_space->local_remaining<cnzi) {
20237ba1a0bfSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
20247ba1a0bfSKris Buschelman       }
20257ba1a0bfSKris Buschelman 
20267ba1a0bfSKris Buschelman       /* Copy data into free space, and zero out denserows */
20277ba1a0bfSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
20287ba1a0bfSKris Buschelman       current_space->array           += cnzi;
20297ba1a0bfSKris Buschelman       current_space->local_used      += cnzi;
20307ba1a0bfSKris Buschelman       current_space->local_remaining -= cnzi;
20317ba1a0bfSKris Buschelman 
20327ba1a0bfSKris Buschelman       for (j=0;j<ptanzi;j++) {
20337ba1a0bfSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
20347ba1a0bfSKris Buschelman       }
20357ba1a0bfSKris Buschelman       for (j=0;j<cnzi;j++) {
20367ba1a0bfSKris Buschelman         denserow[sparserow[j]] = 0;
20377ba1a0bfSKris Buschelman       }
20387ba1a0bfSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
20397ba1a0bfSKris Buschelman       /*        For now, we will recompute what is needed. */
20407ba1a0bfSKris Buschelman       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
20417ba1a0bfSKris Buschelman     }
20427ba1a0bfSKris Buschelman   }
20437ba1a0bfSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
20447ba1a0bfSKris Buschelman   /* Allocate space for cj, initialize cj, and */
20457ba1a0bfSKris Buschelman   /* destroy list of free space and other temporary array(s) */
20467ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
20477ba1a0bfSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
20487ba1a0bfSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
20497ba1a0bfSKris Buschelman 
20507ba1a0bfSKris Buschelman   /* Allocate space for ca */
20517ba1a0bfSKris Buschelman   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
20527ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
20537ba1a0bfSKris Buschelman 
20547ba1a0bfSKris Buschelman   /* put together the new matrix */
20557ba1a0bfSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
20567ba1a0bfSKris Buschelman 
20577ba1a0bfSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
20587ba1a0bfSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
20597ba1a0bfSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
20607ba1a0bfSKris Buschelman   c->freedata = PETSC_TRUE;
20617ba1a0bfSKris Buschelman   c->nonew    = 0;
20627ba1a0bfSKris Buschelman 
20637ba1a0bfSKris Buschelman   /* Clean up. */
20647ba1a0bfSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
20657ba1a0bfSKris Buschelman 
20667ba1a0bfSKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
20677ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
20687ba1a0bfSKris Buschelman }
20697ba1a0bfSKris Buschelman 
20707ba1a0bfSKris Buschelman #undef __FUNCT__
20717ba1a0bfSKris Buschelman #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
20727ba1a0bfSKris Buschelman PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
20737ba1a0bfSKris Buschelman {
20747ba1a0bfSKris Buschelman   /* This routine requires testing -- first draft only */
20757ba1a0bfSKris Buschelman   PetscErrorCode ierr;
20767ba1a0bfSKris Buschelman   PetscInt       flops=0;
20777ba1a0bfSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
20787ba1a0bfSKris Buschelman   Mat            P=pp->AIJ;
20797ba1a0bfSKris Buschelman   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
20807ba1a0bfSKris Buschelman   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
20817ba1a0bfSKris Buschelman   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
20827ba1a0bfSKris Buschelman   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
20837ba1a0bfSKris Buschelman   PetscInt       *ci=c->i,*cj=c->j,*cjj;
20847ba1a0bfSKris Buschelman   PetscInt       am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof;
20857ba1a0bfSKris Buschelman   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
20867ba1a0bfSKris Buschelman   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
20877ba1a0bfSKris Buschelman 
20887ba1a0bfSKris Buschelman   PetscFunctionBegin;
20897ba1a0bfSKris Buschelman   /* Allocate temporary array for storage of one row of A*P */
20907ba1a0bfSKris Buschelman   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
20917ba1a0bfSKris Buschelman   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
20927ba1a0bfSKris Buschelman 
20937ba1a0bfSKris Buschelman   apj      = (PetscInt *)(apa + cn);
20947ba1a0bfSKris Buschelman   apjdense = apj + cn;
20957ba1a0bfSKris Buschelman 
20967ba1a0bfSKris Buschelman   /* Clear old values in C */
20977ba1a0bfSKris Buschelman   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
20987ba1a0bfSKris Buschelman 
20997ba1a0bfSKris Buschelman   for (i=0;i<am;i++) {
21007ba1a0bfSKris Buschelman     /* Form sparse row of A*P */
21017ba1a0bfSKris Buschelman     anzi  = ai[i+1] - ai[i];
21027ba1a0bfSKris Buschelman     apnzj = 0;
21037ba1a0bfSKris Buschelman     for (j=0;j<anzi;j++) {
21047ba1a0bfSKris Buschelman       /* Get offset within block of P */
21057ba1a0bfSKris Buschelman       pshift = *aj%ppdof;
21067ba1a0bfSKris Buschelman       /* Get block row of P */
21077ba1a0bfSKris Buschelman       prow   = *aj++/ppdof; /* integer division */
21087ba1a0bfSKris Buschelman       pnzj = pi[prow+1] - pi[prow];
21097ba1a0bfSKris Buschelman       pjj  = pj + pi[prow];
21107ba1a0bfSKris Buschelman       paj  = pa + pi[prow];
21117ba1a0bfSKris Buschelman       for (k=0;k<pnzj;k++) {
21127ba1a0bfSKris Buschelman         poffset = pjj[k]*ppdof+pshift;
21137ba1a0bfSKris Buschelman         if (!apjdense[poffset]) {
21147ba1a0bfSKris Buschelman           apjdense[poffset] = -1;
21157ba1a0bfSKris Buschelman           apj[apnzj++]      = poffset;
21167ba1a0bfSKris Buschelman         }
21177ba1a0bfSKris Buschelman         apa[poffset] += (*aa)*paj[k];
21187ba1a0bfSKris Buschelman       }
21197ba1a0bfSKris Buschelman       flops += 2*pnzj;
21207ba1a0bfSKris Buschelman       aa++;
21217ba1a0bfSKris Buschelman     }
21227ba1a0bfSKris Buschelman 
21237ba1a0bfSKris Buschelman     /* Sort the j index array for quick sparse axpy. */
21247ba1a0bfSKris Buschelman     /* Note: a array does not need sorting as it is in dense storage locations. */
21257ba1a0bfSKris Buschelman     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
21267ba1a0bfSKris Buschelman 
21277ba1a0bfSKris Buschelman     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
21287ba1a0bfSKris Buschelman     prow    = i/ppdof; /* integer division */
21297ba1a0bfSKris Buschelman     pshift  = i%ppdof;
21307ba1a0bfSKris Buschelman     poffset = pi[prow];
21317ba1a0bfSKris Buschelman     pnzi = pi[prow+1] - poffset;
21327ba1a0bfSKris Buschelman     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
21337ba1a0bfSKris Buschelman     pJ   = pj+poffset;
21347ba1a0bfSKris Buschelman     pA   = pa+poffset;
21357ba1a0bfSKris Buschelman     for (j=0;j<pnzi;j++) {
21367ba1a0bfSKris Buschelman       crow   = (*pJ)*ppdof+pshift;
21377ba1a0bfSKris Buschelman       cjj    = cj + ci[crow];
21387ba1a0bfSKris Buschelman       caj    = ca + ci[crow];
21397ba1a0bfSKris Buschelman       pJ++;
21407ba1a0bfSKris Buschelman       /* Perform sparse axpy operation.  Note cjj includes apj. */
21417ba1a0bfSKris Buschelman       for (k=0,nextap=0;nextap<apnzj;k++) {
21427ba1a0bfSKris Buschelman         if (cjj[k]==apj[nextap]) {
21437ba1a0bfSKris Buschelman           caj[k] += (*pA)*apa[apj[nextap++]];
21447ba1a0bfSKris Buschelman         }
21457ba1a0bfSKris Buschelman       }
21467ba1a0bfSKris Buschelman       flops += 2*apnzj;
21477ba1a0bfSKris Buschelman       pA++;
21487ba1a0bfSKris Buschelman     }
21497ba1a0bfSKris Buschelman 
21507ba1a0bfSKris Buschelman     /* Zero the current row info for A*P */
21517ba1a0bfSKris Buschelman     for (j=0;j<apnzj;j++) {
21527ba1a0bfSKris Buschelman       apa[apj[j]]      = 0.;
21537ba1a0bfSKris Buschelman       apjdense[apj[j]] = 0;
21547ba1a0bfSKris Buschelman     }
21557ba1a0bfSKris Buschelman   }
21567ba1a0bfSKris Buschelman 
21577ba1a0bfSKris Buschelman   /* Assemble the final matrix and clean up */
21587ba1a0bfSKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
21597ba1a0bfSKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
21607ba1a0bfSKris Buschelman   ierr = PetscFree(apa);CHKERRQ(ierr);
21617ba1a0bfSKris Buschelman   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
21627ba1a0bfSKris Buschelman 
21637ba1a0bfSKris Buschelman   PetscFunctionReturn(0);
21647ba1a0bfSKris Buschelman }
21657ba1a0bfSKris Buschelman 
21667ba1a0bfSKris Buschelman #undef __FUNCT__
21670fd73130SBarry Smith #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2168ceb03754SKris Buschelman PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
21699c4fc4c7SBarry Smith {
21709c4fc4c7SBarry Smith   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2171ceb03754SKris Buschelman   Mat               a = b->AIJ,B;
21729c4fc4c7SBarry Smith   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
21739c4fc4c7SBarry Smith   PetscErrorCode    ierr;
21740fd73130SBarry Smith   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2175ba8c8a56SBarry Smith   PetscInt          *cols;
2176ba8c8a56SBarry Smith   PetscScalar       *vals;
21779c4fc4c7SBarry Smith 
21789c4fc4c7SBarry Smith   PetscFunctionBegin;
21799c4fc4c7SBarry Smith   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
21800fd73130SBarry Smith   ierr = PetscMalloc(dof*m*sizeof(int),&ilen);CHKERRQ(ierr);
21819c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
21829c4fc4c7SBarry Smith     nmax = PetscMax(nmax,aij->ilen[i]);
21830fd73130SBarry Smith     for (j=0; j<dof; j++) {
21840fd73130SBarry Smith       ilen[dof*i+j] = aij->ilen[i];
21859c4fc4c7SBarry Smith     }
21869c4fc4c7SBarry Smith   }
2187ceb03754SKris Buschelman   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
2188ceb03754SKris Buschelman   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
21899c4fc4c7SBarry Smith   ierr = PetscFree(ilen);CHKERRQ(ierr);
21909c4fc4c7SBarry Smith   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
21919c4fc4c7SBarry Smith   ii   = 0;
21929c4fc4c7SBarry Smith   for (i=0; i<m; i++) {
2193ba8c8a56SBarry Smith     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
21940fd73130SBarry Smith     for (j=0; j<dof; j++) {
21959c4fc4c7SBarry Smith       for (k=0; k<ncols; k++) {
21960fd73130SBarry Smith         icols[k] = dof*cols[k]+j;
21979c4fc4c7SBarry Smith       }
2198ceb03754SKris Buschelman       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
21999c4fc4c7SBarry Smith       ii++;
22009c4fc4c7SBarry Smith     }
2201ba8c8a56SBarry Smith     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
22029c4fc4c7SBarry Smith   }
22039c4fc4c7SBarry Smith   ierr = PetscFree(icols);CHKERRQ(ierr);
2204ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2205ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2206ceb03754SKris Buschelman 
2207ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
22088ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2209c3d102feSKris Buschelman   } else {
2210ceb03754SKris Buschelman     *newmat = B;
2211c3d102feSKris Buschelman   }
22129c4fc4c7SBarry Smith   PetscFunctionReturn(0);
22139c4fc4c7SBarry Smith }
22149c4fc4c7SBarry Smith 
22150fd73130SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h"
22160fd73130SBarry Smith #undef __FUNCT__
22170fd73130SBarry Smith #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2218ceb03754SKris Buschelman PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
22190fd73130SBarry Smith {
22200fd73130SBarry Smith   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2221ceb03754SKris Buschelman   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
22220fd73130SBarry Smith   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
22230fd73130SBarry Smith   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
22240fd73130SBarry Smith   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
22250fd73130SBarry Smith   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
22267a1a7badSBarry Smith   PetscInt          dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols;
22270fd73130SBarry Smith   PetscInt          rstart,cstart,*garray,ii,k;
22280fd73130SBarry Smith   PetscErrorCode    ierr;
22290fd73130SBarry Smith   PetscScalar       *vals,*ovals;
22300fd73130SBarry Smith 
22310fd73130SBarry Smith   PetscFunctionBegin;
22327a1a7badSBarry Smith   ierr = PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);CHKERRQ(ierr);
22330fd73130SBarry Smith   for (i=0; i<A->m/dof; i++) {
22340fd73130SBarry Smith     nmax  = PetscMax(nmax,AIJ->ilen[i]);
22350fd73130SBarry Smith     onmax = PetscMax(onmax,OAIJ->ilen[i]);
22360fd73130SBarry Smith     for (j=0; j<dof; j++) {
22370fd73130SBarry Smith       dnz[dof*i+j] = AIJ->ilen[i];
22380fd73130SBarry Smith       onz[dof*i+j] = OAIJ->ilen[i];
22390fd73130SBarry Smith     }
22400fd73130SBarry Smith   }
2241ceb03754SKris Buschelman   ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2242ceb03754SKris Buschelman   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
22430fd73130SBarry Smith   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
22440fd73130SBarry Smith 
22457a1a7badSBarry Smith   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
22460fd73130SBarry Smith   rstart = dof*mpiaij->rstart;
22470fd73130SBarry Smith   cstart = dof*mpiaij->cstart;
22480fd73130SBarry Smith   garray = mpiaij->garray;
22490fd73130SBarry Smith 
22500fd73130SBarry Smith   ii = rstart;
22510fd73130SBarry Smith   for (i=0; i<A->m/dof; i++) {
22520fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
22530fd73130SBarry Smith     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
22540fd73130SBarry Smith     for (j=0; j<dof; j++) {
22550fd73130SBarry Smith       for (k=0; k<ncols; k++) {
22560fd73130SBarry Smith         icols[k] = cstart + dof*cols[k]+j;
22570fd73130SBarry Smith       }
22580fd73130SBarry Smith       for (k=0; k<oncols; k++) {
22590fd73130SBarry Smith         oicols[k] = dof*garray[ocols[k]]+j;
22600fd73130SBarry Smith       }
2261ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2262ceb03754SKris Buschelman       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
22630fd73130SBarry Smith       ii++;
22640fd73130SBarry Smith     }
22650fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
22660fd73130SBarry Smith     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
22670fd73130SBarry Smith   }
22680fd73130SBarry Smith   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
22690fd73130SBarry Smith 
2270ceb03754SKris Buschelman   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2271ceb03754SKris Buschelman   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2272ceb03754SKris Buschelman 
2273ceb03754SKris Buschelman   if (reuse == MAT_REUSE_MATRIX) {
22748ab5b326SKris Buschelman     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2275c3d102feSKris Buschelman   } else {
2276ceb03754SKris Buschelman     *newmat = B;
2277c3d102feSKris Buschelman   }
22780fd73130SBarry Smith   PetscFunctionReturn(0);
22790fd73130SBarry Smith }
22800fd73130SBarry Smith 
22810fd73130SBarry Smith 
22820fd73130SBarry Smith 
2283bcc973b7SBarry Smith /* ---------------------------------------------------------------------------------- */
22845983afb6SSatish Balay /*MC
22850bad9183SKris Buschelman   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
22860bad9183SKris Buschelman   operations for multicomponent problems.  It interpolates each component the same
22870bad9183SKris Buschelman   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
22880bad9183SKris Buschelman   and MATMPIAIJ for distributed matrices.
22890bad9183SKris Buschelman 
22900bad9183SKris Buschelman   Operations provided:
22910fd73130SBarry Smith + MatMult
22920bad9183SKris Buschelman . MatMultTranspose
22930bad9183SKris Buschelman . MatMultAdd
22940bad9183SKris Buschelman . MatMultTransposeAdd
22950fd73130SBarry Smith - MatView
22960bad9183SKris Buschelman 
22970bad9183SKris Buschelman   Level: advanced
22980bad9183SKris Buschelman 
22990bad9183SKris Buschelman M*/
23004a2ae208SSatish Balay #undef __FUNCT__
23014a2ae208SSatish Balay #define __FUNCT__ "MatCreateMAIJ"
2302b24ad042SBarry Smith PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
230382b1193eSBarry Smith {
2304dfbe8321SBarry Smith   PetscErrorCode ierr;
2305b24ad042SBarry Smith   PetscMPIInt    size;
2306b24ad042SBarry Smith   PetscInt       n;
23074cb9d9b8SBarry Smith   Mat_MPIMAIJ    *b;
230882b1193eSBarry Smith   Mat            B;
230982b1193eSBarry Smith 
231082b1193eSBarry Smith   PetscFunctionBegin;
2311d72c5c08SBarry Smith   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2312d72c5c08SBarry Smith 
2313d72c5c08SBarry Smith   if (dof == 1) {
2314d72c5c08SBarry Smith     *maij = A;
2315d72c5c08SBarry Smith   } else {
231683903688SBarry Smith     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
2317cd3bbe55SBarry Smith     B->assembled    = PETSC_TRUE;
2318d72c5c08SBarry Smith 
2319f4a53059SBarry Smith     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2320f4a53059SBarry Smith     if (size == 1) {
2321b9b97703SBarry Smith       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
23224cb9d9b8SBarry Smith       B->ops->destroy = MatDestroy_SeqMAIJ;
23230fd73130SBarry Smith       B->ops->view    = MatView_SeqMAIJ;
2324b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
2325b9b97703SBarry Smith       b->dof = dof;
23264cb9d9b8SBarry Smith       b->AIJ = A;
2327d72c5c08SBarry Smith       if (dof == 2) {
2328cd3bbe55SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_2;
2329cd3bbe55SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2330cd3bbe55SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2331cd3bbe55SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2332bcc973b7SBarry Smith       } else if (dof == 3) {
2333bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_3;
2334bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2335bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2336bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2337bcc973b7SBarry Smith       } else if (dof == 4) {
2338bcc973b7SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_4;
2339bcc973b7SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2340bcc973b7SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2341bcc973b7SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2342f9fae5adSBarry Smith       } else if (dof == 5) {
2343f9fae5adSBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_5;
2344f9fae5adSBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2345f9fae5adSBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2346f9fae5adSBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
23476cd98798SBarry Smith       } else if (dof == 6) {
23486cd98798SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_6;
23496cd98798SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
23506cd98798SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
23516cd98798SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2352*ed8eea03SBarry Smith       } else if (dof == 7) {
2353*ed8eea03SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_7;
2354*ed8eea03SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2355*ed8eea03SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2356*ed8eea03SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
235766ed3db0SBarry Smith       } else if (dof == 8) {
235866ed3db0SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_8;
235966ed3db0SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
236066ed3db0SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
236166ed3db0SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
23622b692628SMatthew Knepley       } else if (dof == 9) {
23632b692628SMatthew Knepley         B->ops->mult             = MatMult_SeqMAIJ_9;
23642b692628SMatthew Knepley         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
23652b692628SMatthew Knepley         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
23662b692628SMatthew Knepley         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
23672f7816d4SBarry Smith       } else if (dof == 16) {
23682f7816d4SBarry Smith         B->ops->mult             = MatMult_SeqMAIJ_16;
23692f7816d4SBarry Smith         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
23702f7816d4SBarry Smith         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
23712f7816d4SBarry Smith         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
237282b1193eSBarry Smith       } else {
237377431f27SBarry Smith         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
237482b1193eSBarry Smith       }
23757ba1a0bfSKris Buschelman       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
23767ba1a0bfSKris Buschelman       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
23779c4fc4c7SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2378f4a53059SBarry Smith     } else {
2379f4a53059SBarry Smith       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2380f4a53059SBarry Smith       IS         from,to;
2381f4a53059SBarry Smith       Vec        gvec;
2382b24ad042SBarry Smith       PetscInt   *garray,i;
2383f4a53059SBarry Smith 
2384b9b97703SBarry Smith       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2385d72c5c08SBarry Smith       B->ops->destroy = MatDestroy_MPIMAIJ;
23860fd73130SBarry Smith       B->ops->view    = MatView_MPIMAIJ;
2387b9b97703SBarry Smith       b      = (Mat_MPIMAIJ*)B->data;
2388b9b97703SBarry Smith       b->dof = dof;
2389b9b97703SBarry Smith       b->A   = A;
23904cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
23914cb9d9b8SBarry Smith       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
23924cb9d9b8SBarry Smith 
2393f4a53059SBarry Smith       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2394f4a53059SBarry Smith       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
239513288a74SBarry Smith       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2396f4a53059SBarry Smith 
2397f4a53059SBarry Smith       /* create two temporary Index sets for build scatter gather */
2398b24ad042SBarry Smith       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2399f4a53059SBarry Smith       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2400f4a53059SBarry Smith       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
2401f4a53059SBarry Smith       ierr = PetscFree(garray);CHKERRQ(ierr);
2402f4a53059SBarry Smith       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2403f4a53059SBarry Smith 
2404f4a53059SBarry Smith       /* create temporary global vector to generate scatter context */
2405f4a53059SBarry Smith       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
240613288a74SBarry Smith       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2407f4a53059SBarry Smith 
2408f4a53059SBarry Smith       /* generate the scatter context */
2409f4a53059SBarry Smith       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
2410f4a53059SBarry Smith 
2411f4a53059SBarry Smith       ierr = ISDestroy(from);CHKERRQ(ierr);
2412f4a53059SBarry Smith       ierr = ISDestroy(to);CHKERRQ(ierr);
2413f4a53059SBarry Smith       ierr = VecDestroy(gvec);CHKERRQ(ierr);
2414f4a53059SBarry Smith 
2415f4a53059SBarry Smith       B->ops->mult             = MatMult_MPIMAIJ_dof;
24164cb9d9b8SBarry Smith       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
24174cb9d9b8SBarry Smith       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
24184cb9d9b8SBarry Smith       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
24190fd73130SBarry Smith       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
2420f4a53059SBarry Smith     }
2421cd3bbe55SBarry Smith     *maij = B;
24220fd73130SBarry Smith     ierr = MatView_Private(B);CHKERRQ(ierr);
2423d72c5c08SBarry Smith   }
242482b1193eSBarry Smith   PetscFunctionReturn(0);
242582b1193eSBarry Smith }
242682b1193eSBarry Smith 
242782b1193eSBarry Smith 
242882b1193eSBarry Smith 
242982b1193eSBarry Smith 
243082b1193eSBarry Smith 
243182b1193eSBarry Smith 
243282b1193eSBarry Smith 
243382b1193eSBarry Smith 
243482b1193eSBarry Smith 
243582b1193eSBarry Smith 
243682b1193eSBarry Smith 
243782b1193eSBarry Smith 
2438