xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision bc011b1ea0f1d686ef542cd9b8d1c77812488e84)
1d50806bdSBarry Smith /*
2a50f8bf6SHong Zhang   Defines matrix-matrix product routines for pairs of AIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9d50806bdSBarry Smith 
10c1f4806aSKris Buschelman #undef __FUNCT__
11c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
125c66b693SKris Buschelman /*@
135c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
1494e3eecaSKris Buschelman 
155c66b693SKris Buschelman    Collective on Mat
16d50806bdSBarry Smith 
175c66b693SKris Buschelman    Input Parameters:
185c66b693SKris Buschelman +  A - the left matrix
191c741599SHong Zhang .  B - the right matrix
201c741599SHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
225c66b693SKris Buschelman 
235c66b693SKris Buschelman    Output Parameters:
245c66b693SKris Buschelman .  C - the product matrix
255c66b693SKris Buschelman 
265c66b693SKris Buschelman    Notes:
275c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
285c66b693SKris Buschelman 
29*bc011b1eSHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
30*bc011b1eSHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
315c66b693SKris Buschelman 
325c66b693SKris Buschelman    Level: intermediate
335c66b693SKris Buschelman 
345c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
355c66b693SKris Buschelman @*/
36dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
372d09714cSHong Zhang {
38dfbe8321SBarry Smith   PetscErrorCode ierr;
39cb00f407SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
40cb00f407SKris Buschelman   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
412d09714cSHong Zhang 
42d50806bdSBarry Smith   PetscFunctionBegin;
434482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
44c9780b6fSBarry Smith   PetscValidType(A,1);
455c66b693SKris Buschelman   MatPreallocated(A);
465c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
475c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
484482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
49c9780b6fSBarry Smith   PetscValidType(B,2);
505c66b693SKris Buschelman   MatPreallocated(B);
515c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
525c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
534482741eSBarry Smith   PetscValidPointer(C,3);
545c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
55d50806bdSBarry Smith 
561c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57c5db241fSHong Zhang 
58cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
59cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60cb00f407SKris Buschelman   fA = A->ops->matmult;
61cb00f407SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
62cb00f407SKris Buschelman   fB = B->ops->matmult;
63cb00f407SKris Buschelman   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
64cb00f407SKris Buschelman   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
65cb00f407SKris Buschelman 
666284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
671c741599SHong Zhang   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
686284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
694d3841fdSKris Buschelman 
706284ec50SHong Zhang   PetscFunctionReturn(0);
716284ec50SHong Zhang }
726284ec50SHong Zhang 
736284ec50SHong Zhang #undef __FUNCT__
746284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
75dfbe8321SBarry Smith PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
762d09714cSHong Zhang {
77dfbe8321SBarry Smith   PetscErrorCode ierr;
786284ec50SHong Zhang 
796284ec50SHong Zhang   PetscFunctionBegin;
8026be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
81d6bb3c2dSHong Zhang     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
82d6bb3c2dSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
8326be0446SHong Zhang     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
84d6bb3c2dSHong Zhang   } else {
85d6bb3c2dSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
86d6bb3c2dSHong Zhang   }
87d50806bdSBarry Smith   PetscFunctionReturn(0);
88d50806bdSBarry Smith }
895c66b693SKris Buschelman 
905c66b693SKris Buschelman #undef __FUNCT__
915c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
92dfbe8321SBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
93dfbe8321SBarry Smith   PetscErrorCode ierr;
945c66b693SKris Buschelman 
955c66b693SKris Buschelman   PetscFunctionBegin;
9626be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
9726be0446SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
9826be0446SHong Zhang   }
9926be0446SHong Zhang   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
1005c66b693SKris Buschelman   PetscFunctionReturn(0);
1015c66b693SKris Buschelman }
1025c66b693SKris Buschelman 
103c1f4806aSKris Buschelman #undef __FUNCT__
104c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
1055c66b693SKris Buschelman /*@
1065c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
1075c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
1085c66b693SKris Buschelman 
1095c66b693SKris Buschelman    Collective on Mat
1105c66b693SKris Buschelman 
1115c66b693SKris Buschelman    Input Parameters:
1125c66b693SKris Buschelman +  A - the left matrix
113c5db241fSHong Zhang .  B - the right matrix
114c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
1155c66b693SKris Buschelman 
1165c66b693SKris Buschelman    Output Parameters:
1175c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
1185c66b693SKris Buschelman 
1195c66b693SKris Buschelman    Notes:
1204d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
1215c66b693SKris Buschelman 
1224d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
1235c66b693SKris Buschelman 
1245c66b693SKris Buschelman    Level: intermediate
1255c66b693SKris Buschelman 
1265c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
1275c66b693SKris Buschelman @*/
128dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
129dfbe8321SBarry Smith   PetscErrorCode ierr;
130cb00f407SKris Buschelman   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
131cb00f407SKris Buschelman   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
1325c66b693SKris Buschelman 
1335c66b693SKris Buschelman   PetscFunctionBegin;
1344482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
135c9780b6fSBarry Smith   PetscValidType(A,1);
1365c66b693SKris Buschelman   MatPreallocated(A);
1375c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1385c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1395c66b693SKris Buschelman 
1404482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
141c9780b6fSBarry Smith   PetscValidType(B,2);
1425c66b693SKris Buschelman   MatPreallocated(B);
1435c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1445c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1454482741eSBarry Smith   PetscValidPointer(C,3);
1464482741eSBarry Smith 
1475c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
1481c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
1495c66b693SKris Buschelman 
150cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
151cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
152cb00f407SKris Buschelman   Asymbolic = A->ops->matmultsymbolic;
153cb00f407SKris Buschelman   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
154cb00f407SKris Buschelman   Bsymbolic = B->ops->matmultsymbolic;
155cb00f407SKris Buschelman   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
156cb00f407SKris Buschelman   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
157cb00f407SKris Buschelman 
158cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
159cb00f407SKris Buschelman   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
160cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1615c66b693SKris Buschelman 
1625c66b693SKris Buschelman   PetscFunctionReturn(0);
1635c66b693SKris Buschelman }
1641c24bd37SHong Zhang 
165dfbe8321SBarry Smith EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
16626be0446SHong Zhang #undef __FUNCT__
16726be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
168dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
16926be0446SHong Zhang {
170dfbe8321SBarry Smith   PetscErrorCode ierr;
17126be0446SHong Zhang   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr;
17226be0446SHong Zhang 
17326be0446SHong Zhang   PetscFunctionBegin;
174d6bb3c2dSHong Zhang   ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
175d6bb3c2dSHong Zhang   ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
176d6bb3c2dSHong Zhang   ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
177d6bb3c2dSHong Zhang   ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr);
178d6bb3c2dSHong Zhang   ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr);
179d6bb3c2dSHong Zhang   ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
18026be0446SHong Zhang   ierr = PetscFree(mult);CHKERRQ(ierr);
181d6bb3c2dSHong Zhang 
18226be0446SHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
18326be0446SHong Zhang 
18426be0446SHong Zhang   PetscFunctionReturn(0);
18526be0446SHong Zhang }
18658c24d83SHong Zhang 
18758c24d83SHong Zhang #undef __FUNCT__
18826be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
189dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
19026be0446SHong Zhang {
191ff134f7aSHong Zhang   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
192dfbe8321SBarry Smith   PetscErrorCode    ierr;
193ff134f7aSHong Zhang   int               *idx,i,start,end,ncols,nzA,nzB,*cmap;
19426be0446SHong Zhang   Mat_MatMatMultMPI *mult;
19526be0446SHong Zhang 
19626be0446SHong Zhang   PetscFunctionBegin;
197ff134f7aSHong Zhang   if (a->cstart != b->rstart || a->cend != b->rend){
198ff134f7aSHong Zhang     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
199ff134f7aSHong Zhang   }
200d6bb3c2dSHong Zhang   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
201d6bb3c2dSHong Zhang 
20226be0446SHong Zhang   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
20326be0446SHong Zhang   start = a->cstart;
20426be0446SHong Zhang   cmap  = a->garray;
20526be0446SHong Zhang   nzA   = a->A->n;
20626be0446SHong Zhang   nzB   = a->B->n;
20726be0446SHong Zhang   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
20826be0446SHong Zhang   ncols = 0;
20926be0446SHong Zhang   for (i=0; i<nzB; i++) {
21026be0446SHong Zhang     if (cmap[i] < start) idx[ncols++] = cmap[i];
21126be0446SHong Zhang     else break;
21226be0446SHong Zhang   }
213ff134f7aSHong Zhang   mult->brstart = i;
21426be0446SHong Zhang   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
215ff134f7aSHong Zhang   for (i=mult->brstart; i<nzB; i++) idx[ncols++] = cmap[i];
216d6bb3c2dSHong Zhang   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr);
21726be0446SHong Zhang   ierr = PetscFree(idx);CHKERRQ(ierr);
218d6bb3c2dSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr);
219ff134f7aSHong Zhang   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr);
22026be0446SHong Zhang 
22126be0446SHong Zhang   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
22226be0446SHong Zhang   start = a->rstart; end = a->rend;
223d6bb3c2dSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
224d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr);
22526be0446SHong Zhang 
22626be0446SHong Zhang   /* compute C_seq = A_seq * B_seq */
227d6bb3c2dSHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
22826be0446SHong Zhang 
22926be0446SHong Zhang   /* create mpi matrix C by concatinating C_seq */
230d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
231d6bb3c2dSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
23226be0446SHong Zhang 
23326be0446SHong Zhang   /* attach the supporting struct to C for reuse of symbolic C */
23426be0446SHong Zhang   (*C)->spptr         = (void*)mult;
23526be0446SHong Zhang   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
23626be0446SHong Zhang 
23726be0446SHong Zhang   PetscFunctionReturn(0);
23826be0446SHong Zhang }
23926be0446SHong Zhang 
24026be0446SHong Zhang #undef __FUNCT__
24126be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
242dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
24358c24d83SHong Zhang {
244dfbe8321SBarry Smith   PetscErrorCode ierr;
24558c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
24658c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
24758c24d83SHong Zhang   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
2481c24bd37SHong Zhang   int            *ci,*cj,*lnk;
2495c66b693SKris Buschelman   int            am=A->M,bn=B->N,bm=B->M;
2500e76890bSHong Zhang   int            i,j,anzi,brow,bnzj,cnzi,nlnk,nspacedouble=0;
25158c24d83SHong Zhang   MatScalar      *ca;
25258c24d83SHong Zhang 
25358c24d83SHong Zhang   PetscFunctionBegin;
25458c24d83SHong Zhang   /* Set up */
25558c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
25658c24d83SHong Zhang   /* free space for accumulating nonzero column info */
25758c24d83SHong Zhang   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
25858c24d83SHong Zhang   ci[0] = 0;
25958c24d83SHong Zhang 
2600e76890bSHong Zhang   /* create and initialize a linked list for symbolic product */
26158c24d83SHong Zhang   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
2620e76890bSHong Zhang   ierr = PetscLLInitialize(bn,bn);CHKERRQ(ierr);
26358c24d83SHong Zhang 
264c5db241fSHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
265d6bb3c2dSHong Zhang   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
26658c24d83SHong Zhang   current_space = free_space;
26758c24d83SHong Zhang 
26858c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
26958c24d83SHong Zhang   for (i=0;i<am;i++) {
27058c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
27158c24d83SHong Zhang     cnzi = 0;
2722d09714cSHong Zhang     j    = anzi;
2732d09714cSHong Zhang     aj   = a->j + ai[i];
2742d09714cSHong Zhang     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
2752d09714cSHong Zhang       j--;
2762d09714cSHong Zhang       brow = *(aj + j);
27758c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
27858c24d83SHong Zhang       bjj  = bj + bi[brow];
2791c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
2800e76890bSHong Zhang       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk);CHKERRQ(ierr);
2811c239cc6SHong Zhang       cnzi += nlnk;
28258c24d83SHong Zhang     }
28358c24d83SHong Zhang 
28458c24d83SHong Zhang     /* If free space is not available, make more free space */
28558c24d83SHong Zhang     /* Double the amount of total space in the list */
28658c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
28758c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
288c5db241fSHong Zhang       nspacedouble++;
28958c24d83SHong Zhang     }
29058c24d83SHong Zhang 
291c5db241fSHong Zhang     /* Copy data into free space, then initialize lnk */
2920e76890bSHong Zhang     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array);CHKERRQ(ierr);
293c5db241fSHong Zhang     current_space->array           += cnzi;
29458c24d83SHong Zhang     current_space->local_used      += cnzi;
29558c24d83SHong Zhang     current_space->local_remaining -= cnzi;
29658c24d83SHong Zhang 
29758c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
29858c24d83SHong Zhang   }
29958c24d83SHong Zhang 
30058c24d83SHong Zhang   /* Column indices are in the list of free space */
30158c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
30258c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
30358c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
30458c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
30558c24d83SHong Zhang   ierr = PetscFree(lnk);CHKERRQ(ierr);
30658c24d83SHong Zhang 
30758c24d83SHong Zhang   /* Allocate space for ca */
30858c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
30958c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
31058c24d83SHong Zhang 
31126be0446SHong Zhang   /* put together the new symbolic matrix */
31258c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
31358c24d83SHong Zhang 
31458c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
31558c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
31658c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
31758c24d83SHong Zhang   c->freedata = PETSC_TRUE;
31858c24d83SHong Zhang   c->nonew    = 0;
31958c24d83SHong Zhang 
320c5db241fSHong Zhang   PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble);
32158c24d83SHong Zhang   PetscFunctionReturn(0);
32258c24d83SHong Zhang }
323d50806bdSBarry Smith 
324c1f4806aSKris Buschelman #undef __FUNCT__
325c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
3265c66b693SKris Buschelman /*@
3275c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3285c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3295c66b693SKris Buschelman 
3305c66b693SKris Buschelman    Collective on Mat
3315c66b693SKris Buschelman 
3325c66b693SKris Buschelman    Input Parameters:
3335c66b693SKris Buschelman +  A - the left matrix
3345c66b693SKris Buschelman -  B - the right matrix
3355c66b693SKris Buschelman 
3365c66b693SKris Buschelman    Output Parameters:
3375c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
3385c66b693SKris Buschelman 
3395c66b693SKris Buschelman    Notes:
3405c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
3415c66b693SKris Buschelman 
3425c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
3435c66b693SKris Buschelman 
3445c66b693SKris Buschelman    Level: intermediate
3455c66b693SKris Buschelman 
3465c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
3475c66b693SKris Buschelman @*/
348dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){
349dfbe8321SBarry Smith   PetscErrorCode ierr;
350cb00f407SKris Buschelman   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
351cb00f407SKris Buschelman   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
3525c66b693SKris Buschelman 
3535c66b693SKris Buschelman   PetscFunctionBegin;
3545c66b693SKris Buschelman 
3554482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
356c9780b6fSBarry Smith   PetscValidType(A,1);
3575c66b693SKris Buschelman   MatPreallocated(A);
3585c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3595c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3605c66b693SKris Buschelman 
3614482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
362c9780b6fSBarry Smith   PetscValidType(B,2);
3635c66b693SKris Buschelman   MatPreallocated(B);
3645c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3655c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3665c66b693SKris Buschelman 
3674482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
368c9780b6fSBarry Smith   PetscValidType(C,3);
3695c66b693SKris Buschelman   MatPreallocated(C);
3705c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3715c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3725c66b693SKris Buschelman 
3735c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
3745c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
3755c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
3765c66b693SKris Buschelman 
377cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
378cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
379cb00f407SKris Buschelman   Anumeric = A->ops->matmultnumeric;
380cb00f407SKris Buschelman   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
381cb00f407SKris Buschelman   Bnumeric = B->ops->matmultnumeric;
382cb00f407SKris Buschelman   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
383cb00f407SKris Buschelman   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
3844d3841fdSKris Buschelman 
385cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
386cb00f407SKris Buschelman   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
387cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
3885c66b693SKris Buschelman 
3895c66b693SKris Buschelman   PetscFunctionReturn(0);
3905c66b693SKris Buschelman }
3915c66b693SKris Buschelman 
392d6bb3c2dSHong Zhang /* This routine is called ONLY in the case of reusing previously computed symbolic C */
393d50806bdSBarry Smith #undef __FUNCT__
39426be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
395dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
39626be0446SHong Zhang {
397dfbe8321SBarry Smith   PetscErrorCode ierr;
398d6bb3c2dSHong Zhang   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr;
399d6bb3c2dSHong Zhang 
40026be0446SHong Zhang   PetscFunctionBegin;
401d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr)
402d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr);
403d6bb3c2dSHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
404d6bb3c2dSHong Zhang 
405d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
406d6bb3c2dSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
407d6bb3c2dSHong Zhang 
40826be0446SHong Zhang   PetscFunctionReturn(0);
40926be0446SHong Zhang }
41026be0446SHong Zhang 
41126be0446SHong Zhang #undef __FUNCT__
41226be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
413dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
414d50806bdSBarry Smith {
415dfbe8321SBarry Smith   PetscErrorCode ierr;
416dfbe8321SBarry Smith   int        flops=0;
417d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
418d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
419d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
420d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
4215c66b693SKris Buschelman   int        am=A->M,cn=C->N;
42294e3eecaSKris Buschelman   int        i,j,k,anzi,bnzi,cnzi,brow;
423d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
424d50806bdSBarry Smith 
425d50806bdSBarry Smith   PetscFunctionBegin;
426d50806bdSBarry Smith 
427d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
428d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
429d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
430d50806bdSBarry Smith   /* Traverse A row-wise. */
431d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
432d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
433d50806bdSBarry Smith   for (i=0;i<am;i++) {
434d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
435d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
436d50806bdSBarry Smith       brow = *aj++;
437d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
438d50806bdSBarry Smith       bjj  = bj + bi[brow];
439d50806bdSBarry Smith       baj  = ba + bi[brow];
440d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
441d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
442d50806bdSBarry Smith       }
443d50806bdSBarry Smith       flops += 2*bnzi;
444d50806bdSBarry Smith       aa++;
445d50806bdSBarry Smith     }
446d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
447d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
448d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
449d50806bdSBarry Smith       ca[j] = temp[cj[j]];
450d50806bdSBarry Smith       temp[cj[j]] = 0.0;
451d50806bdSBarry Smith     }
452d50806bdSBarry Smith     ca += cnzi;
453d50806bdSBarry Smith     cj += cnzi;
454d50806bdSBarry Smith   }
455716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
456716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457716bacf3SKris Buschelman 
458d50806bdSBarry Smith   /* Free temp */
459d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
460d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
461d50806bdSBarry Smith   PetscFunctionReturn(0);
462d50806bdSBarry Smith }
463*bc011b1eSHong Zhang 
464*bc011b1eSHong Zhang #undef __FUNCT__
465*bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose"
466*bc011b1eSHong Zhang /*@
467*bc011b1eSHong Zhang    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
468*bc011b1eSHong Zhang 
469*bc011b1eSHong Zhang    Collective on Mat
470*bc011b1eSHong Zhang 
471*bc011b1eSHong Zhang    Input Parameters:
472*bc011b1eSHong Zhang +  A - the left matrix
473*bc011b1eSHong Zhang .  B - the right matrix
474*bc011b1eSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
475*bc011b1eSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
476*bc011b1eSHong Zhang 
477*bc011b1eSHong Zhang    Output Parameters:
478*bc011b1eSHong Zhang .  C - the product matrix
479*bc011b1eSHong Zhang 
480*bc011b1eSHong Zhang    Notes:
481*bc011b1eSHong Zhang    C will be created and must be destroyed by the user with MatDestroy().
482*bc011b1eSHong Zhang 
483*bc011b1eSHong Zhang    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
484*bc011b1eSHong Zhang    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
485*bc011b1eSHong Zhang 
486*bc011b1eSHong Zhang    Level: intermediate
487*bc011b1eSHong Zhang 
488*bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
489*bc011b1eSHong Zhang @*/
490*bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
491*bc011b1eSHong Zhang {
492*bc011b1eSHong Zhang   PetscErrorCode ierr;
493*bc011b1eSHong Zhang   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
494*bc011b1eSHong Zhang   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
495*bc011b1eSHong Zhang 
496*bc011b1eSHong Zhang   PetscFunctionBegin;
497*bc011b1eSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
498*bc011b1eSHong Zhang   PetscValidType(A,1);
499*bc011b1eSHong Zhang   MatPreallocated(A);
500*bc011b1eSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
501*bc011b1eSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
502*bc011b1eSHong Zhang   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
503*bc011b1eSHong Zhang   PetscValidType(B,2);
504*bc011b1eSHong Zhang   MatPreallocated(B);
505*bc011b1eSHong Zhang   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
506*bc011b1eSHong Zhang   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
507*bc011b1eSHong Zhang   PetscValidPointer(C,3);
508*bc011b1eSHong Zhang   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M);
509*bc011b1eSHong Zhang 
510*bc011b1eSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
511*bc011b1eSHong Zhang 
512*bc011b1eSHong Zhang   fA = A->ops->matmulttranspose;
513*bc011b1eSHong Zhang   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
514*bc011b1eSHong Zhang   fB = B->ops->matmulttranspose;
515*bc011b1eSHong Zhang   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
516*bc011b1eSHong Zhang   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
517*bc011b1eSHong Zhang 
518*bc011b1eSHong Zhang   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
519*bc011b1eSHong Zhang   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
520*bc011b1eSHong Zhang   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
521*bc011b1eSHong Zhang 
522*bc011b1eSHong Zhang   PetscFunctionReturn(0);
523*bc011b1eSHong Zhang }
524*bc011b1eSHong Zhang 
525*bc011b1eSHong Zhang #undef __FUNCT__
526*bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
527*bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
528*bc011b1eSHong Zhang   PetscErrorCode ierr;
529*bc011b1eSHong Zhang 
530*bc011b1eSHong Zhang   PetscFunctionBegin;
531*bc011b1eSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
532*bc011b1eSHong Zhang     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
533*bc011b1eSHong Zhang   }
534*bc011b1eSHong Zhang   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
535*bc011b1eSHong Zhang   PetscFunctionReturn(0);
536*bc011b1eSHong Zhang }
537*bc011b1eSHong Zhang 
538*bc011b1eSHong Zhang #undef __FUNCT__
539*bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
540*bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
541*bc011b1eSHong Zhang {
542*bc011b1eSHong Zhang   PetscErrorCode ierr;
543*bc011b1eSHong Zhang   Mat            At;
544*bc011b1eSHong Zhang   int            *ati,*atj;
545*bc011b1eSHong Zhang 
546*bc011b1eSHong Zhang   PetscFunctionBegin;
547*bc011b1eSHong Zhang   /* create symbolic At */
548*bc011b1eSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
549*bc011b1eSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
550*bc011b1eSHong Zhang 
551*bc011b1eSHong Zhang   /* get symbolic C=At*B */
552*bc011b1eSHong Zhang   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
553*bc011b1eSHong Zhang 
554*bc011b1eSHong Zhang   /* clean up */
555*bc011b1eSHong Zhang   ierr = MatDestroy(At);CHKERRQ(ierr);
556*bc011b1eSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
557*bc011b1eSHong Zhang 
558*bc011b1eSHong Zhang   PetscFunctionReturn(0);
559*bc011b1eSHong Zhang }
560*bc011b1eSHong Zhang 
561*bc011b1eSHong Zhang #undef __FUNCT__
562*bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
563*bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
564*bc011b1eSHong Zhang {
565*bc011b1eSHong Zhang   PetscErrorCode ierr;
566*bc011b1eSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,
567*bc011b1eSHong Zhang                  *b = (Mat_SeqAIJ*)B->data,
568*bc011b1eSHong Zhang                  *c = (Mat_SeqAIJ*)C->data;;
569*bc011b1eSHong Zhang   int            am=A->m,anzi,*ai=b->i,*aJ=a->j,
570*bc011b1eSHong Zhang                  *bi=b->i,*bj,bnzi,nextb,bcol,
571*bc011b1eSHong Zhang                  cn=C->n,cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj;
572*bc011b1eSHong Zhang   int            i,j,k,flops=0;
573*bc011b1eSHong Zhang   MatScalar      *aA=a->a,*ba,*ca=c->a,*caj;
574*bc011b1eSHong Zhang 
575*bc011b1eSHong Zhang   PetscFunctionBegin;
576*bc011b1eSHong Zhang   /* clear old values in C */
577*bc011b1eSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
578*bc011b1eSHong Zhang 
579*bc011b1eSHong Zhang   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
580*bc011b1eSHong Zhang   for (i=0;i<am;i++) {
581*bc011b1eSHong Zhang     bj   = b->j + bi[i];
582*bc011b1eSHong Zhang     ba   = b->a + bi[i];
583*bc011b1eSHong Zhang     bnzi = bi[i+1] - bi[i];
584*bc011b1eSHong Zhang     anzi = ai[i+1] - ai[i];
585*bc011b1eSHong Zhang     for (j=0; j<anzi; j++) {
586*bc011b1eSHong Zhang       nextb = 0;
587*bc011b1eSHong Zhang       crow  = *aJ++;
588*bc011b1eSHong Zhang       cjj   = cj + ci[crow];
589*bc011b1eSHong Zhang       caj   = ca + ci[crow];
590*bc011b1eSHong Zhang       /* perform sparse axpy operation.  Note cjj includes bj. */
591*bc011b1eSHong Zhang       for (k=0; nextb<bnzi; k++) {
592*bc011b1eSHong Zhang         bcol = *(bj+nextb);
593*bc011b1eSHong Zhang         if (cjj[k] == bcol) { /* ccol == bcol */
594*bc011b1eSHong Zhang           caj[k] += (*aA)*(*(ba+nextb));
595*bc011b1eSHong Zhang           nextb++;
596*bc011b1eSHong Zhang         }
597*bc011b1eSHong Zhang       }
598*bc011b1eSHong Zhang       flops += 2*bnzi;
599*bc011b1eSHong Zhang       aA++;
600*bc011b1eSHong Zhang     }
601*bc011b1eSHong Zhang   }
602*bc011b1eSHong Zhang 
603*bc011b1eSHong Zhang   /* Assemble the final matrix and clean up */
604*bc011b1eSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
605*bc011b1eSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
606*bc011b1eSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
607*bc011b1eSHong Zhang 
608*bc011b1eSHong Zhang   PetscFunctionReturn(0);
609*bc011b1eSHong Zhang }
610