xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 7f79fd589f0cc333cb4f63ed4d995034cf8282ff)
1d50806bdSBarry Smith /*
2a50f8bf6SHong Zhang   Defines matrix-matrix product routines for pairs of AIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9be0fcf8dSHong Zhang #include "petscbt.h"
10d50806bdSBarry Smith 
11c1f4806aSKris Buschelman #undef __FUNCT__
12c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
135c66b693SKris Buschelman /*@
145c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
1594e3eecaSKris Buschelman 
165c66b693SKris Buschelman    Collective on Mat
17d50806bdSBarry Smith 
185c66b693SKris Buschelman    Input Parameters:
195c66b693SKris Buschelman +  A - the left matrix
201c741599SHong Zhang .  B - the right matrix
211c741599SHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
235c66b693SKris Buschelman 
245c66b693SKris Buschelman    Output Parameters:
255c66b693SKris Buschelman .  C - the product matrix
265c66b693SKris Buschelman 
275c66b693SKris Buschelman    Notes:
285c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
295c66b693SKris Buschelman 
30bc011b1eSHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
31bc011b1eSHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
325c66b693SKris Buschelman 
335c66b693SKris Buschelman    Level: intermediate
345c66b693SKris Buschelman 
355c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
365c66b693SKris Buschelman @*/
37dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
382d09714cSHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40cb00f407SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
41cb00f407SKris Buschelman   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
422d09714cSHong Zhang 
43d50806bdSBarry Smith   PetscFunctionBegin;
444482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45c9780b6fSBarry Smith   PetscValidType(A,1);
465c66b693SKris Buschelman   MatPreallocated(A);
475c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
485c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
494482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
50c9780b6fSBarry Smith   PetscValidType(B,2);
515c66b693SKris Buschelman   MatPreallocated(B);
525c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
535c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
544482741eSBarry Smith   PetscValidPointer(C,3);
555c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
56d50806bdSBarry Smith 
571c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
58c5db241fSHong Zhang 
59cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
60cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
61cb00f407SKris Buschelman   fA = A->ops->matmult;
62cb00f407SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
63cb00f407SKris Buschelman   fB = B->ops->matmult;
64cb00f407SKris Buschelman   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
65cb00f407SKris Buschelman   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
66cb00f407SKris Buschelman 
676284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
681c741599SHong Zhang   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
696284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
704d3841fdSKris Buschelman 
716284ec50SHong Zhang   PetscFunctionReturn(0);
726284ec50SHong Zhang }
736284ec50SHong Zhang 
746284ec50SHong Zhang #undef __FUNCT__
756284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
76dfbe8321SBarry Smith PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
772d09714cSHong Zhang {
78dfbe8321SBarry Smith   PetscErrorCode ierr;
796284ec50SHong Zhang 
806284ec50SHong Zhang   PetscFunctionBegin;
8126be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
82d6bb3c2dSHong Zhang     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
83d6bb3c2dSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
8426be0446SHong Zhang     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
85d6bb3c2dSHong Zhang   } else {
86d6bb3c2dSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
87d6bb3c2dSHong Zhang   }
88d50806bdSBarry Smith   PetscFunctionReturn(0);
89d50806bdSBarry Smith }
905c66b693SKris Buschelman 
915c66b693SKris Buschelman #undef __FUNCT__
925c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
93dfbe8321SBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
94dfbe8321SBarry Smith   PetscErrorCode ierr;
955c66b693SKris Buschelman 
965c66b693SKris Buschelman   PetscFunctionBegin;
9726be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
9826be0446SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
9926be0446SHong Zhang   }
10026be0446SHong Zhang   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
1015c66b693SKris Buschelman   PetscFunctionReturn(0);
1025c66b693SKris Buschelman }
1035c66b693SKris Buschelman 
104c1f4806aSKris Buschelman #undef __FUNCT__
105c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
1065c66b693SKris Buschelman /*@
1075c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
1085c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
1095c66b693SKris Buschelman 
1105c66b693SKris Buschelman    Collective on Mat
1115c66b693SKris Buschelman 
1125c66b693SKris Buschelman    Input Parameters:
1135c66b693SKris Buschelman +  A - the left matrix
114c5db241fSHong Zhang .  B - the right matrix
115c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
1165c66b693SKris Buschelman 
1175c66b693SKris Buschelman    Output Parameters:
1185c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
1195c66b693SKris Buschelman 
1205c66b693SKris Buschelman    Notes:
1214d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
1225c66b693SKris Buschelman 
1234d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
1245c66b693SKris Buschelman 
1255c66b693SKris Buschelman    Level: intermediate
1265c66b693SKris Buschelman 
1275c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
1285c66b693SKris Buschelman @*/
129dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
130dfbe8321SBarry Smith   PetscErrorCode ierr;
131cb00f407SKris Buschelman   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
132cb00f407SKris Buschelman   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
1335c66b693SKris Buschelman 
1345c66b693SKris Buschelman   PetscFunctionBegin;
1354482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
136c9780b6fSBarry Smith   PetscValidType(A,1);
1375c66b693SKris Buschelman   MatPreallocated(A);
1385c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1395c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1405c66b693SKris Buschelman 
1414482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
142c9780b6fSBarry Smith   PetscValidType(B,2);
1435c66b693SKris Buschelman   MatPreallocated(B);
1445c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1455c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1464482741eSBarry Smith   PetscValidPointer(C,3);
1474482741eSBarry Smith 
1485c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
1491c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
1505c66b693SKris Buschelman 
151cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
152cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
153cb00f407SKris Buschelman   Asymbolic = A->ops->matmultsymbolic;
154cb00f407SKris Buschelman   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
155cb00f407SKris Buschelman   Bsymbolic = B->ops->matmultsymbolic;
156cb00f407SKris Buschelman   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
157cb00f407SKris Buschelman   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
158cb00f407SKris Buschelman 
159cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
160cb00f407SKris Buschelman   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
161cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1625c66b693SKris Buschelman 
1635c66b693SKris Buschelman   PetscFunctionReturn(0);
1645c66b693SKris Buschelman }
1651c24bd37SHong Zhang 
166dfbe8321SBarry Smith EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
16726be0446SHong Zhang #undef __FUNCT__
16826be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
169dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
17026be0446SHong Zhang {
171dfbe8321SBarry Smith   PetscErrorCode       ierr;
1727f93b363SHong Zhang   Mat_MatMatMultMPI    *mult;
1737f93b363SHong Zhang   PetscObjectContainer container;
17426be0446SHong Zhang 
17526be0446SHong Zhang   PetscFunctionBegin;
1767f93b363SHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
1777f93b363SHong Zhang   if (container) {
178*7f79fd58SMatthew Knepley     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
179d6bb3c2dSHong Zhang     ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
180d6bb3c2dSHong Zhang     ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
181d6bb3c2dSHong Zhang     ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
18297c2bf28SHong Zhang     ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr);
18397c2bf28SHong Zhang     ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr);
184d6bb3c2dSHong Zhang     ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
185d6bb3c2dSHong Zhang 
1867f93b363SHong Zhang     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
1877f93b363SHong Zhang     ierr = PetscObjectCompose((PetscObject)A,"MatMatMultMPI",0);CHKERRQ(ierr);
1887f93b363SHong Zhang   }
1897f93b363SHong Zhang   ierr = PetscFree(mult);CHKERRQ(ierr);
19026be0446SHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
19126be0446SHong Zhang 
19226be0446SHong Zhang   PetscFunctionReturn(0);
19326be0446SHong Zhang }
19458c24d83SHong Zhang 
19558c24d83SHong Zhang #undef __FUNCT__
19626be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
197dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
19826be0446SHong Zhang {
199ff134f7aSHong Zhang   Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
200dfbe8321SBarry Smith   PetscErrorCode       ierr;
20116130426SHong Zhang   int                  start,end;
20226be0446SHong Zhang   Mat_MatMatMultMPI    *mult;
2037f93b363SHong Zhang   PetscObjectContainer container;
20426be0446SHong Zhang 
20526be0446SHong Zhang   PetscFunctionBegin;
206ff134f7aSHong Zhang   if (a->cstart != b->rstart || a->cend != b->rend){
207ff134f7aSHong Zhang     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
208ff134f7aSHong Zhang   }
209d6bb3c2dSHong Zhang   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
210d6bb3c2dSHong Zhang 
21126be0446SHong Zhang   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
21297c2bf28SHong Zhang   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr);
21326be0446SHong Zhang 
21426be0446SHong Zhang   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
21526be0446SHong Zhang   start = a->rstart; end = a->rend;
216d6bb3c2dSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
21797c2bf28SHong Zhang   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr);
21826be0446SHong Zhang 
21926be0446SHong Zhang   /* compute C_seq = A_seq * B_seq */
22097c2bf28SHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
22126be0446SHong Zhang 
22226be0446SHong Zhang   /* create mpi matrix C by concatinating C_seq */
223d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
2240e36024fSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
22526be0446SHong Zhang 
22626be0446SHong Zhang   /* attach the supporting struct to C for reuse of symbolic C */
2277f93b363SHong Zhang   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2287f93b363SHong Zhang   ierr = PetscObjectContainerSetPointer(container,mult);CHKERRQ(ierr);
2297f93b363SHong Zhang   ierr = PetscObjectCompose((PetscObject)(*C),"MatMatMultMPI",(PetscObject)container);CHKERRQ(ierr);
2307f93b363SHong Zhang 
23126be0446SHong Zhang   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
23226be0446SHong Zhang 
23326be0446SHong Zhang   PetscFunctionReturn(0);
23426be0446SHong Zhang }
23526be0446SHong Zhang 
23626be0446SHong Zhang #undef __FUNCT__
23726be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
238dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
23958c24d83SHong Zhang {
240dfbe8321SBarry Smith   PetscErrorCode ierr;
24158c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
24258c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
243be0fcf8dSHong Zhang   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
2445c66b693SKris Buschelman   int            am=A->M,bn=B->N,bm=B->M;
245be0fcf8dSHong Zhang   int            i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
24658c24d83SHong Zhang   MatScalar      *ca;
247be0fcf8dSHong Zhang   PetscBT        lnkbt;
24858c24d83SHong Zhang 
24958c24d83SHong Zhang   PetscFunctionBegin;
25058c24d83SHong Zhang   /* Set up */
25158c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
25258c24d83SHong Zhang   /* free space for accumulating nonzero column info */
25358c24d83SHong Zhang   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
25458c24d83SHong Zhang   ci[0] = 0;
25558c24d83SHong Zhang 
256be0fcf8dSHong Zhang   /* create and initialize a linked list */
257be0fcf8dSHong Zhang   nlnk = bn+1;
258be0fcf8dSHong Zhang   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
25958c24d83SHong Zhang 
260c5db241fSHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
261d6bb3c2dSHong Zhang   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
26258c24d83SHong Zhang   current_space = free_space;
26358c24d83SHong Zhang 
26458c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
26558c24d83SHong Zhang   for (i=0;i<am;i++) {
26658c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
26758c24d83SHong Zhang     cnzi = 0;
2682d09714cSHong Zhang     j    = anzi;
2692d09714cSHong Zhang     aj   = a->j + ai[i];
2702d09714cSHong Zhang     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
2712d09714cSHong Zhang       j--;
2722d09714cSHong Zhang       brow = *(aj + j);
27358c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
27458c24d83SHong Zhang       bjj  = bj + bi[brow];
2751c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
276be0fcf8dSHong Zhang       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2771c239cc6SHong Zhang       cnzi += nlnk;
27858c24d83SHong Zhang     }
27958c24d83SHong Zhang 
28058c24d83SHong Zhang     /* If free space is not available, make more free space */
28158c24d83SHong Zhang     /* Double the amount of total space in the list */
28258c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
28358c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
284c5db241fSHong Zhang       nspacedouble++;
28558c24d83SHong Zhang     }
28658c24d83SHong Zhang 
287c5db241fSHong Zhang     /* Copy data into free space, then initialize lnk */
288be0fcf8dSHong Zhang     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
289c5db241fSHong Zhang     current_space->array           += cnzi;
29058c24d83SHong Zhang     current_space->local_used      += cnzi;
29158c24d83SHong Zhang     current_space->local_remaining -= cnzi;
29258c24d83SHong Zhang 
29358c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
29458c24d83SHong Zhang   }
29558c24d83SHong Zhang 
29658c24d83SHong Zhang   /* Column indices are in the list of free space */
29758c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
29858c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
29958c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
30058c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
301be0fcf8dSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
30258c24d83SHong Zhang 
30358c24d83SHong Zhang   /* Allocate space for ca */
30458c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
30558c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
30658c24d83SHong Zhang 
30726be0446SHong Zhang   /* put together the new symbolic matrix */
30858c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
30958c24d83SHong Zhang 
31058c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
31158c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
31258c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
31358c24d83SHong Zhang   c->freedata = PETSC_TRUE;
31458c24d83SHong Zhang   c->nonew    = 0;
31558c24d83SHong Zhang 
316be0fcf8dSHong Zhang   if (nspacedouble){
317be0fcf8dSHong Zhang     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
318be0fcf8dSHong Zhang   }
31958c24d83SHong Zhang   PetscFunctionReturn(0);
32058c24d83SHong Zhang }
321d50806bdSBarry Smith 
322c1f4806aSKris Buschelman #undef __FUNCT__
323c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
3245c66b693SKris Buschelman /*@
3255c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3265c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3275c66b693SKris Buschelman 
3285c66b693SKris Buschelman    Collective on Mat
3295c66b693SKris Buschelman 
3305c66b693SKris Buschelman    Input Parameters:
3315c66b693SKris Buschelman +  A - the left matrix
3325c66b693SKris Buschelman -  B - the right matrix
3335c66b693SKris Buschelman 
3345c66b693SKris Buschelman    Output Parameters:
3355c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
3365c66b693SKris Buschelman 
3375c66b693SKris Buschelman    Notes:
3385c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
3395c66b693SKris Buschelman 
3405c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
3415c66b693SKris Buschelman 
3425c66b693SKris Buschelman    Level: intermediate
3435c66b693SKris Buschelman 
3445c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
3455c66b693SKris Buschelman @*/
346dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){
347dfbe8321SBarry Smith   PetscErrorCode ierr;
348cb00f407SKris Buschelman   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
349cb00f407SKris Buschelman   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
3505c66b693SKris Buschelman 
3515c66b693SKris Buschelman   PetscFunctionBegin;
3525c66b693SKris Buschelman 
3534482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
354c9780b6fSBarry Smith   PetscValidType(A,1);
3555c66b693SKris Buschelman   MatPreallocated(A);
3565c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3575c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3585c66b693SKris Buschelman 
3594482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
360c9780b6fSBarry Smith   PetscValidType(B,2);
3615c66b693SKris Buschelman   MatPreallocated(B);
3625c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3635c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3645c66b693SKris Buschelman 
3654482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
366c9780b6fSBarry Smith   PetscValidType(C,3);
3675c66b693SKris Buschelman   MatPreallocated(C);
3685c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3695c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3705c66b693SKris Buschelman 
3715c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
3725c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
3735c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
3745c66b693SKris Buschelman 
375cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
376cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
377cb00f407SKris Buschelman   Anumeric = A->ops->matmultnumeric;
378cb00f407SKris Buschelman   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
379cb00f407SKris Buschelman   Bnumeric = B->ops->matmultnumeric;
380cb00f407SKris Buschelman   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
381cb00f407SKris Buschelman   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
3824d3841fdSKris Buschelman 
383cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
384cb00f407SKris Buschelman   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
385cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
3865c66b693SKris Buschelman 
3875c66b693SKris Buschelman   PetscFunctionReturn(0);
3885c66b693SKris Buschelman }
3895c66b693SKris Buschelman 
390d6bb3c2dSHong Zhang /* This routine is called ONLY in the case of reusing previously computed symbolic C */
391d50806bdSBarry Smith #undef __FUNCT__
39226be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
393dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
39426be0446SHong Zhang {
395dfbe8321SBarry Smith   PetscErrorCode       ierr;
39697c2bf28SHong Zhang   Mat                  *seq;
3977f93b363SHong Zhang   Mat_MatMatMultMPI    *mult;
3987f93b363SHong Zhang   PetscObjectContainer container;
399d6bb3c2dSHong Zhang 
40026be0446SHong Zhang   PetscFunctionBegin;
4017f93b363SHong Zhang   ierr = PetscObjectQuery((PetscObject)C,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
4027f93b363SHong Zhang   if (container) {
403*7f79fd58SMatthew Knepley     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
4047f93b363SHong Zhang   }
4057f93b363SHong Zhang 
40697c2bf28SHong Zhang   seq = &mult->B_seq;
40797c2bf28SHong Zhang   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
40897c2bf28SHong Zhang   mult->B_seq = *seq;
40997c2bf28SHong Zhang 
41097c2bf28SHong Zhang   seq = &mult->A_loc;
41197c2bf28SHong Zhang   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
41297c2bf28SHong Zhang   mult->A_loc = *seq;
41397c2bf28SHong Zhang 
41497c2bf28SHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
415d6bb3c2dSHong Zhang 
416d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
4170e36024fSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
418d6bb3c2dSHong Zhang 
41926be0446SHong Zhang   PetscFunctionReturn(0);
42026be0446SHong Zhang }
42126be0446SHong Zhang 
42226be0446SHong Zhang #undef __FUNCT__
42326be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
424dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
425d50806bdSBarry Smith {
426dfbe8321SBarry Smith   PetscErrorCode ierr;
427dfbe8321SBarry Smith   int        flops=0;
428d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
429d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
430d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
431d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
4325c66b693SKris Buschelman   int        am=A->M,cn=C->N;
43394e3eecaSKris Buschelman   int        i,j,k,anzi,bnzi,cnzi,brow;
434d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
435d50806bdSBarry Smith 
436d50806bdSBarry Smith   PetscFunctionBegin;
437d50806bdSBarry Smith 
438d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
439d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
440d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
441d50806bdSBarry Smith   /* Traverse A row-wise. */
442d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
443d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
444d50806bdSBarry Smith   for (i=0;i<am;i++) {
445d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
446d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
447d50806bdSBarry Smith       brow = *aj++;
448d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
449d50806bdSBarry Smith       bjj  = bj + bi[brow];
450d50806bdSBarry Smith       baj  = ba + bi[brow];
451d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
452d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
453d50806bdSBarry Smith       }
454d50806bdSBarry Smith       flops += 2*bnzi;
455d50806bdSBarry Smith       aa++;
456d50806bdSBarry Smith     }
457d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
458d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
459d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
460d50806bdSBarry Smith       ca[j] = temp[cj[j]];
461d50806bdSBarry Smith       temp[cj[j]] = 0.0;
462d50806bdSBarry Smith     }
463d50806bdSBarry Smith     ca += cnzi;
464d50806bdSBarry Smith     cj += cnzi;
465d50806bdSBarry Smith   }
466716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468716bacf3SKris Buschelman 
469d50806bdSBarry Smith   /* Free temp */
470d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
471d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
472d50806bdSBarry Smith   PetscFunctionReturn(0);
473d50806bdSBarry Smith }
474bc011b1eSHong Zhang 
475bc011b1eSHong Zhang #undef __FUNCT__
476bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose"
477bc011b1eSHong Zhang /*@
478bc011b1eSHong Zhang    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
479bc011b1eSHong Zhang 
480bc011b1eSHong Zhang    Collective on Mat
481bc011b1eSHong Zhang 
482bc011b1eSHong Zhang    Input Parameters:
483bc011b1eSHong Zhang +  A - the left matrix
484bc011b1eSHong Zhang .  B - the right matrix
485bc011b1eSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
486bc011b1eSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
487bc011b1eSHong Zhang 
488bc011b1eSHong Zhang    Output Parameters:
489bc011b1eSHong Zhang .  C - the product matrix
490bc011b1eSHong Zhang 
491bc011b1eSHong Zhang    Notes:
492bc011b1eSHong Zhang    C will be created and must be destroyed by the user with MatDestroy().
493bc011b1eSHong Zhang 
494bc011b1eSHong Zhang    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
495bc011b1eSHong Zhang    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
496bc011b1eSHong Zhang 
497bc011b1eSHong Zhang    Level: intermediate
498bc011b1eSHong Zhang 
499bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
500bc011b1eSHong Zhang @*/
501bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
502bc011b1eSHong Zhang {
503bc011b1eSHong Zhang   PetscErrorCode ierr;
504bc011b1eSHong Zhang   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
505bc011b1eSHong Zhang   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
506bc011b1eSHong Zhang 
507bc011b1eSHong Zhang   PetscFunctionBegin;
508bc011b1eSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
509bc011b1eSHong Zhang   PetscValidType(A,1);
510bc011b1eSHong Zhang   MatPreallocated(A);
511bc011b1eSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
512bc011b1eSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
513bc011b1eSHong Zhang   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
514bc011b1eSHong Zhang   PetscValidType(B,2);
515bc011b1eSHong Zhang   MatPreallocated(B);
516bc011b1eSHong Zhang   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
517bc011b1eSHong Zhang   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
518bc011b1eSHong Zhang   PetscValidPointer(C,3);
519bc011b1eSHong Zhang   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M);
520bc011b1eSHong Zhang 
521bc011b1eSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
522bc011b1eSHong Zhang 
523bc011b1eSHong Zhang   fA = A->ops->matmulttranspose;
524bc011b1eSHong Zhang   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
525bc011b1eSHong Zhang   fB = B->ops->matmulttranspose;
526bc011b1eSHong Zhang   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
527bc011b1eSHong Zhang   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
528bc011b1eSHong Zhang 
529bc011b1eSHong Zhang   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
530bc011b1eSHong Zhang   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
531bc011b1eSHong Zhang   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
532bc011b1eSHong Zhang 
533bc011b1eSHong Zhang   PetscFunctionReturn(0);
534bc011b1eSHong Zhang }
535bc011b1eSHong Zhang 
536bc011b1eSHong Zhang #undef __FUNCT__
537bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
538bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
539bc011b1eSHong Zhang   PetscErrorCode ierr;
540bc011b1eSHong Zhang 
541bc011b1eSHong Zhang   PetscFunctionBegin;
542bc011b1eSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
543bc011b1eSHong Zhang     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
544bc011b1eSHong Zhang   }
545bc011b1eSHong Zhang   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
546bc011b1eSHong Zhang   PetscFunctionReturn(0);
547bc011b1eSHong Zhang }
548bc011b1eSHong Zhang 
549bc011b1eSHong Zhang #undef __FUNCT__
550bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
551bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
552bc011b1eSHong Zhang {
553bc011b1eSHong Zhang   PetscErrorCode ierr;
554bc011b1eSHong Zhang   Mat            At;
555bc011b1eSHong Zhang   int            *ati,*atj;
556bc011b1eSHong Zhang 
557bc011b1eSHong Zhang   PetscFunctionBegin;
558bc011b1eSHong Zhang   /* create symbolic At */
559bc011b1eSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
560bc011b1eSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
561bc011b1eSHong Zhang 
562bc011b1eSHong Zhang   /* get symbolic C=At*B */
563bc011b1eSHong Zhang   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
564bc011b1eSHong Zhang 
565bc011b1eSHong Zhang   /* clean up */
566bc011b1eSHong Zhang   ierr = MatDestroy(At);CHKERRQ(ierr);
567bc011b1eSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
568bc011b1eSHong Zhang 
569bc011b1eSHong Zhang   PetscFunctionReturn(0);
570bc011b1eSHong Zhang }
571bc011b1eSHong Zhang 
572bc011b1eSHong Zhang #undef __FUNCT__
573bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
574bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
575bc011b1eSHong Zhang {
576bc011b1eSHong Zhang   PetscErrorCode ierr;
5770fbc74f4SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
578170ef064SHong Zhang   int            am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
5790fbc74f4SHong Zhang   int            cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
5800fbc74f4SHong Zhang   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
581bc011b1eSHong Zhang 
582bc011b1eSHong Zhang   PetscFunctionBegin;
583bc011b1eSHong Zhang   /* clear old values in C */
584bc011b1eSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
585bc011b1eSHong Zhang 
586bc011b1eSHong Zhang   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
587bc011b1eSHong Zhang   for (i=0;i<am;i++) {
588bc011b1eSHong Zhang     bj   = b->j + bi[i];
589bc011b1eSHong Zhang     ba   = b->a + bi[i];
590bc011b1eSHong Zhang     bnzi = bi[i+1] - bi[i];
591bc011b1eSHong Zhang     anzi = ai[i+1] - ai[i];
592bc011b1eSHong Zhang     for (j=0; j<anzi; j++) {
593bc011b1eSHong Zhang       nextb = 0;
5940fbc74f4SHong Zhang       crow  = *aj++;
595bc011b1eSHong Zhang       cjj   = cj + ci[crow];
596bc011b1eSHong Zhang       caj   = ca + ci[crow];
597bc011b1eSHong Zhang       /* perform sparse axpy operation.  Note cjj includes bj. */
598bc011b1eSHong Zhang       for (k=0; nextb<bnzi; k++) {
5990fbc74f4SHong Zhang         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
6000fbc74f4SHong Zhang           caj[k] += (*aa)*(*(ba+nextb));
601bc011b1eSHong Zhang           nextb++;
602bc011b1eSHong Zhang         }
603bc011b1eSHong Zhang       }
604bc011b1eSHong Zhang       flops += 2*bnzi;
6050fbc74f4SHong Zhang       aa++;
606bc011b1eSHong Zhang     }
607bc011b1eSHong Zhang   }
608bc011b1eSHong Zhang 
609bc011b1eSHong Zhang   /* Assemble the final matrix and clean up */
610bc011b1eSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
611bc011b1eSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
612bc011b1eSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
613bc011b1eSHong Zhang   PetscFunctionReturn(0);
614bc011b1eSHong Zhang }
615