xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 38baddfdda9fcd85a080f141519fb97fb01bc0a4)
1d50806bdSBarry Smith /*
2a50f8bf6SHong Zhang   Defines matrix-matrix product routines for pairs of AIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9be0fcf8dSHong Zhang #include "petscbt.h"
10d50806bdSBarry Smith 
11c1f4806aSKris Buschelman #undef __FUNCT__
12c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
135c66b693SKris Buschelman /*@
145c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
1594e3eecaSKris Buschelman 
165c66b693SKris Buschelman    Collective on Mat
17d50806bdSBarry Smith 
185c66b693SKris Buschelman    Input Parameters:
195c66b693SKris Buschelman +  A - the left matrix
201c741599SHong Zhang .  B - the right matrix
211c741599SHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
235c66b693SKris Buschelman 
245c66b693SKris Buschelman    Output Parameters:
255c66b693SKris Buschelman .  C - the product matrix
265c66b693SKris Buschelman 
275c66b693SKris Buschelman    Notes:
285c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
295c66b693SKris Buschelman 
30bc011b1eSHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
31bc011b1eSHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
325c66b693SKris Buschelman 
335c66b693SKris Buschelman    Level: intermediate
345c66b693SKris Buschelman 
355c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
365c66b693SKris Buschelman @*/
37dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
382d09714cSHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40cb00f407SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
41cb00f407SKris Buschelman   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
422d09714cSHong Zhang 
43d50806bdSBarry Smith   PetscFunctionBegin;
444482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45c9780b6fSBarry Smith   PetscValidType(A,1);
465c66b693SKris Buschelman   MatPreallocated(A);
475c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
485c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
494482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
50c9780b6fSBarry Smith   PetscValidType(B,2);
515c66b693SKris Buschelman   MatPreallocated(B);
525c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
535c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
544482741eSBarry Smith   PetscValidPointer(C,3);
555c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
56d50806bdSBarry Smith 
571c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
58c5db241fSHong Zhang 
59cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
60cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
61cb00f407SKris Buschelman   fA = A->ops->matmult;
62cb00f407SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
63cb00f407SKris Buschelman   fB = B->ops->matmult;
64cb00f407SKris Buschelman   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
65cb00f407SKris Buschelman   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
66cb00f407SKris Buschelman 
676284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
681c741599SHong Zhang   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
696284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
704d3841fdSKris Buschelman 
716284ec50SHong Zhang   PetscFunctionReturn(0);
726284ec50SHong Zhang }
736284ec50SHong Zhang 
746284ec50SHong Zhang #undef __FUNCT__
756284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
76dfbe8321SBarry Smith PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
772d09714cSHong Zhang {
78dfbe8321SBarry Smith   PetscErrorCode ierr;
796284ec50SHong Zhang 
806284ec50SHong Zhang   PetscFunctionBegin;
8126be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
82d6bb3c2dSHong Zhang     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
83d6bb3c2dSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
8426be0446SHong Zhang     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
85d6bb3c2dSHong Zhang   } else {
86d6bb3c2dSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
87d6bb3c2dSHong Zhang   }
88d50806bdSBarry Smith   PetscFunctionReturn(0);
89d50806bdSBarry Smith }
905c66b693SKris Buschelman 
915c66b693SKris Buschelman #undef __FUNCT__
925c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
93*38baddfdSBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
94*38baddfdSBarry Smith {
95dfbe8321SBarry Smith   PetscErrorCode ierr;
965c66b693SKris Buschelman 
975c66b693SKris Buschelman   PetscFunctionBegin;
9826be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
9926be0446SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
10026be0446SHong Zhang   }
10126be0446SHong Zhang   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
1025c66b693SKris Buschelman   PetscFunctionReturn(0);
1035c66b693SKris Buschelman }
1045c66b693SKris Buschelman 
105c1f4806aSKris Buschelman #undef __FUNCT__
106c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
1075c66b693SKris Buschelman /*@
1085c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
1095c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
1105c66b693SKris Buschelman 
1115c66b693SKris Buschelman    Collective on Mat
1125c66b693SKris Buschelman 
1135c66b693SKris Buschelman    Input Parameters:
1145c66b693SKris Buschelman +  A - the left matrix
115c5db241fSHong Zhang .  B - the right matrix
116c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
1175c66b693SKris Buschelman 
1185c66b693SKris Buschelman    Output Parameters:
1195c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
1205c66b693SKris Buschelman 
1215c66b693SKris Buschelman    Notes:
1224d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
1235c66b693SKris Buschelman 
1244d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
1255c66b693SKris Buschelman 
1265c66b693SKris Buschelman    Level: intermediate
1275c66b693SKris Buschelman 
1285c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
1295c66b693SKris Buschelman @*/
130*38baddfdSBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C)
131*38baddfdSBarry Smith {
132dfbe8321SBarry Smith   PetscErrorCode ierr;
133cb00f407SKris Buschelman   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
134cb00f407SKris Buschelman   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
1355c66b693SKris Buschelman 
1365c66b693SKris Buschelman   PetscFunctionBegin;
1374482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
138c9780b6fSBarry Smith   PetscValidType(A,1);
1395c66b693SKris Buschelman   MatPreallocated(A);
1405c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1415c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1425c66b693SKris Buschelman 
1434482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
144c9780b6fSBarry Smith   PetscValidType(B,2);
1455c66b693SKris Buschelman   MatPreallocated(B);
1465c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1475c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1484482741eSBarry Smith   PetscValidPointer(C,3);
1494482741eSBarry Smith 
1505c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
1511c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
1525c66b693SKris Buschelman 
153cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
154cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
155cb00f407SKris Buschelman   Asymbolic = A->ops->matmultsymbolic;
156cb00f407SKris Buschelman   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
157cb00f407SKris Buschelman   Bsymbolic = B->ops->matmultsymbolic;
158cb00f407SKris Buschelman   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
159cb00f407SKris Buschelman   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
160cb00f407SKris Buschelman 
161cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
162cb00f407SKris Buschelman   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
163cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1645c66b693SKris Buschelman 
1655c66b693SKris Buschelman   PetscFunctionReturn(0);
1665c66b693SKris Buschelman }
1671c24bd37SHong Zhang 
168dfbe8321SBarry Smith EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
16926be0446SHong Zhang #undef __FUNCT__
17026be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
171dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
17226be0446SHong Zhang {
173dfbe8321SBarry Smith   PetscErrorCode       ierr;
1747f93b363SHong Zhang   Mat_MatMatMultMPI    *mult;
1757f93b363SHong Zhang   PetscObjectContainer container;
17626be0446SHong Zhang 
17726be0446SHong Zhang   PetscFunctionBegin;
1787f93b363SHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
1797f93b363SHong Zhang   if (container) {
1807f79fd58SMatthew Knepley     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
181d6bb3c2dSHong Zhang     ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
182d6bb3c2dSHong Zhang     ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
183d6bb3c2dSHong Zhang     ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
18497c2bf28SHong Zhang     ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr);
18597c2bf28SHong Zhang     ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr);
186d6bb3c2dSHong Zhang     ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
187d6bb3c2dSHong Zhang 
1887f93b363SHong Zhang     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
1897f93b363SHong Zhang     ierr = PetscObjectCompose((PetscObject)A,"MatMatMultMPI",0);CHKERRQ(ierr);
1907f93b363SHong Zhang   }
1917f93b363SHong Zhang   ierr = PetscFree(mult);CHKERRQ(ierr);
19226be0446SHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
19326be0446SHong Zhang 
19426be0446SHong Zhang   PetscFunctionReturn(0);
19526be0446SHong Zhang }
19658c24d83SHong Zhang 
19758c24d83SHong Zhang #undef __FUNCT__
19826be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
199dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
20026be0446SHong Zhang {
201ff134f7aSHong Zhang   Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
202dfbe8321SBarry Smith   PetscErrorCode       ierr;
203*38baddfdSBarry Smith   PetscInt             start,end;
20426be0446SHong Zhang   Mat_MatMatMultMPI    *mult;
2057f93b363SHong Zhang   PetscObjectContainer container;
20626be0446SHong Zhang 
20726be0446SHong Zhang   PetscFunctionBegin;
208ff134f7aSHong Zhang   if (a->cstart != b->rstart || a->cend != b->rend){
209ff134f7aSHong Zhang     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
210ff134f7aSHong Zhang   }
211d6bb3c2dSHong Zhang   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
212d6bb3c2dSHong Zhang 
21326be0446SHong Zhang   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
21497c2bf28SHong Zhang   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr);
21526be0446SHong Zhang 
21626be0446SHong Zhang   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
21726be0446SHong Zhang   start = a->rstart; end = a->rend;
218d6bb3c2dSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
21997c2bf28SHong Zhang   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr);
22026be0446SHong Zhang 
22126be0446SHong Zhang   /* compute C_seq = A_seq * B_seq */
22297c2bf28SHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
22326be0446SHong Zhang 
22426be0446SHong Zhang   /* create mpi matrix C by concatinating C_seq */
225d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
2260e36024fSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
22726be0446SHong Zhang 
22826be0446SHong Zhang   /* attach the supporting struct to C for reuse of symbolic C */
2297f93b363SHong Zhang   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2307f93b363SHong Zhang   ierr = PetscObjectContainerSetPointer(container,mult);CHKERRQ(ierr);
2317f93b363SHong Zhang   ierr = PetscObjectCompose((PetscObject)(*C),"MatMatMultMPI",(PetscObject)container);CHKERRQ(ierr);
2327f93b363SHong Zhang 
23326be0446SHong Zhang   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
23426be0446SHong Zhang 
23526be0446SHong Zhang   PetscFunctionReturn(0);
23626be0446SHong Zhang }
23726be0446SHong Zhang 
23826be0446SHong Zhang #undef __FUNCT__
23926be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
240dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
24158c24d83SHong Zhang {
242dfbe8321SBarry Smith   PetscErrorCode ierr;
24358c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
24458c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
245*38baddfdSBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
246*38baddfdSBarry Smith   PetscInt       am=A->M,bn=B->N,bm=B->M;
247*38baddfdSBarry Smith   PetscInt       i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
24858c24d83SHong Zhang   MatScalar      *ca;
249be0fcf8dSHong Zhang   PetscBT        lnkbt;
25058c24d83SHong Zhang 
25158c24d83SHong Zhang   PetscFunctionBegin;
25258c24d83SHong Zhang   /* Set up */
25358c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
25458c24d83SHong Zhang   /* free space for accumulating nonzero column info */
255*38baddfdSBarry Smith   ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
25658c24d83SHong Zhang   ci[0] = 0;
25758c24d83SHong Zhang 
258be0fcf8dSHong Zhang   /* create and initialize a linked list */
259be0fcf8dSHong Zhang   nlnk = bn+1;
260be0fcf8dSHong Zhang   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
26158c24d83SHong Zhang 
262c5db241fSHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
263*38baddfdSBarry Smith   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
26458c24d83SHong Zhang   current_space = free_space;
26558c24d83SHong Zhang 
26658c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
26758c24d83SHong Zhang   for (i=0;i<am;i++) {
26858c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
26958c24d83SHong Zhang     cnzi = 0;
2702d09714cSHong Zhang     j    = anzi;
2712d09714cSHong Zhang     aj   = a->j + ai[i];
2722d09714cSHong Zhang     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
2732d09714cSHong Zhang       j--;
2742d09714cSHong Zhang       brow = *(aj + j);
27558c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
27658c24d83SHong Zhang       bjj  = bj + bi[brow];
2771c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
278be0fcf8dSHong Zhang       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2791c239cc6SHong Zhang       cnzi += nlnk;
28058c24d83SHong Zhang     }
28158c24d83SHong Zhang 
28258c24d83SHong Zhang     /* If free space is not available, make more free space */
28358c24d83SHong Zhang     /* Double the amount of total space in the list */
28458c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
28558c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
286c5db241fSHong Zhang       nspacedouble++;
28758c24d83SHong Zhang     }
28858c24d83SHong Zhang 
289c5db241fSHong Zhang     /* Copy data into free space, then initialize lnk */
290be0fcf8dSHong Zhang     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
291c5db241fSHong Zhang     current_space->array           += cnzi;
29258c24d83SHong Zhang     current_space->local_used      += cnzi;
29358c24d83SHong Zhang     current_space->local_remaining -= cnzi;
29458c24d83SHong Zhang 
29558c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
29658c24d83SHong Zhang   }
29758c24d83SHong Zhang 
29858c24d83SHong Zhang   /* Column indices are in the list of free space */
29958c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
30058c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
301*38baddfdSBarry Smith   ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
30258c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
303be0fcf8dSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
30458c24d83SHong Zhang 
30558c24d83SHong Zhang   /* Allocate space for ca */
30658c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
30758c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
30858c24d83SHong Zhang 
30926be0446SHong Zhang   /* put together the new symbolic matrix */
31058c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
31158c24d83SHong Zhang 
31258c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
31358c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
31458c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
31558c24d83SHong Zhang   c->freedata = PETSC_TRUE;
31658c24d83SHong Zhang   c->nonew    = 0;
31758c24d83SHong Zhang 
318be0fcf8dSHong Zhang   if (nspacedouble){
319be0fcf8dSHong Zhang     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
320be0fcf8dSHong Zhang   }
32158c24d83SHong Zhang   PetscFunctionReturn(0);
32258c24d83SHong Zhang }
323d50806bdSBarry Smith 
324c1f4806aSKris Buschelman #undef __FUNCT__
325c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
3265c66b693SKris Buschelman /*@
3275c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3285c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3295c66b693SKris Buschelman 
3305c66b693SKris Buschelman    Collective on Mat
3315c66b693SKris Buschelman 
3325c66b693SKris Buschelman    Input Parameters:
3335c66b693SKris Buschelman +  A - the left matrix
3345c66b693SKris Buschelman -  B - the right matrix
3355c66b693SKris Buschelman 
3365c66b693SKris Buschelman    Output Parameters:
3375c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
3385c66b693SKris Buschelman 
3395c66b693SKris Buschelman    Notes:
3405c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
3415c66b693SKris Buschelman 
3425c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
3435c66b693SKris Buschelman 
3445c66b693SKris Buschelman    Level: intermediate
3455c66b693SKris Buschelman 
3465c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
3475c66b693SKris Buschelman @*/
348*38baddfdSBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C)
349*38baddfdSBarry Smith {
350dfbe8321SBarry Smith   PetscErrorCode ierr;
351cb00f407SKris Buschelman   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
352cb00f407SKris Buschelman   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
3535c66b693SKris Buschelman 
3545c66b693SKris Buschelman   PetscFunctionBegin;
3555c66b693SKris Buschelman 
3564482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
357c9780b6fSBarry Smith   PetscValidType(A,1);
3585c66b693SKris Buschelman   MatPreallocated(A);
3595c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3605c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3615c66b693SKris Buschelman 
3624482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
363c9780b6fSBarry Smith   PetscValidType(B,2);
3645c66b693SKris Buschelman   MatPreallocated(B);
3655c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3665c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3675c66b693SKris Buschelman 
3684482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
369c9780b6fSBarry Smith   PetscValidType(C,3);
3705c66b693SKris Buschelman   MatPreallocated(C);
3715c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3725c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3735c66b693SKris Buschelman 
3745c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
3755c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
3765c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
3775c66b693SKris Buschelman 
378cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
379cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
380cb00f407SKris Buschelman   Anumeric = A->ops->matmultnumeric;
381cb00f407SKris Buschelman   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
382cb00f407SKris Buschelman   Bnumeric = B->ops->matmultnumeric;
383cb00f407SKris Buschelman   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
384cb00f407SKris Buschelman   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
3854d3841fdSKris Buschelman 
386cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
387cb00f407SKris Buschelman   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
388cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
3895c66b693SKris Buschelman 
3905c66b693SKris Buschelman   PetscFunctionReturn(0);
3915c66b693SKris Buschelman }
3925c66b693SKris Buschelman 
393d6bb3c2dSHong Zhang /* This routine is called ONLY in the case of reusing previously computed symbolic C */
394d50806bdSBarry Smith #undef __FUNCT__
39526be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
396dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
39726be0446SHong Zhang {
398dfbe8321SBarry Smith   PetscErrorCode       ierr;
39997c2bf28SHong Zhang   Mat                  *seq;
4007f93b363SHong Zhang   Mat_MatMatMultMPI    *mult;
4017f93b363SHong Zhang   PetscObjectContainer container;
402d6bb3c2dSHong Zhang 
40326be0446SHong Zhang   PetscFunctionBegin;
4047f93b363SHong Zhang   ierr = PetscObjectQuery((PetscObject)C,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
4057f93b363SHong Zhang   if (container) {
4067f79fd58SMatthew Knepley     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
4077f93b363SHong Zhang   }
4087f93b363SHong Zhang 
40997c2bf28SHong Zhang   seq = &mult->B_seq;
41097c2bf28SHong Zhang   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
41197c2bf28SHong Zhang   mult->B_seq = *seq;
41297c2bf28SHong Zhang 
41397c2bf28SHong Zhang   seq = &mult->A_loc;
41497c2bf28SHong Zhang   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
41597c2bf28SHong Zhang   mult->A_loc = *seq;
41697c2bf28SHong Zhang 
41797c2bf28SHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
418d6bb3c2dSHong Zhang 
419d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
4200e36024fSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
421d6bb3c2dSHong Zhang 
42226be0446SHong Zhang   PetscFunctionReturn(0);
42326be0446SHong Zhang }
42426be0446SHong Zhang 
42526be0446SHong Zhang #undef __FUNCT__
42626be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
427dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
428d50806bdSBarry Smith {
429dfbe8321SBarry Smith   PetscErrorCode ierr;
430*38baddfdSBarry Smith   PetscInt       flops=0;
431d50806bdSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data;
432d50806bdSBarry Smith   Mat_SeqAIJ     *b = (Mat_SeqAIJ *)B->data;
433d50806bdSBarry Smith   Mat_SeqAIJ     *c = (Mat_SeqAIJ *)C->data;
434*38baddfdSBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
435*38baddfdSBarry Smith   PetscInt       am=A->M,cn=C->N;
436*38baddfdSBarry Smith   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
437d50806bdSBarry Smith   MatScalar      *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
438d50806bdSBarry Smith 
439d50806bdSBarry Smith   PetscFunctionBegin;
440d50806bdSBarry Smith 
441d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
442d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
443d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
444d50806bdSBarry Smith   /* Traverse A row-wise. */
445d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
446d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
447d50806bdSBarry Smith   for (i=0;i<am;i++) {
448d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
449d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
450d50806bdSBarry Smith       brow = *aj++;
451d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
452d50806bdSBarry Smith       bjj  = bj + bi[brow];
453d50806bdSBarry Smith       baj  = ba + bi[brow];
454d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
455d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
456d50806bdSBarry Smith       }
457d50806bdSBarry Smith       flops += 2*bnzi;
458d50806bdSBarry Smith       aa++;
459d50806bdSBarry Smith     }
460d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
461d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
462d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
463d50806bdSBarry Smith       ca[j] = temp[cj[j]];
464d50806bdSBarry Smith       temp[cj[j]] = 0.0;
465d50806bdSBarry Smith     }
466d50806bdSBarry Smith     ca += cnzi;
467d50806bdSBarry Smith     cj += cnzi;
468d50806bdSBarry Smith   }
469716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
471716bacf3SKris Buschelman 
472d50806bdSBarry Smith   /* Free temp */
473d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
474d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
475d50806bdSBarry Smith   PetscFunctionReturn(0);
476d50806bdSBarry Smith }
477bc011b1eSHong Zhang 
478bc011b1eSHong Zhang #undef __FUNCT__
479bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose"
480bc011b1eSHong Zhang /*@
481bc011b1eSHong Zhang    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
482bc011b1eSHong Zhang 
483bc011b1eSHong Zhang    Collective on Mat
484bc011b1eSHong Zhang 
485bc011b1eSHong Zhang    Input Parameters:
486bc011b1eSHong Zhang +  A - the left matrix
487bc011b1eSHong Zhang .  B - the right matrix
488bc011b1eSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
489bc011b1eSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
490bc011b1eSHong Zhang 
491bc011b1eSHong Zhang    Output Parameters:
492bc011b1eSHong Zhang .  C - the product matrix
493bc011b1eSHong Zhang 
494bc011b1eSHong Zhang    Notes:
495bc011b1eSHong Zhang    C will be created and must be destroyed by the user with MatDestroy().
496bc011b1eSHong Zhang 
497bc011b1eSHong Zhang    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
498bc011b1eSHong Zhang    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
499bc011b1eSHong Zhang 
500bc011b1eSHong Zhang    Level: intermediate
501bc011b1eSHong Zhang 
502bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
503bc011b1eSHong Zhang @*/
504bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
505bc011b1eSHong Zhang {
506bc011b1eSHong Zhang   PetscErrorCode ierr;
507bc011b1eSHong Zhang   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
508bc011b1eSHong Zhang   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
509bc011b1eSHong Zhang 
510bc011b1eSHong Zhang   PetscFunctionBegin;
511bc011b1eSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
512bc011b1eSHong Zhang   PetscValidType(A,1);
513bc011b1eSHong Zhang   MatPreallocated(A);
514bc011b1eSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
515bc011b1eSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
516bc011b1eSHong Zhang   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
517bc011b1eSHong Zhang   PetscValidType(B,2);
518bc011b1eSHong Zhang   MatPreallocated(B);
519bc011b1eSHong Zhang   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
520bc011b1eSHong Zhang   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
521bc011b1eSHong Zhang   PetscValidPointer(C,3);
522bc011b1eSHong Zhang   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M);
523bc011b1eSHong Zhang 
524bc011b1eSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
525bc011b1eSHong Zhang 
526bc011b1eSHong Zhang   fA = A->ops->matmulttranspose;
527bc011b1eSHong Zhang   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
528bc011b1eSHong Zhang   fB = B->ops->matmulttranspose;
529bc011b1eSHong Zhang   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
530bc011b1eSHong Zhang   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
531bc011b1eSHong Zhang 
532bc011b1eSHong Zhang   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
533bc011b1eSHong Zhang   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
534bc011b1eSHong Zhang   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
535bc011b1eSHong Zhang 
536bc011b1eSHong Zhang   PetscFunctionReturn(0);
537bc011b1eSHong Zhang }
538bc011b1eSHong Zhang 
539bc011b1eSHong Zhang #undef __FUNCT__
540bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
541bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
542bc011b1eSHong Zhang   PetscErrorCode ierr;
543bc011b1eSHong Zhang 
544bc011b1eSHong Zhang   PetscFunctionBegin;
545bc011b1eSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
546bc011b1eSHong Zhang     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
547bc011b1eSHong Zhang   }
548bc011b1eSHong Zhang   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
549bc011b1eSHong Zhang   PetscFunctionReturn(0);
550bc011b1eSHong Zhang }
551bc011b1eSHong Zhang 
552bc011b1eSHong Zhang #undef __FUNCT__
553bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
554bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
555bc011b1eSHong Zhang {
556bc011b1eSHong Zhang   PetscErrorCode ierr;
557bc011b1eSHong Zhang   Mat            At;
558*38baddfdSBarry Smith   PetscInt       *ati,*atj;
559bc011b1eSHong Zhang 
560bc011b1eSHong Zhang   PetscFunctionBegin;
561bc011b1eSHong Zhang   /* create symbolic At */
562bc011b1eSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
563bc011b1eSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
564bc011b1eSHong Zhang 
565bc011b1eSHong Zhang   /* get symbolic C=At*B */
566bc011b1eSHong Zhang   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
567bc011b1eSHong Zhang 
568bc011b1eSHong Zhang   /* clean up */
569bc011b1eSHong Zhang   ierr = MatDestroy(At);CHKERRQ(ierr);
570bc011b1eSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
571bc011b1eSHong Zhang 
572bc011b1eSHong Zhang   PetscFunctionReturn(0);
573bc011b1eSHong Zhang }
574bc011b1eSHong Zhang 
575bc011b1eSHong Zhang #undef __FUNCT__
576bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
577bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
578bc011b1eSHong Zhang {
579bc011b1eSHong Zhang   PetscErrorCode ierr;
5800fbc74f4SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
581*38baddfdSBarry Smith   PetscInt       am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
582*38baddfdSBarry Smith   PetscInt       cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
5830fbc74f4SHong Zhang   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
584bc011b1eSHong Zhang 
585bc011b1eSHong Zhang   PetscFunctionBegin;
586bc011b1eSHong Zhang   /* clear old values in C */
587bc011b1eSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
588bc011b1eSHong Zhang 
589bc011b1eSHong Zhang   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
590bc011b1eSHong Zhang   for (i=0;i<am;i++) {
591bc011b1eSHong Zhang     bj   = b->j + bi[i];
592bc011b1eSHong Zhang     ba   = b->a + bi[i];
593bc011b1eSHong Zhang     bnzi = bi[i+1] - bi[i];
594bc011b1eSHong Zhang     anzi = ai[i+1] - ai[i];
595bc011b1eSHong Zhang     for (j=0; j<anzi; j++) {
596bc011b1eSHong Zhang       nextb = 0;
5970fbc74f4SHong Zhang       crow  = *aj++;
598bc011b1eSHong Zhang       cjj   = cj + ci[crow];
599bc011b1eSHong Zhang       caj   = ca + ci[crow];
600bc011b1eSHong Zhang       /* perform sparse axpy operation.  Note cjj includes bj. */
601bc011b1eSHong Zhang       for (k=0; nextb<bnzi; k++) {
6020fbc74f4SHong Zhang         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
6030fbc74f4SHong Zhang           caj[k] += (*aa)*(*(ba+nextb));
604bc011b1eSHong Zhang           nextb++;
605bc011b1eSHong Zhang         }
606bc011b1eSHong Zhang       }
607bc011b1eSHong Zhang       flops += 2*bnzi;
6080fbc74f4SHong Zhang       aa++;
609bc011b1eSHong Zhang     }
610bc011b1eSHong Zhang   }
611bc011b1eSHong Zhang 
612bc011b1eSHong Zhang   /* Assemble the final matrix and clean up */
613bc011b1eSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
614bc011b1eSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
615bc011b1eSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
616bc011b1eSHong Zhang   PetscFunctionReturn(0);
617bc011b1eSHong Zhang }
618