xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision be0fcf8d388fea71c80b6bef2ab7056556598fc0)
1d50806bdSBarry Smith /*
2a50f8bf6SHong Zhang   Defines matrix-matrix product routines for pairs of AIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9*be0fcf8dSHong Zhang #include "petscbt.h"
10d50806bdSBarry Smith 
11c1f4806aSKris Buschelman #undef __FUNCT__
12c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
135c66b693SKris Buschelman /*@
145c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
1594e3eecaSKris Buschelman 
165c66b693SKris Buschelman    Collective on Mat
17d50806bdSBarry Smith 
185c66b693SKris Buschelman    Input Parameters:
195c66b693SKris Buschelman +  A - the left matrix
201c741599SHong Zhang .  B - the right matrix
211c741599SHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
235c66b693SKris Buschelman 
245c66b693SKris Buschelman    Output Parameters:
255c66b693SKris Buschelman .  C - the product matrix
265c66b693SKris Buschelman 
275c66b693SKris Buschelman    Notes:
285c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
295c66b693SKris Buschelman 
30bc011b1eSHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
31bc011b1eSHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
325c66b693SKris Buschelman 
335c66b693SKris Buschelman    Level: intermediate
345c66b693SKris Buschelman 
355c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
365c66b693SKris Buschelman @*/
37dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
382d09714cSHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40cb00f407SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
41cb00f407SKris Buschelman   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
422d09714cSHong Zhang 
43d50806bdSBarry Smith   PetscFunctionBegin;
444482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45c9780b6fSBarry Smith   PetscValidType(A,1);
465c66b693SKris Buschelman   MatPreallocated(A);
475c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
485c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
494482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
50c9780b6fSBarry Smith   PetscValidType(B,2);
515c66b693SKris Buschelman   MatPreallocated(B);
525c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
535c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
544482741eSBarry Smith   PetscValidPointer(C,3);
555c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
56d50806bdSBarry Smith 
571c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
58c5db241fSHong Zhang 
59cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
60cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
61cb00f407SKris Buschelman   fA = A->ops->matmult;
62cb00f407SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
63cb00f407SKris Buschelman   fB = B->ops->matmult;
64cb00f407SKris Buschelman   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
65cb00f407SKris Buschelman   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
66cb00f407SKris Buschelman 
676284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
681c741599SHong Zhang   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
696284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
704d3841fdSKris Buschelman 
716284ec50SHong Zhang   PetscFunctionReturn(0);
726284ec50SHong Zhang }
736284ec50SHong Zhang 
746284ec50SHong Zhang #undef __FUNCT__
756284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
76dfbe8321SBarry Smith PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
772d09714cSHong Zhang {
78dfbe8321SBarry Smith   PetscErrorCode ierr;
796284ec50SHong Zhang 
806284ec50SHong Zhang   PetscFunctionBegin;
8126be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
82d6bb3c2dSHong Zhang     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
83d6bb3c2dSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
8426be0446SHong Zhang     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
85d6bb3c2dSHong Zhang   } else {
86d6bb3c2dSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
87d6bb3c2dSHong Zhang   }
88d50806bdSBarry Smith   PetscFunctionReturn(0);
89d50806bdSBarry Smith }
905c66b693SKris Buschelman 
915c66b693SKris Buschelman #undef __FUNCT__
925c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
93dfbe8321SBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
94dfbe8321SBarry Smith   PetscErrorCode ierr;
955c66b693SKris Buschelman 
965c66b693SKris Buschelman   PetscFunctionBegin;
9726be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
9826be0446SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
9926be0446SHong Zhang   }
10026be0446SHong Zhang   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
1015c66b693SKris Buschelman   PetscFunctionReturn(0);
1025c66b693SKris Buschelman }
1035c66b693SKris Buschelman 
104c1f4806aSKris Buschelman #undef __FUNCT__
105c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
1065c66b693SKris Buschelman /*@
1075c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
1085c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
1095c66b693SKris Buschelman 
1105c66b693SKris Buschelman    Collective on Mat
1115c66b693SKris Buschelman 
1125c66b693SKris Buschelman    Input Parameters:
1135c66b693SKris Buschelman +  A - the left matrix
114c5db241fSHong Zhang .  B - the right matrix
115c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
1165c66b693SKris Buschelman 
1175c66b693SKris Buschelman    Output Parameters:
1185c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
1195c66b693SKris Buschelman 
1205c66b693SKris Buschelman    Notes:
1214d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
1225c66b693SKris Buschelman 
1234d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
1245c66b693SKris Buschelman 
1255c66b693SKris Buschelman    Level: intermediate
1265c66b693SKris Buschelman 
1275c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
1285c66b693SKris Buschelman @*/
129dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
130dfbe8321SBarry Smith   PetscErrorCode ierr;
131cb00f407SKris Buschelman   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
132cb00f407SKris Buschelman   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
1335c66b693SKris Buschelman 
1345c66b693SKris Buschelman   PetscFunctionBegin;
1354482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
136c9780b6fSBarry Smith   PetscValidType(A,1);
1375c66b693SKris Buschelman   MatPreallocated(A);
1385c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1395c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1405c66b693SKris Buschelman 
1414482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
142c9780b6fSBarry Smith   PetscValidType(B,2);
1435c66b693SKris Buschelman   MatPreallocated(B);
1445c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1455c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1464482741eSBarry Smith   PetscValidPointer(C,3);
1474482741eSBarry Smith 
1485c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
1491c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
1505c66b693SKris Buschelman 
151cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
152cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
153cb00f407SKris Buschelman   Asymbolic = A->ops->matmultsymbolic;
154cb00f407SKris Buschelman   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
155cb00f407SKris Buschelman   Bsymbolic = B->ops->matmultsymbolic;
156cb00f407SKris Buschelman   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
157cb00f407SKris Buschelman   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
158cb00f407SKris Buschelman 
159cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
160cb00f407SKris Buschelman   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
161cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1625c66b693SKris Buschelman 
1635c66b693SKris Buschelman   PetscFunctionReturn(0);
1645c66b693SKris Buschelman }
1651c24bd37SHong Zhang 
166dfbe8321SBarry Smith EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
16726be0446SHong Zhang #undef __FUNCT__
16826be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
169dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
17026be0446SHong Zhang {
171dfbe8321SBarry Smith   PetscErrorCode ierr;
17226be0446SHong Zhang   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr;
17326be0446SHong Zhang 
17426be0446SHong Zhang   PetscFunctionBegin;
175d6bb3c2dSHong Zhang   ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
176d6bb3c2dSHong Zhang   ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
177d6bb3c2dSHong Zhang   ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
178d6bb3c2dSHong Zhang   ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr);
179d6bb3c2dSHong Zhang   ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr);
180d6bb3c2dSHong Zhang   ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
18126be0446SHong Zhang   ierr = PetscFree(mult);CHKERRQ(ierr);
182d6bb3c2dSHong Zhang 
18326be0446SHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
18426be0446SHong Zhang 
18526be0446SHong Zhang   PetscFunctionReturn(0);
18626be0446SHong Zhang }
18758c24d83SHong Zhang 
18858c24d83SHong Zhang #undef __FUNCT__
18926be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
190dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
19126be0446SHong Zhang {
192ff134f7aSHong Zhang   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
193dfbe8321SBarry Smith   PetscErrorCode    ierr;
194ff134f7aSHong Zhang   int               *idx,i,start,end,ncols,nzA,nzB,*cmap;
19526be0446SHong Zhang   Mat_MatMatMultMPI *mult;
19626be0446SHong Zhang 
19726be0446SHong Zhang   PetscFunctionBegin;
198ff134f7aSHong Zhang   if (a->cstart != b->rstart || a->cend != b->rend){
199ff134f7aSHong Zhang     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
200ff134f7aSHong Zhang   }
201d6bb3c2dSHong Zhang   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
202d6bb3c2dSHong Zhang 
20326be0446SHong Zhang   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
20426be0446SHong Zhang   start = a->cstart;
20526be0446SHong Zhang   cmap  = a->garray;
20626be0446SHong Zhang   nzA   = a->A->n;
20726be0446SHong Zhang   nzB   = a->B->n;
20826be0446SHong Zhang   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
20926be0446SHong Zhang   ncols = 0;
21026be0446SHong Zhang   for (i=0; i<nzB; i++) {
21126be0446SHong Zhang     if (cmap[i] < start) idx[ncols++] = cmap[i];
21226be0446SHong Zhang     else break;
21326be0446SHong Zhang   }
214ff134f7aSHong Zhang   mult->brstart = i;
21526be0446SHong Zhang   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
216ff134f7aSHong Zhang   for (i=mult->brstart; i<nzB; i++) idx[ncols++] = cmap[i];
217d6bb3c2dSHong Zhang   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr);
21826be0446SHong Zhang   ierr = PetscFree(idx);CHKERRQ(ierr);
219d6bb3c2dSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr);
220ff134f7aSHong Zhang   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr);
22126be0446SHong Zhang 
22226be0446SHong Zhang   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
22326be0446SHong Zhang   start = a->rstart; end = a->rend;
224d6bb3c2dSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
225d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr);
22626be0446SHong Zhang 
22726be0446SHong Zhang   /* compute C_seq = A_seq * B_seq */
228d6bb3c2dSHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
22926be0446SHong Zhang 
23026be0446SHong Zhang   /* create mpi matrix C by concatinating C_seq */
231d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
232d6bb3c2dSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
23326be0446SHong Zhang 
23426be0446SHong Zhang   /* attach the supporting struct to C for reuse of symbolic C */
23526be0446SHong Zhang   (*C)->spptr         = (void*)mult;
23626be0446SHong Zhang   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
23726be0446SHong Zhang 
23826be0446SHong Zhang   PetscFunctionReturn(0);
23926be0446SHong Zhang }
24026be0446SHong Zhang 
24126be0446SHong Zhang #undef __FUNCT__
24226be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
243dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
24458c24d83SHong Zhang {
245dfbe8321SBarry Smith   PetscErrorCode ierr;
24658c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
24758c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
248*be0fcf8dSHong Zhang   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
2495c66b693SKris Buschelman   int            am=A->M,bn=B->N,bm=B->M;
250*be0fcf8dSHong Zhang   int            i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
25158c24d83SHong Zhang   MatScalar      *ca;
252*be0fcf8dSHong Zhang   PetscBT        lnkbt;
25358c24d83SHong Zhang 
25458c24d83SHong Zhang   PetscFunctionBegin;
25558c24d83SHong Zhang   /* Set up */
25658c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
25758c24d83SHong Zhang   /* free space for accumulating nonzero column info */
25858c24d83SHong Zhang   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
25958c24d83SHong Zhang   ci[0] = 0;
26058c24d83SHong Zhang 
261*be0fcf8dSHong Zhang   /* create and initialize a linked list */
262*be0fcf8dSHong Zhang   nlnk = bn+1;
263*be0fcf8dSHong Zhang   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
26458c24d83SHong Zhang 
265c5db241fSHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
266d6bb3c2dSHong Zhang   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
26758c24d83SHong Zhang   current_space = free_space;
26858c24d83SHong Zhang 
26958c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
27058c24d83SHong Zhang   for (i=0;i<am;i++) {
27158c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
27258c24d83SHong Zhang     cnzi = 0;
2732d09714cSHong Zhang     j    = anzi;
2742d09714cSHong Zhang     aj   = a->j + ai[i];
2752d09714cSHong Zhang     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
2762d09714cSHong Zhang       j--;
2772d09714cSHong Zhang       brow = *(aj + j);
27858c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
27958c24d83SHong Zhang       bjj  = bj + bi[brow];
2801c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
281*be0fcf8dSHong Zhang       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2821c239cc6SHong Zhang       cnzi += nlnk;
28358c24d83SHong Zhang     }
28458c24d83SHong Zhang 
28558c24d83SHong Zhang     /* If free space is not available, make more free space */
28658c24d83SHong Zhang     /* Double the amount of total space in the list */
28758c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
28858c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
289c5db241fSHong Zhang       nspacedouble++;
29058c24d83SHong Zhang     }
29158c24d83SHong Zhang 
292c5db241fSHong Zhang     /* Copy data into free space, then initialize lnk */
293*be0fcf8dSHong Zhang     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
294c5db241fSHong Zhang     current_space->array           += cnzi;
29558c24d83SHong Zhang     current_space->local_used      += cnzi;
29658c24d83SHong Zhang     current_space->local_remaining -= cnzi;
29758c24d83SHong Zhang 
29858c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
29958c24d83SHong Zhang   }
30058c24d83SHong Zhang 
30158c24d83SHong Zhang   /* Column indices are in the list of free space */
30258c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
30358c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
30458c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
30558c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
306*be0fcf8dSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
30758c24d83SHong Zhang 
30858c24d83SHong Zhang   /* Allocate space for ca */
30958c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
31058c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
31158c24d83SHong Zhang 
31226be0446SHong Zhang   /* put together the new symbolic matrix */
31358c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
31458c24d83SHong Zhang 
31558c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
31658c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
31758c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
31858c24d83SHong Zhang   c->freedata = PETSC_TRUE;
31958c24d83SHong Zhang   c->nonew    = 0;
32058c24d83SHong Zhang 
321*be0fcf8dSHong Zhang   if (nspacedouble){
322*be0fcf8dSHong Zhang     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
323*be0fcf8dSHong Zhang   }
32458c24d83SHong Zhang   PetscFunctionReturn(0);
32558c24d83SHong Zhang }
326d50806bdSBarry Smith 
327c1f4806aSKris Buschelman #undef __FUNCT__
328c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
3295c66b693SKris Buschelman /*@
3305c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3315c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3325c66b693SKris Buschelman 
3335c66b693SKris Buschelman    Collective on Mat
3345c66b693SKris Buschelman 
3355c66b693SKris Buschelman    Input Parameters:
3365c66b693SKris Buschelman +  A - the left matrix
3375c66b693SKris Buschelman -  B - the right matrix
3385c66b693SKris Buschelman 
3395c66b693SKris Buschelman    Output Parameters:
3405c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
3415c66b693SKris Buschelman 
3425c66b693SKris Buschelman    Notes:
3435c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
3445c66b693SKris Buschelman 
3455c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
3465c66b693SKris Buschelman 
3475c66b693SKris Buschelman    Level: intermediate
3485c66b693SKris Buschelman 
3495c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
3505c66b693SKris Buschelman @*/
351dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){
352dfbe8321SBarry Smith   PetscErrorCode ierr;
353cb00f407SKris Buschelman   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
354cb00f407SKris Buschelman   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
3555c66b693SKris Buschelman 
3565c66b693SKris Buschelman   PetscFunctionBegin;
3575c66b693SKris Buschelman 
3584482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
359c9780b6fSBarry Smith   PetscValidType(A,1);
3605c66b693SKris Buschelman   MatPreallocated(A);
3615c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3625c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3635c66b693SKris Buschelman 
3644482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
365c9780b6fSBarry Smith   PetscValidType(B,2);
3665c66b693SKris Buschelman   MatPreallocated(B);
3675c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3685c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3695c66b693SKris Buschelman 
3704482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
371c9780b6fSBarry Smith   PetscValidType(C,3);
3725c66b693SKris Buschelman   MatPreallocated(C);
3735c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3745c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3755c66b693SKris Buschelman 
3765c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
3775c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
3785c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
3795c66b693SKris Buschelman 
380cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
381cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
382cb00f407SKris Buschelman   Anumeric = A->ops->matmultnumeric;
383cb00f407SKris Buschelman   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
384cb00f407SKris Buschelman   Bnumeric = B->ops->matmultnumeric;
385cb00f407SKris Buschelman   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
386cb00f407SKris Buschelman   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
3874d3841fdSKris Buschelman 
388cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
389cb00f407SKris Buschelman   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
390cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
3915c66b693SKris Buschelman 
3925c66b693SKris Buschelman   PetscFunctionReturn(0);
3935c66b693SKris Buschelman }
3945c66b693SKris Buschelman 
395d6bb3c2dSHong Zhang /* This routine is called ONLY in the case of reusing previously computed symbolic C */
396d50806bdSBarry Smith #undef __FUNCT__
39726be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
398dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
39926be0446SHong Zhang {
400dfbe8321SBarry Smith   PetscErrorCode ierr;
401d6bb3c2dSHong Zhang   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr;
402d6bb3c2dSHong Zhang 
40326be0446SHong Zhang   PetscFunctionBegin;
404d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr)
405d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr);
406d6bb3c2dSHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
407d6bb3c2dSHong Zhang 
408d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
409d6bb3c2dSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
410d6bb3c2dSHong Zhang 
41126be0446SHong Zhang   PetscFunctionReturn(0);
41226be0446SHong Zhang }
41326be0446SHong Zhang 
41426be0446SHong Zhang #undef __FUNCT__
41526be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
416dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
417d50806bdSBarry Smith {
418dfbe8321SBarry Smith   PetscErrorCode ierr;
419dfbe8321SBarry Smith   int        flops=0;
420d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
421d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
422d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
423d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
4245c66b693SKris Buschelman   int        am=A->M,cn=C->N;
42594e3eecaSKris Buschelman   int        i,j,k,anzi,bnzi,cnzi,brow;
426d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
427d50806bdSBarry Smith 
428d50806bdSBarry Smith   PetscFunctionBegin;
429d50806bdSBarry Smith 
430d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
431d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
432d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
433d50806bdSBarry Smith   /* Traverse A row-wise. */
434d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
435d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
436d50806bdSBarry Smith   for (i=0;i<am;i++) {
437d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
438d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
439d50806bdSBarry Smith       brow = *aj++;
440d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
441d50806bdSBarry Smith       bjj  = bj + bi[brow];
442d50806bdSBarry Smith       baj  = ba + bi[brow];
443d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
444d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
445d50806bdSBarry Smith       }
446d50806bdSBarry Smith       flops += 2*bnzi;
447d50806bdSBarry Smith       aa++;
448d50806bdSBarry Smith     }
449d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
450d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
451d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
452d50806bdSBarry Smith       ca[j] = temp[cj[j]];
453d50806bdSBarry Smith       temp[cj[j]] = 0.0;
454d50806bdSBarry Smith     }
455d50806bdSBarry Smith     ca += cnzi;
456d50806bdSBarry Smith     cj += cnzi;
457d50806bdSBarry Smith   }
458716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
460716bacf3SKris Buschelman 
461d50806bdSBarry Smith   /* Free temp */
462d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
463d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
464d50806bdSBarry Smith   PetscFunctionReturn(0);
465d50806bdSBarry Smith }
466bc011b1eSHong Zhang 
467bc011b1eSHong Zhang #undef __FUNCT__
468bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose"
469bc011b1eSHong Zhang /*@
470bc011b1eSHong Zhang    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
471bc011b1eSHong Zhang 
472bc011b1eSHong Zhang    Collective on Mat
473bc011b1eSHong Zhang 
474bc011b1eSHong Zhang    Input Parameters:
475bc011b1eSHong Zhang +  A - the left matrix
476bc011b1eSHong Zhang .  B - the right matrix
477bc011b1eSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
478bc011b1eSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
479bc011b1eSHong Zhang 
480bc011b1eSHong Zhang    Output Parameters:
481bc011b1eSHong Zhang .  C - the product matrix
482bc011b1eSHong Zhang 
483bc011b1eSHong Zhang    Notes:
484bc011b1eSHong Zhang    C will be created and must be destroyed by the user with MatDestroy().
485bc011b1eSHong Zhang 
486bc011b1eSHong Zhang    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
487bc011b1eSHong Zhang    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
488bc011b1eSHong Zhang 
489bc011b1eSHong Zhang    Level: intermediate
490bc011b1eSHong Zhang 
491bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
492bc011b1eSHong Zhang @*/
493bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
494bc011b1eSHong Zhang {
495bc011b1eSHong Zhang   PetscErrorCode ierr;
496bc011b1eSHong Zhang   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
497bc011b1eSHong Zhang   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
498bc011b1eSHong Zhang 
499bc011b1eSHong Zhang   PetscFunctionBegin;
500bc011b1eSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
501bc011b1eSHong Zhang   PetscValidType(A,1);
502bc011b1eSHong Zhang   MatPreallocated(A);
503bc011b1eSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
504bc011b1eSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
505bc011b1eSHong Zhang   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
506bc011b1eSHong Zhang   PetscValidType(B,2);
507bc011b1eSHong Zhang   MatPreallocated(B);
508bc011b1eSHong Zhang   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
509bc011b1eSHong Zhang   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
510bc011b1eSHong Zhang   PetscValidPointer(C,3);
511bc011b1eSHong Zhang   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M);
512bc011b1eSHong Zhang 
513bc011b1eSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
514bc011b1eSHong Zhang 
515bc011b1eSHong Zhang   fA = A->ops->matmulttranspose;
516bc011b1eSHong Zhang   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
517bc011b1eSHong Zhang   fB = B->ops->matmulttranspose;
518bc011b1eSHong Zhang   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
519bc011b1eSHong Zhang   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
520bc011b1eSHong Zhang 
521bc011b1eSHong Zhang   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
522bc011b1eSHong Zhang   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
523bc011b1eSHong Zhang   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
524bc011b1eSHong Zhang 
525bc011b1eSHong Zhang   PetscFunctionReturn(0);
526bc011b1eSHong Zhang }
527bc011b1eSHong Zhang 
528bc011b1eSHong Zhang #undef __FUNCT__
529bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
530bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
531bc011b1eSHong Zhang   PetscErrorCode ierr;
532bc011b1eSHong Zhang 
533bc011b1eSHong Zhang   PetscFunctionBegin;
534*be0fcf8dSHong Zhang   int rank;
535*be0fcf8dSHong Zhang   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
536*be0fcf8dSHong Zhang   printf(" [%d] MatMatMultTranspose_SeqAIJ_SeqAIJ is called\n",rank);
537bc011b1eSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
538bc011b1eSHong Zhang     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
539bc011b1eSHong Zhang   }
540bc011b1eSHong Zhang   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
541bc011b1eSHong Zhang   PetscFunctionReturn(0);
542bc011b1eSHong Zhang }
543bc011b1eSHong Zhang 
544bc011b1eSHong Zhang #undef __FUNCT__
545bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
546bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
547bc011b1eSHong Zhang {
548bc011b1eSHong Zhang   PetscErrorCode ierr;
549bc011b1eSHong Zhang   Mat            At;
550bc011b1eSHong Zhang   int            *ati,*atj;
551bc011b1eSHong Zhang 
552bc011b1eSHong Zhang   PetscFunctionBegin;
553bc011b1eSHong Zhang   /* create symbolic At */
554bc011b1eSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
555bc011b1eSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
556bc011b1eSHong Zhang 
557bc011b1eSHong Zhang   /* get symbolic C=At*B */
558bc011b1eSHong Zhang   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
559bc011b1eSHong Zhang 
560bc011b1eSHong Zhang   /* clean up */
561bc011b1eSHong Zhang   ierr = MatDestroy(At);CHKERRQ(ierr);
562bc011b1eSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
563bc011b1eSHong Zhang 
564bc011b1eSHong Zhang   PetscFunctionReturn(0);
565bc011b1eSHong Zhang }
566bc011b1eSHong Zhang 
567bc011b1eSHong Zhang #undef __FUNCT__
568bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
569bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
570bc011b1eSHong Zhang {
571bc011b1eSHong Zhang   PetscErrorCode ierr;
5720fbc74f4SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
573170ef064SHong Zhang   int            am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
5740fbc74f4SHong Zhang   int            cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
5750fbc74f4SHong Zhang   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
576bc011b1eSHong Zhang 
577bc011b1eSHong Zhang   PetscFunctionBegin;
578bc011b1eSHong Zhang   /* clear old values in C */
579bc011b1eSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
580bc011b1eSHong Zhang 
581bc011b1eSHong Zhang   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
582bc011b1eSHong Zhang   for (i=0;i<am;i++) {
583bc011b1eSHong Zhang     bj   = b->j + bi[i];
584bc011b1eSHong Zhang     ba   = b->a + bi[i];
585bc011b1eSHong Zhang     bnzi = bi[i+1] - bi[i];
586bc011b1eSHong Zhang     anzi = ai[i+1] - ai[i];
587bc011b1eSHong Zhang     for (j=0; j<anzi; j++) {
588bc011b1eSHong Zhang       nextb = 0;
5890fbc74f4SHong Zhang       crow  = *aj++;
590bc011b1eSHong Zhang       cjj   = cj + ci[crow];
591bc011b1eSHong Zhang       caj   = ca + ci[crow];
592bc011b1eSHong Zhang       /* perform sparse axpy operation.  Note cjj includes bj. */
593bc011b1eSHong Zhang       for (k=0; nextb<bnzi; k++) {
5940fbc74f4SHong Zhang         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
5950fbc74f4SHong Zhang           caj[k] += (*aa)*(*(ba+nextb));
596bc011b1eSHong Zhang           nextb++;
597bc011b1eSHong Zhang         }
598bc011b1eSHong Zhang       }
599bc011b1eSHong Zhang       flops += 2*bnzi;
6000fbc74f4SHong Zhang       aa++;
601bc011b1eSHong Zhang     }
602bc011b1eSHong Zhang   }
603bc011b1eSHong Zhang 
604bc011b1eSHong Zhang   /* Assemble the final matrix and clean up */
605bc011b1eSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
606bc011b1eSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
607bc011b1eSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
608bc011b1eSHong Zhang   PetscFunctionReturn(0);
609bc011b1eSHong Zhang }
610