xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 2499ec7893278572238d73a6b308a0d02f10e2c6)
1d50806bdSBarry Smith /*
2*2499ec78SHong Zhang   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
8be0fcf8dSHong Zhang #include "petscbt.h"
9d50806bdSBarry Smith 
10c1f4806aSKris Buschelman #undef __FUNCT__
11c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
125c66b693SKris Buschelman /*@
135c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
1494e3eecaSKris Buschelman 
155c66b693SKris Buschelman    Collective on Mat
16d50806bdSBarry Smith 
175c66b693SKris Buschelman    Input Parameters:
185c66b693SKris Buschelman +  A - the left matrix
191c741599SHong Zhang .  B - the right matrix
201c741599SHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
225c66b693SKris Buschelman 
235c66b693SKris Buschelman    Output Parameters:
245c66b693SKris Buschelman .  C - the product matrix
255c66b693SKris Buschelman 
265c66b693SKris Buschelman    Notes:
275c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
285c66b693SKris Buschelman 
29bc011b1eSHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
30bc011b1eSHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
315c66b693SKris Buschelman 
325c66b693SKris Buschelman    Level: intermediate
335c66b693SKris Buschelman 
345c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
355c66b693SKris Buschelman @*/
36dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
372d09714cSHong Zhang {
38dfbe8321SBarry Smith   PetscErrorCode ierr;
39cb00f407SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
40cb00f407SKris Buschelman   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
412d09714cSHong Zhang 
42d50806bdSBarry Smith   PetscFunctionBegin;
434482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
44c9780b6fSBarry Smith   PetscValidType(A,1);
455c66b693SKris Buschelman   MatPreallocated(A);
465c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
475c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
484482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
49c9780b6fSBarry Smith   PetscValidType(B,2);
505c66b693SKris Buschelman   MatPreallocated(B);
515c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
525c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
534482741eSBarry Smith   PetscValidPointer(C,3);
5477431f27SBarry Smith   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N);
55d50806bdSBarry Smith 
561c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57c5db241fSHong Zhang 
58cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
59cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60cb00f407SKris Buschelman   fA = A->ops->matmult;
61cb00f407SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
62cb00f407SKris Buschelman   fB = B->ops->matmult;
63cb00f407SKris Buschelman   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
64cb00f407SKris Buschelman   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
65cb00f407SKris Buschelman 
666284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
671c741599SHong Zhang   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
686284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
694d3841fdSKris Buschelman 
706284ec50SHong Zhang   PetscFunctionReturn(0);
716284ec50SHong Zhang }
726284ec50SHong Zhang 
736284ec50SHong Zhang #undef __FUNCT__
745c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
7538baddfdSBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
7638baddfdSBarry Smith {
77dfbe8321SBarry Smith   PetscErrorCode ierr;
785c66b693SKris Buschelman 
795c66b693SKris Buschelman   PetscFunctionBegin;
8026be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
8126be0446SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
8226be0446SHong Zhang   }
8326be0446SHong Zhang   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
845c66b693SKris Buschelman   PetscFunctionReturn(0);
855c66b693SKris Buschelman }
865c66b693SKris Buschelman 
87c1f4806aSKris Buschelman #undef __FUNCT__
88c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
895c66b693SKris Buschelman /*@
905c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
915c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
925c66b693SKris Buschelman 
935c66b693SKris Buschelman    Collective on Mat
945c66b693SKris Buschelman 
955c66b693SKris Buschelman    Input Parameters:
965c66b693SKris Buschelman +  A - the left matrix
97c5db241fSHong Zhang .  B - the right matrix
98c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
995c66b693SKris Buschelman 
1005c66b693SKris Buschelman    Output Parameters:
1015c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
1025c66b693SKris Buschelman 
1035c66b693SKris Buschelman    Notes:
1044d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
1055c66b693SKris Buschelman 
1064d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
1075c66b693SKris Buschelman 
1085c66b693SKris Buschelman    Level: intermediate
1095c66b693SKris Buschelman 
1105c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
1115c66b693SKris Buschelman @*/
11238baddfdSBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C)
11338baddfdSBarry Smith {
114dfbe8321SBarry Smith   PetscErrorCode ierr;
115cb00f407SKris Buschelman   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
116cb00f407SKris Buschelman   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
1175c66b693SKris Buschelman 
1185c66b693SKris Buschelman   PetscFunctionBegin;
1194482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
120c9780b6fSBarry Smith   PetscValidType(A,1);
1215c66b693SKris Buschelman   MatPreallocated(A);
1225c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1235c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1245c66b693SKris Buschelman 
1254482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
126c9780b6fSBarry Smith   PetscValidType(B,2);
1275c66b693SKris Buschelman   MatPreallocated(B);
1285c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1295c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1304482741eSBarry Smith   PetscValidPointer(C,3);
1314482741eSBarry Smith 
13277431f27SBarry Smith   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N);
1331c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
1345c66b693SKris Buschelman 
135cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
136cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
137cb00f407SKris Buschelman   Asymbolic = A->ops->matmultsymbolic;
138cb00f407SKris Buschelman   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
139cb00f407SKris Buschelman   Bsymbolic = B->ops->matmultsymbolic;
140cb00f407SKris Buschelman   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
141cb00f407SKris Buschelman   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
142cb00f407SKris Buschelman 
143cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
144cb00f407SKris Buschelman   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
145cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1465c66b693SKris Buschelman 
1475c66b693SKris Buschelman   PetscFunctionReturn(0);
1485c66b693SKris Buschelman }
1491c24bd37SHong Zhang 
15026be0446SHong Zhang #undef __FUNCT__
15126be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
152dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
15358c24d83SHong Zhang {
154dfbe8321SBarry Smith   PetscErrorCode ierr;
15558c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
15658c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
15738baddfdSBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
15838baddfdSBarry Smith   PetscInt       am=A->M,bn=B->N,bm=B->M;
15938baddfdSBarry Smith   PetscInt       i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
16058c24d83SHong Zhang   MatScalar      *ca;
161be0fcf8dSHong Zhang   PetscBT        lnkbt;
16258c24d83SHong Zhang 
16358c24d83SHong Zhang   PetscFunctionBegin;
16458c24d83SHong Zhang   /* Set up */
16558c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
16658c24d83SHong Zhang   /* free space for accumulating nonzero column info */
16738baddfdSBarry Smith   ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
16858c24d83SHong Zhang   ci[0] = 0;
16958c24d83SHong Zhang 
170be0fcf8dSHong Zhang   /* create and initialize a linked list */
171be0fcf8dSHong Zhang   nlnk = bn+1;
172be0fcf8dSHong Zhang   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
17358c24d83SHong Zhang 
174c5db241fSHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
17538baddfdSBarry Smith   ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
17658c24d83SHong Zhang   current_space = free_space;
17758c24d83SHong Zhang 
17858c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
17958c24d83SHong Zhang   for (i=0;i<am;i++) {
18058c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
18158c24d83SHong Zhang     cnzi = 0;
1822d09714cSHong Zhang     j    = anzi;
1832d09714cSHong Zhang     aj   = a->j + ai[i];
1842d09714cSHong Zhang     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
1852d09714cSHong Zhang       j--;
1862d09714cSHong Zhang       brow = *(aj + j);
18758c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
18858c24d83SHong Zhang       bjj  = bj + bi[brow];
1891c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
190be0fcf8dSHong Zhang       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1911c239cc6SHong Zhang       cnzi += nlnk;
19258c24d83SHong Zhang     }
19358c24d83SHong Zhang 
19458c24d83SHong Zhang     /* If free space is not available, make more free space */
19558c24d83SHong Zhang     /* Double the amount of total space in the list */
19658c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
19758c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
198c5db241fSHong Zhang       nspacedouble++;
19958c24d83SHong Zhang     }
20058c24d83SHong Zhang 
201c5db241fSHong Zhang     /* Copy data into free space, then initialize lnk */
202be0fcf8dSHong Zhang     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
203c5db241fSHong Zhang     current_space->array           += cnzi;
20458c24d83SHong Zhang     current_space->local_used      += cnzi;
20558c24d83SHong Zhang     current_space->local_remaining -= cnzi;
20658c24d83SHong Zhang 
20758c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
20858c24d83SHong Zhang   }
20958c24d83SHong Zhang 
21058c24d83SHong Zhang   /* Column indices are in the list of free space */
21158c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
21258c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
21338baddfdSBarry Smith   ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
21458c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
215be0fcf8dSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
21658c24d83SHong Zhang 
21758c24d83SHong Zhang   /* Allocate space for ca */
21858c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
21958c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
22058c24d83SHong Zhang 
22126be0446SHong Zhang   /* put together the new symbolic matrix */
22258c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
22358c24d83SHong Zhang 
22458c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
22558c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
22658c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
22758c24d83SHong Zhang   c->freedata = PETSC_TRUE;
22858c24d83SHong Zhang   c->nonew    = 0;
22958c24d83SHong Zhang 
230be0fcf8dSHong Zhang   if (nspacedouble){
23177431f27SBarry Smith     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%D, nnz(A):%D, nnz(B):%D, fill:%g, nnz(C):%D\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
232be0fcf8dSHong Zhang   }
23358c24d83SHong Zhang   PetscFunctionReturn(0);
23458c24d83SHong Zhang }
235d50806bdSBarry Smith 
236c1f4806aSKris Buschelman #undef __FUNCT__
237c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
2385c66b693SKris Buschelman /*@
2395c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
2405c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
2415c66b693SKris Buschelman 
2425c66b693SKris Buschelman    Collective on Mat
2435c66b693SKris Buschelman 
2445c66b693SKris Buschelman    Input Parameters:
2455c66b693SKris Buschelman +  A - the left matrix
2465c66b693SKris Buschelman -  B - the right matrix
2475c66b693SKris Buschelman 
2485c66b693SKris Buschelman    Output Parameters:
2495c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
2505c66b693SKris Buschelman 
2515c66b693SKris Buschelman    Notes:
2525c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
2535c66b693SKris Buschelman 
2545c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
2555c66b693SKris Buschelman 
2565c66b693SKris Buschelman    Level: intermediate
2575c66b693SKris Buschelman 
2585c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
2595c66b693SKris Buschelman @*/
26038baddfdSBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C)
26138baddfdSBarry Smith {
262dfbe8321SBarry Smith   PetscErrorCode ierr;
263cb00f407SKris Buschelman   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
264cb00f407SKris Buschelman   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
2655c66b693SKris Buschelman 
2665c66b693SKris Buschelman   PetscFunctionBegin;
2675c66b693SKris Buschelman 
2684482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
269c9780b6fSBarry Smith   PetscValidType(A,1);
2705c66b693SKris Buschelman   MatPreallocated(A);
2715c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2725c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2735c66b693SKris Buschelman 
2744482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
275c9780b6fSBarry Smith   PetscValidType(B,2);
2765c66b693SKris Buschelman   MatPreallocated(B);
2775c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2785c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2795c66b693SKris Buschelman 
2804482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
281c9780b6fSBarry Smith   PetscValidType(C,3);
2825c66b693SKris Buschelman   MatPreallocated(C);
2835c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2845c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2855c66b693SKris Buschelman 
28677431f27SBarry Smith   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->N,C->N);
28777431f27SBarry Smith   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N);
28877431f27SBarry Smith   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",A->M,C->M);
2895c66b693SKris Buschelman 
290cb00f407SKris Buschelman   /* For now, we do not dispatch based on the type of A and B */
291cb00f407SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
292cb00f407SKris Buschelman   Anumeric = A->ops->matmultnumeric;
293cb00f407SKris Buschelman   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
294cb00f407SKris Buschelman   Bnumeric = B->ops->matmultnumeric;
295cb00f407SKris Buschelman   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
296cb00f407SKris Buschelman   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
2974d3841fdSKris Buschelman 
298cb00f407SKris Buschelman   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
299cb00f407SKris Buschelman   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
300cb00f407SKris Buschelman   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
3015c66b693SKris Buschelman 
3025c66b693SKris Buschelman   PetscFunctionReturn(0);
3035c66b693SKris Buschelman }
3045c66b693SKris Buschelman 
30526be0446SHong Zhang #undef __FUNCT__
30626be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
307dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
308d50806bdSBarry Smith {
309dfbe8321SBarry Smith   PetscErrorCode ierr;
31038baddfdSBarry Smith   PetscInt       flops=0;
311d50806bdSBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data;
312d50806bdSBarry Smith   Mat_SeqAIJ     *b = (Mat_SeqAIJ *)B->data;
313d50806bdSBarry Smith   Mat_SeqAIJ     *c = (Mat_SeqAIJ *)C->data;
31438baddfdSBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
315c124e916SHong Zhang   PetscInt       am=A->M,cm=C->M;
316c124e916SHong Zhang   PetscInt       i,j,k,anzi,bnzi,cnzi,brow,nextb;
317c124e916SHong Zhang   MatScalar      *aa=a->a,*ba=b->a,*baj,*ca=c->a;
318d50806bdSBarry Smith 
319d50806bdSBarry Smith   PetscFunctionBegin;
320c124e916SHong Zhang   /* clean old values in C */
321c124e916SHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
322d50806bdSBarry Smith   /* Traverse A row-wise. */
323d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
324d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
325d50806bdSBarry Smith   for (i=0;i<am;i++) {
326d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
327d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
328d50806bdSBarry Smith       brow = *aj++;
329d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
330d50806bdSBarry Smith       bjj  = bj + bi[brow];
331d50806bdSBarry Smith       baj  = ba + bi[brow];
332c124e916SHong Zhang       nextb = 0;
333c124e916SHong Zhang       for (k=0; nextb<bnzi; k++) {
334c124e916SHong Zhang         if (cj[k] == bjj[nextb]){ /* ccol == bcol */
335c124e916SHong Zhang           ca[k] += (*aa)*baj[nextb++];
336c124e916SHong Zhang         }
337d50806bdSBarry Smith       }
338d50806bdSBarry Smith       flops += 2*bnzi;
339d50806bdSBarry Smith       aa++;
340d50806bdSBarry Smith     }
341d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
342d50806bdSBarry Smith     ca += cnzi;
343d50806bdSBarry Smith     cj += cnzi;
344d50806bdSBarry Smith   }
345716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
346716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347716bacf3SKris Buschelman 
348d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
349d50806bdSBarry Smith   PetscFunctionReturn(0);
350d50806bdSBarry Smith }
351bc011b1eSHong Zhang 
352bc011b1eSHong Zhang #undef __FUNCT__
353bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose"
354bc011b1eSHong Zhang /*@
355bc011b1eSHong Zhang    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
356bc011b1eSHong Zhang 
357bc011b1eSHong Zhang    Collective on Mat
358bc011b1eSHong Zhang 
359bc011b1eSHong Zhang    Input Parameters:
360bc011b1eSHong Zhang +  A - the left matrix
361bc011b1eSHong Zhang .  B - the right matrix
362bc011b1eSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
363bc011b1eSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
364bc011b1eSHong Zhang 
365bc011b1eSHong Zhang    Output Parameters:
366bc011b1eSHong Zhang .  C - the product matrix
367bc011b1eSHong Zhang 
368bc011b1eSHong Zhang    Notes:
369bc011b1eSHong Zhang    C will be created and must be destroyed by the user with MatDestroy().
370bc011b1eSHong Zhang 
371bc011b1eSHong Zhang    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
372bc011b1eSHong Zhang    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
373bc011b1eSHong Zhang 
374bc011b1eSHong Zhang    Level: intermediate
375bc011b1eSHong Zhang 
376bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
377bc011b1eSHong Zhang @*/
378bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
379bc011b1eSHong Zhang {
380bc011b1eSHong Zhang   PetscErrorCode ierr;
381bc011b1eSHong Zhang   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
382bc011b1eSHong Zhang   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
383bc011b1eSHong Zhang 
384bc011b1eSHong Zhang   PetscFunctionBegin;
385bc011b1eSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
386bc011b1eSHong Zhang   PetscValidType(A,1);
387bc011b1eSHong Zhang   MatPreallocated(A);
388bc011b1eSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
389bc011b1eSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
390bc011b1eSHong Zhang   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
391bc011b1eSHong Zhang   PetscValidType(B,2);
392bc011b1eSHong Zhang   MatPreallocated(B);
393bc011b1eSHong Zhang   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
394bc011b1eSHong Zhang   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
395bc011b1eSHong Zhang   PetscValidPointer(C,3);
39677431f27SBarry Smith   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->M);
397bc011b1eSHong Zhang 
398bc011b1eSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
399bc011b1eSHong Zhang 
400bc011b1eSHong Zhang   fA = A->ops->matmulttranspose;
401bc011b1eSHong Zhang   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
402bc011b1eSHong Zhang   fB = B->ops->matmulttranspose;
403bc011b1eSHong Zhang   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
404bc011b1eSHong Zhang   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
405bc011b1eSHong Zhang 
406bc011b1eSHong Zhang   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
407bc011b1eSHong Zhang   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
408bc011b1eSHong Zhang   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
409bc011b1eSHong Zhang 
410bc011b1eSHong Zhang   PetscFunctionReturn(0);
411bc011b1eSHong Zhang }
412bc011b1eSHong Zhang 
413bc011b1eSHong Zhang #undef __FUNCT__
414bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
415bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
416bc011b1eSHong Zhang   PetscErrorCode ierr;
417bc011b1eSHong Zhang 
418bc011b1eSHong Zhang   PetscFunctionBegin;
419bc011b1eSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
420bc011b1eSHong Zhang     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
421bc011b1eSHong Zhang   }
422bc011b1eSHong Zhang   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
423bc011b1eSHong Zhang   PetscFunctionReturn(0);
424bc011b1eSHong Zhang }
425bc011b1eSHong Zhang 
426bc011b1eSHong Zhang #undef __FUNCT__
427bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
428bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
429bc011b1eSHong Zhang {
430bc011b1eSHong Zhang   PetscErrorCode ierr;
431bc011b1eSHong Zhang   Mat            At;
43238baddfdSBarry Smith   PetscInt       *ati,*atj;
433bc011b1eSHong Zhang 
434bc011b1eSHong Zhang   PetscFunctionBegin;
435bc011b1eSHong Zhang   /* create symbolic At */
436bc011b1eSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
437bc011b1eSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
438bc011b1eSHong Zhang 
439bc011b1eSHong Zhang   /* get symbolic C=At*B */
440bc011b1eSHong Zhang   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
441bc011b1eSHong Zhang 
442bc011b1eSHong Zhang   /* clean up */
443bc011b1eSHong Zhang   ierr = MatDestroy(At);CHKERRQ(ierr);
444bc011b1eSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
445bc011b1eSHong Zhang 
446bc011b1eSHong Zhang   PetscFunctionReturn(0);
447bc011b1eSHong Zhang }
448bc011b1eSHong Zhang 
449bc011b1eSHong Zhang #undef __FUNCT__
450bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
451bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
452bc011b1eSHong Zhang {
453bc011b1eSHong Zhang   PetscErrorCode ierr;
4540fbc74f4SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
45538baddfdSBarry Smith   PetscInt       am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
45638baddfdSBarry Smith   PetscInt       cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
4570fbc74f4SHong Zhang   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
458bc011b1eSHong Zhang 
459bc011b1eSHong Zhang   PetscFunctionBegin;
460bc011b1eSHong Zhang   /* clear old values in C */
461bc011b1eSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
462bc011b1eSHong Zhang 
463bc011b1eSHong Zhang   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
464bc011b1eSHong Zhang   for (i=0;i<am;i++) {
465bc011b1eSHong Zhang     bj   = b->j + bi[i];
466bc011b1eSHong Zhang     ba   = b->a + bi[i];
467bc011b1eSHong Zhang     bnzi = bi[i+1] - bi[i];
468bc011b1eSHong Zhang     anzi = ai[i+1] - ai[i];
469bc011b1eSHong Zhang     for (j=0; j<anzi; j++) {
470bc011b1eSHong Zhang       nextb = 0;
4710fbc74f4SHong Zhang       crow  = *aj++;
472bc011b1eSHong Zhang       cjj   = cj + ci[crow];
473bc011b1eSHong Zhang       caj   = ca + ci[crow];
474bc011b1eSHong Zhang       /* perform sparse axpy operation.  Note cjj includes bj. */
475bc011b1eSHong Zhang       for (k=0; nextb<bnzi; k++) {
4760fbc74f4SHong Zhang         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
4770fbc74f4SHong Zhang           caj[k] += (*aa)*(*(ba+nextb));
478bc011b1eSHong Zhang           nextb++;
479bc011b1eSHong Zhang         }
480bc011b1eSHong Zhang       }
481bc011b1eSHong Zhang       flops += 2*bnzi;
4820fbc74f4SHong Zhang       aa++;
483bc011b1eSHong Zhang     }
484bc011b1eSHong Zhang   }
485bc011b1eSHong Zhang 
486bc011b1eSHong Zhang   /* Assemble the final matrix and clean up */
487bc011b1eSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
488bc011b1eSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
489bc011b1eSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
490bc011b1eSHong Zhang   PetscFunctionReturn(0);
491bc011b1eSHong Zhang }
492