1d50806bdSBarry Smith /* 2*2499ec78SHong Zhang Defines matrix-matrix product routines for pairs of SeqAIJ matrices 3d50806bdSBarry Smith C = A * B 4d50806bdSBarry Smith */ 5d50806bdSBarry Smith 6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 8be0fcf8dSHong Zhang #include "petscbt.h" 9d50806bdSBarry Smith 10c1f4806aSKris Buschelman #undef __FUNCT__ 11c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult" 125c66b693SKris Buschelman /*@ 135c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 1494e3eecaSKris Buschelman 155c66b693SKris Buschelman Collective on Mat 16d50806bdSBarry Smith 175c66b693SKris Buschelman Input Parameters: 185c66b693SKris Buschelman + A - the left matrix 191c741599SHong Zhang . B - the right matrix 201c741599SHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 225c66b693SKris Buschelman 235c66b693SKris Buschelman Output Parameters: 245c66b693SKris Buschelman . C - the product matrix 255c66b693SKris Buschelman 265c66b693SKris Buschelman Notes: 275c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 285c66b693SKris Buschelman 29bc011b1eSHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 30bc011b1eSHong Zhang which inherit from AIJ. C will be of type MATAIJ. 315c66b693SKris Buschelman 325c66b693SKris Buschelman Level: intermediate 335c66b693SKris Buschelman 345c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 355c66b693SKris Buschelman @*/ 36dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 372d09714cSHong Zhang { 38dfbe8321SBarry Smith PetscErrorCode ierr; 39cb00f407SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 40cb00f407SKris Buschelman PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 412d09714cSHong Zhang 42d50806bdSBarry Smith PetscFunctionBegin; 434482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 44c9780b6fSBarry Smith PetscValidType(A,1); 455c66b693SKris Buschelman MatPreallocated(A); 465c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 475c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 484482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 49c9780b6fSBarry Smith PetscValidType(B,2); 505c66b693SKris Buschelman MatPreallocated(B); 515c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 525c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 534482741eSBarry Smith PetscValidPointer(C,3); 5477431f27SBarry Smith if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N); 55d50806bdSBarry Smith 561c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57c5db241fSHong Zhang 58cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 59cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60cb00f407SKris Buschelman fA = A->ops->matmult; 61cb00f407SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 62cb00f407SKris Buschelman fB = B->ops->matmult; 63cb00f407SKris Buschelman if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 64cb00f407SKris Buschelman if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 65cb00f407SKris Buschelman 666284ec50SHong Zhang ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 671c741599SHong Zhang ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 686284ec50SHong Zhang ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 694d3841fdSKris Buschelman 706284ec50SHong Zhang PetscFunctionReturn(0); 716284ec50SHong Zhang } 726284ec50SHong Zhang 736284ec50SHong Zhang #undef __FUNCT__ 745c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 7538baddfdSBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 7638baddfdSBarry Smith { 77dfbe8321SBarry Smith PetscErrorCode ierr; 785c66b693SKris Buschelman 795c66b693SKris Buschelman PetscFunctionBegin; 8026be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 8126be0446SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 8226be0446SHong Zhang } 8326be0446SHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 845c66b693SKris Buschelman PetscFunctionReturn(0); 855c66b693SKris Buschelman } 865c66b693SKris Buschelman 87c1f4806aSKris Buschelman #undef __FUNCT__ 88c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 895c66b693SKris Buschelman /*@ 905c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 915c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 925c66b693SKris Buschelman 935c66b693SKris Buschelman Collective on Mat 945c66b693SKris Buschelman 955c66b693SKris Buschelman Input Parameters: 965c66b693SKris Buschelman + A - the left matrix 97c5db241fSHong Zhang . B - the right matrix 98c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 995c66b693SKris Buschelman 1005c66b693SKris Buschelman Output Parameters: 1015c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 1025c66b693SKris Buschelman 1035c66b693SKris Buschelman Notes: 1044d3841fdSKris Buschelman C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 1055c66b693SKris Buschelman 1064d3841fdSKris Buschelman This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 1075c66b693SKris Buschelman 1085c66b693SKris Buschelman Level: intermediate 1095c66b693SKris Buschelman 1105c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 1115c66b693SKris Buschelman @*/ 11238baddfdSBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) 11338baddfdSBarry Smith { 114dfbe8321SBarry Smith PetscErrorCode ierr; 115cb00f407SKris Buschelman PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 116cb00f407SKris Buschelman PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 1175c66b693SKris Buschelman 1185c66b693SKris Buschelman PetscFunctionBegin; 1194482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 120c9780b6fSBarry Smith PetscValidType(A,1); 1215c66b693SKris Buschelman MatPreallocated(A); 1225c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1235c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1245c66b693SKris Buschelman 1254482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 126c9780b6fSBarry Smith PetscValidType(B,2); 1275c66b693SKris Buschelman MatPreallocated(B); 1285c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1295c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1304482741eSBarry Smith PetscValidPointer(C,3); 1314482741eSBarry Smith 13277431f27SBarry Smith if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N); 1331c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 1345c66b693SKris Buschelman 135cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 136cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 137cb00f407SKris Buschelman Asymbolic = A->ops->matmultsymbolic; 138cb00f407SKris Buschelman if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 139cb00f407SKris Buschelman Bsymbolic = B->ops->matmultsymbolic; 140cb00f407SKris Buschelman if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 141cb00f407SKris Buschelman if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 142cb00f407SKris Buschelman 143cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 144cb00f407SKris Buschelman ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 145cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1465c66b693SKris Buschelman 1475c66b693SKris Buschelman PetscFunctionReturn(0); 1485c66b693SKris Buschelman } 1491c24bd37SHong Zhang 15026be0446SHong Zhang #undef __FUNCT__ 15126be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 152dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 15358c24d83SHong Zhang { 154dfbe8321SBarry Smith PetscErrorCode ierr; 15558c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 15658c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 15738baddfdSBarry Smith PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 15838baddfdSBarry Smith PetscInt am=A->M,bn=B->N,bm=B->M; 15938baddfdSBarry Smith PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 16058c24d83SHong Zhang MatScalar *ca; 161be0fcf8dSHong Zhang PetscBT lnkbt; 16258c24d83SHong Zhang 16358c24d83SHong Zhang PetscFunctionBegin; 16458c24d83SHong Zhang /* Set up */ 16558c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 16658c24d83SHong Zhang /* free space for accumulating nonzero column info */ 16738baddfdSBarry Smith ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 16858c24d83SHong Zhang ci[0] = 0; 16958c24d83SHong Zhang 170be0fcf8dSHong Zhang /* create and initialize a linked list */ 171be0fcf8dSHong Zhang nlnk = bn+1; 172be0fcf8dSHong Zhang ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 17358c24d83SHong Zhang 174c5db241fSHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 17538baddfdSBarry Smith ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 17658c24d83SHong Zhang current_space = free_space; 17758c24d83SHong Zhang 17858c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 17958c24d83SHong Zhang for (i=0;i<am;i++) { 18058c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 18158c24d83SHong Zhang cnzi = 0; 1822d09714cSHong Zhang j = anzi; 1832d09714cSHong Zhang aj = a->j + ai[i]; 1842d09714cSHong Zhang while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 1852d09714cSHong Zhang j--; 1862d09714cSHong Zhang brow = *(aj + j); 18758c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 18858c24d83SHong Zhang bjj = bj + bi[brow]; 1891c239cc6SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 190be0fcf8dSHong Zhang ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1911c239cc6SHong Zhang cnzi += nlnk; 19258c24d83SHong Zhang } 19358c24d83SHong Zhang 19458c24d83SHong Zhang /* If free space is not available, make more free space */ 19558c24d83SHong Zhang /* Double the amount of total space in the list */ 19658c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 19758c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 198c5db241fSHong Zhang nspacedouble++; 19958c24d83SHong Zhang } 20058c24d83SHong Zhang 201c5db241fSHong Zhang /* Copy data into free space, then initialize lnk */ 202be0fcf8dSHong Zhang ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 203c5db241fSHong Zhang current_space->array += cnzi; 20458c24d83SHong Zhang current_space->local_used += cnzi; 20558c24d83SHong Zhang current_space->local_remaining -= cnzi; 20658c24d83SHong Zhang 20758c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 20858c24d83SHong Zhang } 20958c24d83SHong Zhang 21058c24d83SHong Zhang /* Column indices are in the list of free space */ 21158c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 21258c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 21338baddfdSBarry Smith ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 21458c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 215be0fcf8dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 21658c24d83SHong Zhang 21758c24d83SHong Zhang /* Allocate space for ca */ 21858c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 21958c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 22058c24d83SHong Zhang 22126be0446SHong Zhang /* put together the new symbolic matrix */ 22258c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 22358c24d83SHong Zhang 22458c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 22558c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 22658c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 22758c24d83SHong Zhang c->freedata = PETSC_TRUE; 22858c24d83SHong Zhang c->nonew = 0; 22958c24d83SHong Zhang 230be0fcf8dSHong Zhang if (nspacedouble){ 23177431f27SBarry Smith PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%D, nnz(A):%D, nnz(B):%D, fill:%g, nnz(C):%D\n",nspacedouble,ai[am],bi[bm],fill,ci[am]); 232be0fcf8dSHong Zhang } 23358c24d83SHong Zhang PetscFunctionReturn(0); 23458c24d83SHong Zhang } 235d50806bdSBarry Smith 236c1f4806aSKris Buschelman #undef __FUNCT__ 237c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 2385c66b693SKris Buschelman /*@ 2395c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 2405c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 2415c66b693SKris Buschelman 2425c66b693SKris Buschelman Collective on Mat 2435c66b693SKris Buschelman 2445c66b693SKris Buschelman Input Parameters: 2455c66b693SKris Buschelman + A - the left matrix 2465c66b693SKris Buschelman - B - the right matrix 2475c66b693SKris Buschelman 2485c66b693SKris Buschelman Output Parameters: 2495c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 2505c66b693SKris Buschelman 2515c66b693SKris Buschelman Notes: 2525c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 2535c66b693SKris Buschelman 2545c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 2555c66b693SKris Buschelman 2565c66b693SKris Buschelman Level: intermediate 2575c66b693SKris Buschelman 2585c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 2595c66b693SKris Buschelman @*/ 26038baddfdSBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C) 26138baddfdSBarry Smith { 262dfbe8321SBarry Smith PetscErrorCode ierr; 263cb00f407SKris Buschelman PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 264cb00f407SKris Buschelman PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 2655c66b693SKris Buschelman 2665c66b693SKris Buschelman PetscFunctionBegin; 2675c66b693SKris Buschelman 2684482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 269c9780b6fSBarry Smith PetscValidType(A,1); 2705c66b693SKris Buschelman MatPreallocated(A); 2715c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 2725c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2735c66b693SKris Buschelman 2744482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 275c9780b6fSBarry Smith PetscValidType(B,2); 2765c66b693SKris Buschelman MatPreallocated(B); 2775c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 2785c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2795c66b693SKris Buschelman 2804482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 281c9780b6fSBarry Smith PetscValidType(C,3); 2825c66b693SKris Buschelman MatPreallocated(C); 2835c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 2845c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2855c66b693SKris Buschelman 28677431f27SBarry Smith if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->N,C->N); 28777431f27SBarry Smith if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->N); 28877431f27SBarry Smith if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",A->M,C->M); 2895c66b693SKris Buschelman 290cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 291cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 292cb00f407SKris Buschelman Anumeric = A->ops->matmultnumeric; 293cb00f407SKris Buschelman if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 294cb00f407SKris Buschelman Bnumeric = B->ops->matmultnumeric; 295cb00f407SKris Buschelman if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 296cb00f407SKris Buschelman if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 2974d3841fdSKris Buschelman 298cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 299cb00f407SKris Buschelman ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 300cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 3015c66b693SKris Buschelman 3025c66b693SKris Buschelman PetscFunctionReturn(0); 3035c66b693SKris Buschelman } 3045c66b693SKris Buschelman 30526be0446SHong Zhang #undef __FUNCT__ 30626be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 307dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 308d50806bdSBarry Smith { 309dfbe8321SBarry Smith PetscErrorCode ierr; 31038baddfdSBarry Smith PetscInt flops=0; 311d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 312d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 313d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 31438baddfdSBarry Smith PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 315c124e916SHong Zhang PetscInt am=A->M,cm=C->M; 316c124e916SHong Zhang PetscInt i,j,k,anzi,bnzi,cnzi,brow,nextb; 317c124e916SHong Zhang MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a; 318d50806bdSBarry Smith 319d50806bdSBarry Smith PetscFunctionBegin; 320c124e916SHong Zhang /* clean old values in C */ 321c124e916SHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 322d50806bdSBarry Smith /* Traverse A row-wise. */ 323d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 324d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 325d50806bdSBarry Smith for (i=0;i<am;i++) { 326d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 327d50806bdSBarry Smith for (j=0;j<anzi;j++) { 328d50806bdSBarry Smith brow = *aj++; 329d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 330d50806bdSBarry Smith bjj = bj + bi[brow]; 331d50806bdSBarry Smith baj = ba + bi[brow]; 332c124e916SHong Zhang nextb = 0; 333c124e916SHong Zhang for (k=0; nextb<bnzi; k++) { 334c124e916SHong Zhang if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 335c124e916SHong Zhang ca[k] += (*aa)*baj[nextb++]; 336c124e916SHong Zhang } 337d50806bdSBarry Smith } 338d50806bdSBarry Smith flops += 2*bnzi; 339d50806bdSBarry Smith aa++; 340d50806bdSBarry Smith } 341d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 342d50806bdSBarry Smith ca += cnzi; 343d50806bdSBarry Smith cj += cnzi; 344d50806bdSBarry Smith } 345716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 346716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347716bacf3SKris Buschelman 348d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 349d50806bdSBarry Smith PetscFunctionReturn(0); 350d50806bdSBarry Smith } 351bc011b1eSHong Zhang 352bc011b1eSHong Zhang #undef __FUNCT__ 353bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose" 354bc011b1eSHong Zhang /*@ 355bc011b1eSHong Zhang MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 356bc011b1eSHong Zhang 357bc011b1eSHong Zhang Collective on Mat 358bc011b1eSHong Zhang 359bc011b1eSHong Zhang Input Parameters: 360bc011b1eSHong Zhang + A - the left matrix 361bc011b1eSHong Zhang . B - the right matrix 362bc011b1eSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 363bc011b1eSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 364bc011b1eSHong Zhang 365bc011b1eSHong Zhang Output Parameters: 366bc011b1eSHong Zhang . C - the product matrix 367bc011b1eSHong Zhang 368bc011b1eSHong Zhang Notes: 369bc011b1eSHong Zhang C will be created and must be destroyed by the user with MatDestroy(). 370bc011b1eSHong Zhang 371bc011b1eSHong Zhang This routine is currently only implemented for pairs of SeqAIJ matrices and classes 372bc011b1eSHong Zhang which inherit from SeqAIJ. C will be of type MATSEQAIJ. 373bc011b1eSHong Zhang 374bc011b1eSHong Zhang Level: intermediate 375bc011b1eSHong Zhang 376bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 377bc011b1eSHong Zhang @*/ 378bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 379bc011b1eSHong Zhang { 380bc011b1eSHong Zhang PetscErrorCode ierr; 381bc011b1eSHong Zhang PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 382bc011b1eSHong Zhang PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 383bc011b1eSHong Zhang 384bc011b1eSHong Zhang PetscFunctionBegin; 385bc011b1eSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 386bc011b1eSHong Zhang PetscValidType(A,1); 387bc011b1eSHong Zhang MatPreallocated(A); 388bc011b1eSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 389bc011b1eSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 390bc011b1eSHong Zhang PetscValidHeaderSpecific(B,MAT_COOKIE,2); 391bc011b1eSHong Zhang PetscValidType(B,2); 392bc011b1eSHong Zhang MatPreallocated(B); 393bc011b1eSHong Zhang if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 394bc011b1eSHong Zhang if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 395bc011b1eSHong Zhang PetscValidPointer(C,3); 39677431f27SBarry Smith if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",B->M,A->M); 397bc011b1eSHong Zhang 398bc011b1eSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 399bc011b1eSHong Zhang 400bc011b1eSHong Zhang fA = A->ops->matmulttranspose; 401bc011b1eSHong Zhang if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 402bc011b1eSHong Zhang fB = B->ops->matmulttranspose; 403bc011b1eSHong Zhang if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 404bc011b1eSHong Zhang if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 405bc011b1eSHong Zhang 406bc011b1eSHong Zhang ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 407bc011b1eSHong Zhang ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 408bc011b1eSHong Zhang ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 409bc011b1eSHong Zhang 410bc011b1eSHong Zhang PetscFunctionReturn(0); 411bc011b1eSHong Zhang } 412bc011b1eSHong Zhang 413bc011b1eSHong Zhang #undef __FUNCT__ 414bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 415bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 416bc011b1eSHong Zhang PetscErrorCode ierr; 417bc011b1eSHong Zhang 418bc011b1eSHong Zhang PetscFunctionBegin; 419bc011b1eSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 420bc011b1eSHong Zhang ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 421bc011b1eSHong Zhang } 422bc011b1eSHong Zhang ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 423bc011b1eSHong Zhang PetscFunctionReturn(0); 424bc011b1eSHong Zhang } 425bc011b1eSHong Zhang 426bc011b1eSHong Zhang #undef __FUNCT__ 427bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 428bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 429bc011b1eSHong Zhang { 430bc011b1eSHong Zhang PetscErrorCode ierr; 431bc011b1eSHong Zhang Mat At; 43238baddfdSBarry Smith PetscInt *ati,*atj; 433bc011b1eSHong Zhang 434bc011b1eSHong Zhang PetscFunctionBegin; 435bc011b1eSHong Zhang /* create symbolic At */ 436bc011b1eSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 437bc011b1eSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 438bc011b1eSHong Zhang 439bc011b1eSHong Zhang /* get symbolic C=At*B */ 440bc011b1eSHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 441bc011b1eSHong Zhang 442bc011b1eSHong Zhang /* clean up */ 443bc011b1eSHong Zhang ierr = MatDestroy(At);CHKERRQ(ierr); 444bc011b1eSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 445bc011b1eSHong Zhang 446bc011b1eSHong Zhang PetscFunctionReturn(0); 447bc011b1eSHong Zhang } 448bc011b1eSHong Zhang 449bc011b1eSHong Zhang #undef __FUNCT__ 450bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 451bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 452bc011b1eSHong Zhang { 453bc011b1eSHong Zhang PetscErrorCode ierr; 4540fbc74f4SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 45538baddfdSBarry Smith PetscInt am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 45638baddfdSBarry Smith PetscInt cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0; 4570fbc74f4SHong Zhang MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 458bc011b1eSHong Zhang 459bc011b1eSHong Zhang PetscFunctionBegin; 460bc011b1eSHong Zhang /* clear old values in C */ 461bc011b1eSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 462bc011b1eSHong Zhang 463bc011b1eSHong Zhang /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 464bc011b1eSHong Zhang for (i=0;i<am;i++) { 465bc011b1eSHong Zhang bj = b->j + bi[i]; 466bc011b1eSHong Zhang ba = b->a + bi[i]; 467bc011b1eSHong Zhang bnzi = bi[i+1] - bi[i]; 468bc011b1eSHong Zhang anzi = ai[i+1] - ai[i]; 469bc011b1eSHong Zhang for (j=0; j<anzi; j++) { 470bc011b1eSHong Zhang nextb = 0; 4710fbc74f4SHong Zhang crow = *aj++; 472bc011b1eSHong Zhang cjj = cj + ci[crow]; 473bc011b1eSHong Zhang caj = ca + ci[crow]; 474bc011b1eSHong Zhang /* perform sparse axpy operation. Note cjj includes bj. */ 475bc011b1eSHong Zhang for (k=0; nextb<bnzi; k++) { 4760fbc74f4SHong Zhang if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 4770fbc74f4SHong Zhang caj[k] += (*aa)*(*(ba+nextb)); 478bc011b1eSHong Zhang nextb++; 479bc011b1eSHong Zhang } 480bc011b1eSHong Zhang } 481bc011b1eSHong Zhang flops += 2*bnzi; 4820fbc74f4SHong Zhang aa++; 483bc011b1eSHong Zhang } 484bc011b1eSHong Zhang } 485bc011b1eSHong Zhang 486bc011b1eSHong Zhang /* Assemble the final matrix and clean up */ 487bc011b1eSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 488bc011b1eSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 489bc011b1eSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 490bc011b1eSHong Zhang PetscFunctionReturn(0); 491bc011b1eSHong Zhang } 492