1d50806bdSBarry Smith /* 2a50f8bf6SHong Zhang Defines matrix-matrix product routines for pairs of AIJ matrices 3d50806bdSBarry Smith C = A * B 4d50806bdSBarry Smith */ 5d50806bdSBarry Smith 6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 9be0fcf8dSHong Zhang #include "petscbt.h" 10d50806bdSBarry Smith 11c1f4806aSKris Buschelman #undef __FUNCT__ 12c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult" 135c66b693SKris Buschelman /*@ 145c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 1594e3eecaSKris Buschelman 165c66b693SKris Buschelman Collective on Mat 17d50806bdSBarry Smith 185c66b693SKris Buschelman Input Parameters: 195c66b693SKris Buschelman + A - the left matrix 201c741599SHong Zhang . B - the right matrix 211c741599SHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 235c66b693SKris Buschelman 245c66b693SKris Buschelman Output Parameters: 255c66b693SKris Buschelman . C - the product matrix 265c66b693SKris Buschelman 275c66b693SKris Buschelman Notes: 285c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 295c66b693SKris Buschelman 30bc011b1eSHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 31bc011b1eSHong Zhang which inherit from AIJ. C will be of type MATAIJ. 325c66b693SKris Buschelman 335c66b693SKris Buschelman Level: intermediate 345c66b693SKris Buschelman 355c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 365c66b693SKris Buschelman @*/ 37dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 382d09714cSHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40cb00f407SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 41cb00f407SKris Buschelman PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 422d09714cSHong Zhang 43d50806bdSBarry Smith PetscFunctionBegin; 444482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45c9780b6fSBarry Smith PetscValidType(A,1); 465c66b693SKris Buschelman MatPreallocated(A); 475c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 485c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 494482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 50c9780b6fSBarry Smith PetscValidType(B,2); 515c66b693SKris Buschelman MatPreallocated(B); 525c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 535c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 544482741eSBarry Smith PetscValidPointer(C,3); 555c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 56d50806bdSBarry Smith 571c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 58c5db241fSHong Zhang 59cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 60cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 61cb00f407SKris Buschelman fA = A->ops->matmult; 62cb00f407SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 63cb00f407SKris Buschelman fB = B->ops->matmult; 64cb00f407SKris Buschelman if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 65cb00f407SKris Buschelman if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 66cb00f407SKris Buschelman 676284ec50SHong Zhang ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 681c741599SHong Zhang ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 696284ec50SHong Zhang ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 704d3841fdSKris Buschelman 716284ec50SHong Zhang PetscFunctionReturn(0); 726284ec50SHong Zhang } 736284ec50SHong Zhang 746284ec50SHong Zhang #undef __FUNCT__ 756284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 76dfbe8321SBarry Smith PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 772d09714cSHong Zhang { 78dfbe8321SBarry Smith PetscErrorCode ierr; 796284ec50SHong Zhang 806284ec50SHong Zhang PetscFunctionBegin; 8126be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 82d6bb3c2dSHong Zhang ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 83d6bb3c2dSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 8426be0446SHong Zhang ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr); 85d6bb3c2dSHong Zhang } else { 86d6bb3c2dSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 87d6bb3c2dSHong Zhang } 88d50806bdSBarry Smith PetscFunctionReturn(0); 89d50806bdSBarry Smith } 905c66b693SKris Buschelman 915c66b693SKris Buschelman #undef __FUNCT__ 925c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 93dfbe8321SBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 94dfbe8321SBarry Smith PetscErrorCode ierr; 955c66b693SKris Buschelman 965c66b693SKris Buschelman PetscFunctionBegin; 9726be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 9826be0446SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 9926be0446SHong Zhang } 10026be0446SHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 1015c66b693SKris Buschelman PetscFunctionReturn(0); 1025c66b693SKris Buschelman } 1035c66b693SKris Buschelman 104c1f4806aSKris Buschelman #undef __FUNCT__ 105c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 1065c66b693SKris Buschelman /*@ 1075c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 1085c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 1095c66b693SKris Buschelman 1105c66b693SKris Buschelman Collective on Mat 1115c66b693SKris Buschelman 1125c66b693SKris Buschelman Input Parameters: 1135c66b693SKris Buschelman + A - the left matrix 114c5db241fSHong Zhang . B - the right matrix 115c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 1165c66b693SKris Buschelman 1175c66b693SKris Buschelman Output Parameters: 1185c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 1195c66b693SKris Buschelman 1205c66b693SKris Buschelman Notes: 1214d3841fdSKris Buschelman C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 1225c66b693SKris Buschelman 1234d3841fdSKris Buschelman This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 1245c66b693SKris Buschelman 1255c66b693SKris Buschelman Level: intermediate 1265c66b693SKris Buschelman 1275c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 1285c66b693SKris Buschelman @*/ 129dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) { 130dfbe8321SBarry Smith PetscErrorCode ierr; 131cb00f407SKris Buschelman PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 132cb00f407SKris Buschelman PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 1335c66b693SKris Buschelman 1345c66b693SKris Buschelman PetscFunctionBegin; 1354482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 136c9780b6fSBarry Smith PetscValidType(A,1); 1375c66b693SKris Buschelman MatPreallocated(A); 1385c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1395c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1405c66b693SKris Buschelman 1414482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 142c9780b6fSBarry Smith PetscValidType(B,2); 1435c66b693SKris Buschelman MatPreallocated(B); 1445c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1455c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1464482741eSBarry Smith PetscValidPointer(C,3); 1474482741eSBarry Smith 1485c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 1491c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 1505c66b693SKris Buschelman 151cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 152cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 153cb00f407SKris Buschelman Asymbolic = A->ops->matmultsymbolic; 154cb00f407SKris Buschelman if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 155cb00f407SKris Buschelman Bsymbolic = B->ops->matmultsymbolic; 156cb00f407SKris Buschelman if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 157cb00f407SKris Buschelman if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 158cb00f407SKris Buschelman 159cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 160cb00f407SKris Buschelman ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 161cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1625c66b693SKris Buschelman 1635c66b693SKris Buschelman PetscFunctionReturn(0); 1645c66b693SKris Buschelman } 1651c24bd37SHong Zhang 166dfbe8321SBarry Smith EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 16726be0446SHong Zhang #undef __FUNCT__ 16826be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult" 169dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A) 17026be0446SHong Zhang { 171dfbe8321SBarry Smith PetscErrorCode ierr; 1727f93b363SHong Zhang Mat_MatMatMultMPI *mult; 1737f93b363SHong Zhang PetscObjectContainer container; 17426be0446SHong Zhang 17526be0446SHong Zhang PetscFunctionBegin; 1767f93b363SHong Zhang ierr = PetscObjectQuery((PetscObject)A,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr); 1777f93b363SHong Zhang if (container) { 178*7f79fd58SMatthew Knepley ierr = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr); 179d6bb3c2dSHong Zhang ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr); 180d6bb3c2dSHong Zhang ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr); 181d6bb3c2dSHong Zhang ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr); 18297c2bf28SHong Zhang ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr); 18397c2bf28SHong Zhang ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr); 184d6bb3c2dSHong Zhang ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr); 185d6bb3c2dSHong Zhang 1867f93b363SHong Zhang ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 1877f93b363SHong Zhang ierr = PetscObjectCompose((PetscObject)A,"MatMatMultMPI",0);CHKERRQ(ierr); 1887f93b363SHong Zhang } 1897f93b363SHong Zhang ierr = PetscFree(mult);CHKERRQ(ierr); 19026be0446SHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 19126be0446SHong Zhang 19226be0446SHong Zhang PetscFunctionReturn(0); 19326be0446SHong Zhang } 19458c24d83SHong Zhang 19558c24d83SHong Zhang #undef __FUNCT__ 19626be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 197dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 19826be0446SHong Zhang { 199ff134f7aSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 200dfbe8321SBarry Smith PetscErrorCode ierr; 20116130426SHong Zhang int start,end; 20226be0446SHong Zhang Mat_MatMatMultMPI *mult; 2037f93b363SHong Zhang PetscObjectContainer container; 20426be0446SHong Zhang 20526be0446SHong Zhang PetscFunctionBegin; 206ff134f7aSHong Zhang if (a->cstart != b->rstart || a->cend != b->rend){ 207ff134f7aSHong Zhang SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 208ff134f7aSHong Zhang } 209d6bb3c2dSHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr); 210d6bb3c2dSHong Zhang 21126be0446SHong Zhang /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 21297c2bf28SHong Zhang ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr); 21326be0446SHong Zhang 21426be0446SHong Zhang /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 21526be0446SHong Zhang start = a->rstart; end = a->rend; 216d6bb3c2dSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr); 21797c2bf28SHong Zhang ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr); 21826be0446SHong Zhang 21926be0446SHong Zhang /* compute C_seq = A_seq * B_seq */ 22097c2bf28SHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr); 22126be0446SHong Zhang 22226be0446SHong Zhang /* create mpi matrix C by concatinating C_seq */ 223d6bb3c2dSHong Zhang ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */ 2240e36024fSHong Zhang ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 22526be0446SHong Zhang 22626be0446SHong Zhang /* attach the supporting struct to C for reuse of symbolic C */ 2277f93b363SHong Zhang ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2287f93b363SHong Zhang ierr = PetscObjectContainerSetPointer(container,mult);CHKERRQ(ierr); 2297f93b363SHong Zhang ierr = PetscObjectCompose((PetscObject)(*C),"MatMatMultMPI",(PetscObject)container);CHKERRQ(ierr); 2307f93b363SHong Zhang 23126be0446SHong Zhang (*C)->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; 23226be0446SHong Zhang 23326be0446SHong Zhang PetscFunctionReturn(0); 23426be0446SHong Zhang } 23526be0446SHong Zhang 23626be0446SHong Zhang #undef __FUNCT__ 23726be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 238dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 23958c24d83SHong Zhang { 240dfbe8321SBarry Smith PetscErrorCode ierr; 24158c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 24258c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 243be0fcf8dSHong Zhang int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 2445c66b693SKris Buschelman int am=A->M,bn=B->N,bm=B->M; 245be0fcf8dSHong Zhang int i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 24658c24d83SHong Zhang MatScalar *ca; 247be0fcf8dSHong Zhang PetscBT lnkbt; 24858c24d83SHong Zhang 24958c24d83SHong Zhang PetscFunctionBegin; 25058c24d83SHong Zhang /* Set up */ 25158c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 25258c24d83SHong Zhang /* free space for accumulating nonzero column info */ 25358c24d83SHong Zhang ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 25458c24d83SHong Zhang ci[0] = 0; 25558c24d83SHong Zhang 256be0fcf8dSHong Zhang /* create and initialize a linked list */ 257be0fcf8dSHong Zhang nlnk = bn+1; 258be0fcf8dSHong Zhang ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 25958c24d83SHong Zhang 260c5db241fSHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 261d6bb3c2dSHong Zhang ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 26258c24d83SHong Zhang current_space = free_space; 26358c24d83SHong Zhang 26458c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 26558c24d83SHong Zhang for (i=0;i<am;i++) { 26658c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 26758c24d83SHong Zhang cnzi = 0; 2682d09714cSHong Zhang j = anzi; 2692d09714cSHong Zhang aj = a->j + ai[i]; 2702d09714cSHong Zhang while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 2712d09714cSHong Zhang j--; 2722d09714cSHong Zhang brow = *(aj + j); 27358c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 27458c24d83SHong Zhang bjj = bj + bi[brow]; 2751c239cc6SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 276be0fcf8dSHong Zhang ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2771c239cc6SHong Zhang cnzi += nlnk; 27858c24d83SHong Zhang } 27958c24d83SHong Zhang 28058c24d83SHong Zhang /* If free space is not available, make more free space */ 28158c24d83SHong Zhang /* Double the amount of total space in the list */ 28258c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 28358c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 284c5db241fSHong Zhang nspacedouble++; 28558c24d83SHong Zhang } 28658c24d83SHong Zhang 287c5db241fSHong Zhang /* Copy data into free space, then initialize lnk */ 288be0fcf8dSHong Zhang ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 289c5db241fSHong Zhang current_space->array += cnzi; 29058c24d83SHong Zhang current_space->local_used += cnzi; 29158c24d83SHong Zhang current_space->local_remaining -= cnzi; 29258c24d83SHong Zhang 29358c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 29458c24d83SHong Zhang } 29558c24d83SHong Zhang 29658c24d83SHong Zhang /* Column indices are in the list of free space */ 29758c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 29858c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 29958c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 30058c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 301be0fcf8dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 30258c24d83SHong Zhang 30358c24d83SHong Zhang /* Allocate space for ca */ 30458c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 30558c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 30658c24d83SHong Zhang 30726be0446SHong Zhang /* put together the new symbolic matrix */ 30858c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 30958c24d83SHong Zhang 31058c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 31158c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 31258c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 31358c24d83SHong Zhang c->freedata = PETSC_TRUE; 31458c24d83SHong Zhang c->nonew = 0; 31558c24d83SHong Zhang 316be0fcf8dSHong Zhang if (nspacedouble){ 317be0fcf8dSHong Zhang PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]); 318be0fcf8dSHong Zhang } 31958c24d83SHong Zhang PetscFunctionReturn(0); 32058c24d83SHong Zhang } 321d50806bdSBarry Smith 322c1f4806aSKris Buschelman #undef __FUNCT__ 323c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 3245c66b693SKris Buschelman /*@ 3255c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 3265c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 3275c66b693SKris Buschelman 3285c66b693SKris Buschelman Collective on Mat 3295c66b693SKris Buschelman 3305c66b693SKris Buschelman Input Parameters: 3315c66b693SKris Buschelman + A - the left matrix 3325c66b693SKris Buschelman - B - the right matrix 3335c66b693SKris Buschelman 3345c66b693SKris Buschelman Output Parameters: 3355c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 3365c66b693SKris Buschelman 3375c66b693SKris Buschelman Notes: 3385c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 3395c66b693SKris Buschelman 3405c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 3415c66b693SKris Buschelman 3425c66b693SKris Buschelman Level: intermediate 3435c66b693SKris Buschelman 3445c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 3455c66b693SKris Buschelman @*/ 346dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){ 347dfbe8321SBarry Smith PetscErrorCode ierr; 348cb00f407SKris Buschelman PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 349cb00f407SKris Buschelman PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 3505c66b693SKris Buschelman 3515c66b693SKris Buschelman PetscFunctionBegin; 3525c66b693SKris Buschelman 3534482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 354c9780b6fSBarry Smith PetscValidType(A,1); 3555c66b693SKris Buschelman MatPreallocated(A); 3565c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3575c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3585c66b693SKris Buschelman 3594482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 360c9780b6fSBarry Smith PetscValidType(B,2); 3615c66b693SKris Buschelman MatPreallocated(B); 3625c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3635c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3645c66b693SKris Buschelman 3654482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 366c9780b6fSBarry Smith PetscValidType(C,3); 3675c66b693SKris Buschelman MatPreallocated(C); 3685c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3695c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3705c66b693SKris Buschelman 3715c66b693SKris Buschelman if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 3725c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 3735c66b693SKris Buschelman if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 3745c66b693SKris Buschelman 375cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 376cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 377cb00f407SKris Buschelman Anumeric = A->ops->matmultnumeric; 378cb00f407SKris Buschelman if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 379cb00f407SKris Buschelman Bnumeric = B->ops->matmultnumeric; 380cb00f407SKris Buschelman if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 381cb00f407SKris Buschelman if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 3824d3841fdSKris Buschelman 383cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 384cb00f407SKris Buschelman ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 385cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 3865c66b693SKris Buschelman 3875c66b693SKris Buschelman PetscFunctionReturn(0); 3885c66b693SKris Buschelman } 3895c66b693SKris Buschelman 390d6bb3c2dSHong Zhang /* This routine is called ONLY in the case of reusing previously computed symbolic C */ 391d50806bdSBarry Smith #undef __FUNCT__ 39226be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ" 393dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 39426be0446SHong Zhang { 395dfbe8321SBarry Smith PetscErrorCode ierr; 39697c2bf28SHong Zhang Mat *seq; 3977f93b363SHong Zhang Mat_MatMatMultMPI *mult; 3987f93b363SHong Zhang PetscObjectContainer container; 399d6bb3c2dSHong Zhang 40026be0446SHong Zhang PetscFunctionBegin; 4017f93b363SHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr); 4027f93b363SHong Zhang if (container) { 403*7f79fd58SMatthew Knepley ierr = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr); 4047f93b363SHong Zhang } 4057f93b363SHong Zhang 40697c2bf28SHong Zhang seq = &mult->B_seq; 40797c2bf28SHong Zhang ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 40897c2bf28SHong Zhang mult->B_seq = *seq; 40997c2bf28SHong Zhang 41097c2bf28SHong Zhang seq = &mult->A_loc; 41197c2bf28SHong Zhang ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 41297c2bf28SHong Zhang mult->A_loc = *seq; 41397c2bf28SHong Zhang 41497c2bf28SHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr); 415d6bb3c2dSHong Zhang 416d6bb3c2dSHong Zhang ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); 4170e36024fSHong Zhang ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 418d6bb3c2dSHong Zhang 41926be0446SHong Zhang PetscFunctionReturn(0); 42026be0446SHong Zhang } 42126be0446SHong Zhang 42226be0446SHong Zhang #undef __FUNCT__ 42326be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 424dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 425d50806bdSBarry Smith { 426dfbe8321SBarry Smith PetscErrorCode ierr; 427dfbe8321SBarry Smith int flops=0; 428d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 429d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 430d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 431d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 4325c66b693SKris Buschelman int am=A->M,cn=C->N; 43394e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 434d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 435d50806bdSBarry Smith 436d50806bdSBarry Smith PetscFunctionBegin; 437d50806bdSBarry Smith 438d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 439d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 440d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 441d50806bdSBarry Smith /* Traverse A row-wise. */ 442d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 443d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 444d50806bdSBarry Smith for (i=0;i<am;i++) { 445d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 446d50806bdSBarry Smith for (j=0;j<anzi;j++) { 447d50806bdSBarry Smith brow = *aj++; 448d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 449d50806bdSBarry Smith bjj = bj + bi[brow]; 450d50806bdSBarry Smith baj = ba + bi[brow]; 451d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 452d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 453d50806bdSBarry Smith } 454d50806bdSBarry Smith flops += 2*bnzi; 455d50806bdSBarry Smith aa++; 456d50806bdSBarry Smith } 457d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 458d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 459d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 460d50806bdSBarry Smith ca[j] = temp[cj[j]]; 461d50806bdSBarry Smith temp[cj[j]] = 0.0; 462d50806bdSBarry Smith } 463d50806bdSBarry Smith ca += cnzi; 464d50806bdSBarry Smith cj += cnzi; 465d50806bdSBarry Smith } 466716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 468716bacf3SKris Buschelman 469d50806bdSBarry Smith /* Free temp */ 470d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 471d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 472d50806bdSBarry Smith PetscFunctionReturn(0); 473d50806bdSBarry Smith } 474bc011b1eSHong Zhang 475bc011b1eSHong Zhang #undef __FUNCT__ 476bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose" 477bc011b1eSHong Zhang /*@ 478bc011b1eSHong Zhang MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 479bc011b1eSHong Zhang 480bc011b1eSHong Zhang Collective on Mat 481bc011b1eSHong Zhang 482bc011b1eSHong Zhang Input Parameters: 483bc011b1eSHong Zhang + A - the left matrix 484bc011b1eSHong Zhang . B - the right matrix 485bc011b1eSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 486bc011b1eSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 487bc011b1eSHong Zhang 488bc011b1eSHong Zhang Output Parameters: 489bc011b1eSHong Zhang . C - the product matrix 490bc011b1eSHong Zhang 491bc011b1eSHong Zhang Notes: 492bc011b1eSHong Zhang C will be created and must be destroyed by the user with MatDestroy(). 493bc011b1eSHong Zhang 494bc011b1eSHong Zhang This routine is currently only implemented for pairs of SeqAIJ matrices and classes 495bc011b1eSHong Zhang which inherit from SeqAIJ. C will be of type MATSEQAIJ. 496bc011b1eSHong Zhang 497bc011b1eSHong Zhang Level: intermediate 498bc011b1eSHong Zhang 499bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 500bc011b1eSHong Zhang @*/ 501bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 502bc011b1eSHong Zhang { 503bc011b1eSHong Zhang PetscErrorCode ierr; 504bc011b1eSHong Zhang PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 505bc011b1eSHong Zhang PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 506bc011b1eSHong Zhang 507bc011b1eSHong Zhang PetscFunctionBegin; 508bc011b1eSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 509bc011b1eSHong Zhang PetscValidType(A,1); 510bc011b1eSHong Zhang MatPreallocated(A); 511bc011b1eSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 512bc011b1eSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 513bc011b1eSHong Zhang PetscValidHeaderSpecific(B,MAT_COOKIE,2); 514bc011b1eSHong Zhang PetscValidType(B,2); 515bc011b1eSHong Zhang MatPreallocated(B); 516bc011b1eSHong Zhang if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 517bc011b1eSHong Zhang if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 518bc011b1eSHong Zhang PetscValidPointer(C,3); 519bc011b1eSHong Zhang if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M); 520bc011b1eSHong Zhang 521bc011b1eSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 522bc011b1eSHong Zhang 523bc011b1eSHong Zhang fA = A->ops->matmulttranspose; 524bc011b1eSHong Zhang if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 525bc011b1eSHong Zhang fB = B->ops->matmulttranspose; 526bc011b1eSHong Zhang if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 527bc011b1eSHong Zhang if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 528bc011b1eSHong Zhang 529bc011b1eSHong Zhang ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 530bc011b1eSHong Zhang ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 531bc011b1eSHong Zhang ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 532bc011b1eSHong Zhang 533bc011b1eSHong Zhang PetscFunctionReturn(0); 534bc011b1eSHong Zhang } 535bc011b1eSHong Zhang 536bc011b1eSHong Zhang #undef __FUNCT__ 537bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 538bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 539bc011b1eSHong Zhang PetscErrorCode ierr; 540bc011b1eSHong Zhang 541bc011b1eSHong Zhang PetscFunctionBegin; 542bc011b1eSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 543bc011b1eSHong Zhang ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 544bc011b1eSHong Zhang } 545bc011b1eSHong Zhang ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 546bc011b1eSHong Zhang PetscFunctionReturn(0); 547bc011b1eSHong Zhang } 548bc011b1eSHong Zhang 549bc011b1eSHong Zhang #undef __FUNCT__ 550bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 551bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 552bc011b1eSHong Zhang { 553bc011b1eSHong Zhang PetscErrorCode ierr; 554bc011b1eSHong Zhang Mat At; 555bc011b1eSHong Zhang int *ati,*atj; 556bc011b1eSHong Zhang 557bc011b1eSHong Zhang PetscFunctionBegin; 558bc011b1eSHong Zhang /* create symbolic At */ 559bc011b1eSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 560bc011b1eSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 561bc011b1eSHong Zhang 562bc011b1eSHong Zhang /* get symbolic C=At*B */ 563bc011b1eSHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 564bc011b1eSHong Zhang 565bc011b1eSHong Zhang /* clean up */ 566bc011b1eSHong Zhang ierr = MatDestroy(At);CHKERRQ(ierr); 567bc011b1eSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 568bc011b1eSHong Zhang 569bc011b1eSHong Zhang PetscFunctionReturn(0); 570bc011b1eSHong Zhang } 571bc011b1eSHong Zhang 572bc011b1eSHong Zhang #undef __FUNCT__ 573bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 574bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 575bc011b1eSHong Zhang { 576bc011b1eSHong Zhang PetscErrorCode ierr; 5770fbc74f4SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 578170ef064SHong Zhang int am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 5790fbc74f4SHong Zhang int cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0; 5800fbc74f4SHong Zhang MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 581bc011b1eSHong Zhang 582bc011b1eSHong Zhang PetscFunctionBegin; 583bc011b1eSHong Zhang /* clear old values in C */ 584bc011b1eSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 585bc011b1eSHong Zhang 586bc011b1eSHong Zhang /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 587bc011b1eSHong Zhang for (i=0;i<am;i++) { 588bc011b1eSHong Zhang bj = b->j + bi[i]; 589bc011b1eSHong Zhang ba = b->a + bi[i]; 590bc011b1eSHong Zhang bnzi = bi[i+1] - bi[i]; 591bc011b1eSHong Zhang anzi = ai[i+1] - ai[i]; 592bc011b1eSHong Zhang for (j=0; j<anzi; j++) { 593bc011b1eSHong Zhang nextb = 0; 5940fbc74f4SHong Zhang crow = *aj++; 595bc011b1eSHong Zhang cjj = cj + ci[crow]; 596bc011b1eSHong Zhang caj = ca + ci[crow]; 597bc011b1eSHong Zhang /* perform sparse axpy operation. Note cjj includes bj. */ 598bc011b1eSHong Zhang for (k=0; nextb<bnzi; k++) { 5990fbc74f4SHong Zhang if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 6000fbc74f4SHong Zhang caj[k] += (*aa)*(*(ba+nextb)); 601bc011b1eSHong Zhang nextb++; 602bc011b1eSHong Zhang } 603bc011b1eSHong Zhang } 604bc011b1eSHong Zhang flops += 2*bnzi; 6050fbc74f4SHong Zhang aa++; 606bc011b1eSHong Zhang } 607bc011b1eSHong Zhang } 608bc011b1eSHong Zhang 609bc011b1eSHong Zhang /* Assemble the final matrix and clean up */ 610bc011b1eSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 611bc011b1eSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 612bc011b1eSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 613bc011b1eSHong Zhang PetscFunctionReturn(0); 614bc011b1eSHong Zhang } 615