1d50806bdSBarry Smith /* 2a50f8bf6SHong Zhang Defines matrix-matrix product routines for pairs of AIJ matrices 3d50806bdSBarry Smith C = A * B 4d50806bdSBarry Smith */ 5d50806bdSBarry Smith 6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 9d50806bdSBarry Smith 10c1f4806aSKris Buschelman #undef __FUNCT__ 11c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult" 125c66b693SKris Buschelman /*@ 135c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 1494e3eecaSKris Buschelman 155c66b693SKris Buschelman Collective on Mat 16d50806bdSBarry Smith 175c66b693SKris Buschelman Input Parameters: 185c66b693SKris Buschelman + A - the left matrix 191c741599SHong Zhang . B - the right matrix 201c741599SHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 225c66b693SKris Buschelman 235c66b693SKris Buschelman Output Parameters: 245c66b693SKris Buschelman . C - the product matrix 255c66b693SKris Buschelman 265c66b693SKris Buschelman Notes: 275c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 285c66b693SKris Buschelman 29*bc011b1eSHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 30*bc011b1eSHong Zhang which inherit from AIJ. C will be of type MATAIJ. 315c66b693SKris Buschelman 325c66b693SKris Buschelman Level: intermediate 335c66b693SKris Buschelman 345c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 355c66b693SKris Buschelman @*/ 36dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 372d09714cSHong Zhang { 38dfbe8321SBarry Smith PetscErrorCode ierr; 39cb00f407SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 40cb00f407SKris Buschelman PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 412d09714cSHong Zhang 42d50806bdSBarry Smith PetscFunctionBegin; 434482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 44c9780b6fSBarry Smith PetscValidType(A,1); 455c66b693SKris Buschelman MatPreallocated(A); 465c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 475c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 484482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 49c9780b6fSBarry Smith PetscValidType(B,2); 505c66b693SKris Buschelman MatPreallocated(B); 515c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 525c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 534482741eSBarry Smith PetscValidPointer(C,3); 545c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 55d50806bdSBarry Smith 561c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57c5db241fSHong Zhang 58cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 59cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60cb00f407SKris Buschelman fA = A->ops->matmult; 61cb00f407SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 62cb00f407SKris Buschelman fB = B->ops->matmult; 63cb00f407SKris Buschelman if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 64cb00f407SKris Buschelman if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 65cb00f407SKris Buschelman 666284ec50SHong Zhang ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 671c741599SHong Zhang ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 686284ec50SHong Zhang ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 694d3841fdSKris Buschelman 706284ec50SHong Zhang PetscFunctionReturn(0); 716284ec50SHong Zhang } 726284ec50SHong Zhang 736284ec50SHong Zhang #undef __FUNCT__ 746284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 75dfbe8321SBarry Smith PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 762d09714cSHong Zhang { 77dfbe8321SBarry Smith PetscErrorCode ierr; 786284ec50SHong Zhang 796284ec50SHong Zhang PetscFunctionBegin; 8026be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 81d6bb3c2dSHong Zhang ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 82d6bb3c2dSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 8326be0446SHong Zhang ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr); 84d6bb3c2dSHong Zhang } else { 85d6bb3c2dSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 86d6bb3c2dSHong Zhang } 87d50806bdSBarry Smith PetscFunctionReturn(0); 88d50806bdSBarry Smith } 895c66b693SKris Buschelman 905c66b693SKris Buschelman #undef __FUNCT__ 915c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 92dfbe8321SBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 93dfbe8321SBarry Smith PetscErrorCode ierr; 945c66b693SKris Buschelman 955c66b693SKris Buschelman PetscFunctionBegin; 9626be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 9726be0446SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 9826be0446SHong Zhang } 9926be0446SHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 1005c66b693SKris Buschelman PetscFunctionReturn(0); 1015c66b693SKris Buschelman } 1025c66b693SKris Buschelman 103c1f4806aSKris Buschelman #undef __FUNCT__ 104c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 1055c66b693SKris Buschelman /*@ 1065c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 1075c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 1085c66b693SKris Buschelman 1095c66b693SKris Buschelman Collective on Mat 1105c66b693SKris Buschelman 1115c66b693SKris Buschelman Input Parameters: 1125c66b693SKris Buschelman + A - the left matrix 113c5db241fSHong Zhang . B - the right matrix 114c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 1155c66b693SKris Buschelman 1165c66b693SKris Buschelman Output Parameters: 1175c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 1185c66b693SKris Buschelman 1195c66b693SKris Buschelman Notes: 1204d3841fdSKris Buschelman C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 1215c66b693SKris Buschelman 1224d3841fdSKris Buschelman This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 1235c66b693SKris Buschelman 1245c66b693SKris Buschelman Level: intermediate 1255c66b693SKris Buschelman 1265c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 1275c66b693SKris Buschelman @*/ 128dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) { 129dfbe8321SBarry Smith PetscErrorCode ierr; 130cb00f407SKris Buschelman PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 131cb00f407SKris Buschelman PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 1325c66b693SKris Buschelman 1335c66b693SKris Buschelman PetscFunctionBegin; 1344482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 135c9780b6fSBarry Smith PetscValidType(A,1); 1365c66b693SKris Buschelman MatPreallocated(A); 1375c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1385c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1395c66b693SKris Buschelman 1404482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 141c9780b6fSBarry Smith PetscValidType(B,2); 1425c66b693SKris Buschelman MatPreallocated(B); 1435c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1445c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1454482741eSBarry Smith PetscValidPointer(C,3); 1464482741eSBarry Smith 1475c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 1481c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 1495c66b693SKris Buschelman 150cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 151cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 152cb00f407SKris Buschelman Asymbolic = A->ops->matmultsymbolic; 153cb00f407SKris Buschelman if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 154cb00f407SKris Buschelman Bsymbolic = B->ops->matmultsymbolic; 155cb00f407SKris Buschelman if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 156cb00f407SKris Buschelman if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 157cb00f407SKris Buschelman 158cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 159cb00f407SKris Buschelman ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 160cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1615c66b693SKris Buschelman 1625c66b693SKris Buschelman PetscFunctionReturn(0); 1635c66b693SKris Buschelman } 1641c24bd37SHong Zhang 165dfbe8321SBarry Smith EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 16626be0446SHong Zhang #undef __FUNCT__ 16726be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult" 168dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A) 16926be0446SHong Zhang { 170dfbe8321SBarry Smith PetscErrorCode ierr; 17126be0446SHong Zhang Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr; 17226be0446SHong Zhang 17326be0446SHong Zhang PetscFunctionBegin; 174d6bb3c2dSHong Zhang ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr); 175d6bb3c2dSHong Zhang ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr); 176d6bb3c2dSHong Zhang ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr); 177d6bb3c2dSHong Zhang ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr); 178d6bb3c2dSHong Zhang ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr); 179d6bb3c2dSHong Zhang ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr); 18026be0446SHong Zhang ierr = PetscFree(mult);CHKERRQ(ierr); 181d6bb3c2dSHong Zhang 18226be0446SHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 18326be0446SHong Zhang 18426be0446SHong Zhang PetscFunctionReturn(0); 18526be0446SHong Zhang } 18658c24d83SHong Zhang 18758c24d83SHong Zhang #undef __FUNCT__ 18826be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 189dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 19026be0446SHong Zhang { 191ff134f7aSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 192dfbe8321SBarry Smith PetscErrorCode ierr; 193ff134f7aSHong Zhang int *idx,i,start,end,ncols,nzA,nzB,*cmap; 19426be0446SHong Zhang Mat_MatMatMultMPI *mult; 19526be0446SHong Zhang 19626be0446SHong Zhang PetscFunctionBegin; 197ff134f7aSHong Zhang if (a->cstart != b->rstart || a->cend != b->rend){ 198ff134f7aSHong Zhang SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 199ff134f7aSHong Zhang } 200d6bb3c2dSHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr); 201d6bb3c2dSHong Zhang 20226be0446SHong Zhang /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 20326be0446SHong Zhang start = a->cstart; 20426be0446SHong Zhang cmap = a->garray; 20526be0446SHong Zhang nzA = a->A->n; 20626be0446SHong Zhang nzB = a->B->n; 20726be0446SHong Zhang ierr = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr); 20826be0446SHong Zhang ncols = 0; 20926be0446SHong Zhang for (i=0; i<nzB; i++) { 21026be0446SHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 21126be0446SHong Zhang else break; 21226be0446SHong Zhang } 213ff134f7aSHong Zhang mult->brstart = i; 21426be0446SHong Zhang for (i=0; i<nzA; i++) idx[ncols++] = start + i; 215ff134f7aSHong Zhang for (i=mult->brstart; i<nzB; i++) idx[ncols++] = cmap[i]; 216d6bb3c2dSHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr); 21726be0446SHong Zhang ierr = PetscFree(idx);CHKERRQ(ierr); 218d6bb3c2dSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr); 219ff134f7aSHong Zhang ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr); 22026be0446SHong Zhang 22126be0446SHong Zhang /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 22226be0446SHong Zhang start = a->rstart; end = a->rend; 223d6bb3c2dSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr); 224d6bb3c2dSHong Zhang ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr); 22526be0446SHong Zhang 22626be0446SHong Zhang /* compute C_seq = A_seq * B_seq */ 227d6bb3c2dSHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr); 22826be0446SHong Zhang 22926be0446SHong Zhang /* create mpi matrix C by concatinating C_seq */ 230d6bb3c2dSHong Zhang ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */ 231d6bb3c2dSHong Zhang ierr = MatMerge(A->comm,mult->C_seq,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 23226be0446SHong Zhang 23326be0446SHong Zhang /* attach the supporting struct to C for reuse of symbolic C */ 23426be0446SHong Zhang (*C)->spptr = (void*)mult; 23526be0446SHong Zhang (*C)->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; 23626be0446SHong Zhang 23726be0446SHong Zhang PetscFunctionReturn(0); 23826be0446SHong Zhang } 23926be0446SHong Zhang 24026be0446SHong Zhang #undef __FUNCT__ 24126be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 242dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 24358c24d83SHong Zhang { 244dfbe8321SBarry Smith PetscErrorCode ierr; 24558c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 24658c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 24758c24d83SHong Zhang int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 2481c24bd37SHong Zhang int *ci,*cj,*lnk; 2495c66b693SKris Buschelman int am=A->M,bn=B->N,bm=B->M; 2500e76890bSHong Zhang int i,j,anzi,brow,bnzj,cnzi,nlnk,nspacedouble=0; 25158c24d83SHong Zhang MatScalar *ca; 25258c24d83SHong Zhang 25358c24d83SHong Zhang PetscFunctionBegin; 25458c24d83SHong Zhang /* Set up */ 25558c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 25658c24d83SHong Zhang /* free space for accumulating nonzero column info */ 25758c24d83SHong Zhang ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 25858c24d83SHong Zhang ci[0] = 0; 25958c24d83SHong Zhang 2600e76890bSHong Zhang /* create and initialize a linked list for symbolic product */ 26158c24d83SHong Zhang ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 2620e76890bSHong Zhang ierr = PetscLLInitialize(bn,bn);CHKERRQ(ierr); 26358c24d83SHong Zhang 264c5db241fSHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 265d6bb3c2dSHong Zhang ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 26658c24d83SHong Zhang current_space = free_space; 26758c24d83SHong Zhang 26858c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 26958c24d83SHong Zhang for (i=0;i<am;i++) { 27058c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 27158c24d83SHong Zhang cnzi = 0; 2722d09714cSHong Zhang j = anzi; 2732d09714cSHong Zhang aj = a->j + ai[i]; 2742d09714cSHong Zhang while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 2752d09714cSHong Zhang j--; 2762d09714cSHong Zhang brow = *(aj + j); 27758c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 27858c24d83SHong Zhang bjj = bj + bi[brow]; 2791c239cc6SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 2800e76890bSHong Zhang ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk);CHKERRQ(ierr); 2811c239cc6SHong Zhang cnzi += nlnk; 28258c24d83SHong Zhang } 28358c24d83SHong Zhang 28458c24d83SHong Zhang /* If free space is not available, make more free space */ 28558c24d83SHong Zhang /* Double the amount of total space in the list */ 28658c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 28758c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 288c5db241fSHong Zhang nspacedouble++; 28958c24d83SHong Zhang } 29058c24d83SHong Zhang 291c5db241fSHong Zhang /* Copy data into free space, then initialize lnk */ 2920e76890bSHong Zhang ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array);CHKERRQ(ierr); 293c5db241fSHong Zhang current_space->array += cnzi; 29458c24d83SHong Zhang current_space->local_used += cnzi; 29558c24d83SHong Zhang current_space->local_remaining -= cnzi; 29658c24d83SHong Zhang 29758c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 29858c24d83SHong Zhang } 29958c24d83SHong Zhang 30058c24d83SHong Zhang /* Column indices are in the list of free space */ 30158c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 30258c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 30358c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 30458c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 30558c24d83SHong Zhang ierr = PetscFree(lnk);CHKERRQ(ierr); 30658c24d83SHong Zhang 30758c24d83SHong Zhang /* Allocate space for ca */ 30858c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 30958c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 31058c24d83SHong Zhang 31126be0446SHong Zhang /* put together the new symbolic matrix */ 31258c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 31358c24d83SHong Zhang 31458c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 31558c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 31658c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 31758c24d83SHong Zhang c->freedata = PETSC_TRUE; 31858c24d83SHong Zhang c->nonew = 0; 31958c24d83SHong Zhang 320c5db241fSHong Zhang PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble); 32158c24d83SHong Zhang PetscFunctionReturn(0); 32258c24d83SHong Zhang } 323d50806bdSBarry Smith 324c1f4806aSKris Buschelman #undef __FUNCT__ 325c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 3265c66b693SKris Buschelman /*@ 3275c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 3285c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 3295c66b693SKris Buschelman 3305c66b693SKris Buschelman Collective on Mat 3315c66b693SKris Buschelman 3325c66b693SKris Buschelman Input Parameters: 3335c66b693SKris Buschelman + A - the left matrix 3345c66b693SKris Buschelman - B - the right matrix 3355c66b693SKris Buschelman 3365c66b693SKris Buschelman Output Parameters: 3375c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 3385c66b693SKris Buschelman 3395c66b693SKris Buschelman Notes: 3405c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 3415c66b693SKris Buschelman 3425c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 3435c66b693SKris Buschelman 3445c66b693SKris Buschelman Level: intermediate 3455c66b693SKris Buschelman 3465c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 3475c66b693SKris Buschelman @*/ 348dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){ 349dfbe8321SBarry Smith PetscErrorCode ierr; 350cb00f407SKris Buschelman PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 351cb00f407SKris Buschelman PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 3525c66b693SKris Buschelman 3535c66b693SKris Buschelman PetscFunctionBegin; 3545c66b693SKris Buschelman 3554482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 356c9780b6fSBarry Smith PetscValidType(A,1); 3575c66b693SKris Buschelman MatPreallocated(A); 3585c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3595c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3605c66b693SKris Buschelman 3614482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 362c9780b6fSBarry Smith PetscValidType(B,2); 3635c66b693SKris Buschelman MatPreallocated(B); 3645c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3655c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3665c66b693SKris Buschelman 3674482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 368c9780b6fSBarry Smith PetscValidType(C,3); 3695c66b693SKris Buschelman MatPreallocated(C); 3705c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3715c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3725c66b693SKris Buschelman 3735c66b693SKris Buschelman if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 3745c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 3755c66b693SKris Buschelman if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 3765c66b693SKris Buschelman 377cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 378cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 379cb00f407SKris Buschelman Anumeric = A->ops->matmultnumeric; 380cb00f407SKris Buschelman if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 381cb00f407SKris Buschelman Bnumeric = B->ops->matmultnumeric; 382cb00f407SKris Buschelman if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 383cb00f407SKris Buschelman if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 3844d3841fdSKris Buschelman 385cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 386cb00f407SKris Buschelman ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 387cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 3885c66b693SKris Buschelman 3895c66b693SKris Buschelman PetscFunctionReturn(0); 3905c66b693SKris Buschelman } 3915c66b693SKris Buschelman 392d6bb3c2dSHong Zhang /* This routine is called ONLY in the case of reusing previously computed symbolic C */ 393d50806bdSBarry Smith #undef __FUNCT__ 39426be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ" 395dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 39626be0446SHong Zhang { 397dfbe8321SBarry Smith PetscErrorCode ierr; 398d6bb3c2dSHong Zhang Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr; 399d6bb3c2dSHong Zhang 40026be0446SHong Zhang PetscFunctionBegin; 401d6bb3c2dSHong Zhang ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr) 402d6bb3c2dSHong Zhang ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr); 403d6bb3c2dSHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr); 404d6bb3c2dSHong Zhang 405d6bb3c2dSHong Zhang ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); 406d6bb3c2dSHong Zhang ierr = MatMerge(A->comm,mult->C_seq,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 407d6bb3c2dSHong Zhang 40826be0446SHong Zhang PetscFunctionReturn(0); 40926be0446SHong Zhang } 41026be0446SHong Zhang 41126be0446SHong Zhang #undef __FUNCT__ 41226be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 413dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 414d50806bdSBarry Smith { 415dfbe8321SBarry Smith PetscErrorCode ierr; 416dfbe8321SBarry Smith int flops=0; 417d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 418d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 419d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 420d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 4215c66b693SKris Buschelman int am=A->M,cn=C->N; 42294e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 423d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 424d50806bdSBarry Smith 425d50806bdSBarry Smith PetscFunctionBegin; 426d50806bdSBarry Smith 427d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 428d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 429d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 430d50806bdSBarry Smith /* Traverse A row-wise. */ 431d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 432d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 433d50806bdSBarry Smith for (i=0;i<am;i++) { 434d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 435d50806bdSBarry Smith for (j=0;j<anzi;j++) { 436d50806bdSBarry Smith brow = *aj++; 437d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 438d50806bdSBarry Smith bjj = bj + bi[brow]; 439d50806bdSBarry Smith baj = ba + bi[brow]; 440d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 441d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 442d50806bdSBarry Smith } 443d50806bdSBarry Smith flops += 2*bnzi; 444d50806bdSBarry Smith aa++; 445d50806bdSBarry Smith } 446d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 447d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 448d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 449d50806bdSBarry Smith ca[j] = temp[cj[j]]; 450d50806bdSBarry Smith temp[cj[j]] = 0.0; 451d50806bdSBarry Smith } 452d50806bdSBarry Smith ca += cnzi; 453d50806bdSBarry Smith cj += cnzi; 454d50806bdSBarry Smith } 455716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 457716bacf3SKris Buschelman 458d50806bdSBarry Smith /* Free temp */ 459d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 460d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 461d50806bdSBarry Smith PetscFunctionReturn(0); 462d50806bdSBarry Smith } 463*bc011b1eSHong Zhang 464*bc011b1eSHong Zhang #undef __FUNCT__ 465*bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose" 466*bc011b1eSHong Zhang /*@ 467*bc011b1eSHong Zhang MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 468*bc011b1eSHong Zhang 469*bc011b1eSHong Zhang Collective on Mat 470*bc011b1eSHong Zhang 471*bc011b1eSHong Zhang Input Parameters: 472*bc011b1eSHong Zhang + A - the left matrix 473*bc011b1eSHong Zhang . B - the right matrix 474*bc011b1eSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 475*bc011b1eSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 476*bc011b1eSHong Zhang 477*bc011b1eSHong Zhang Output Parameters: 478*bc011b1eSHong Zhang . C - the product matrix 479*bc011b1eSHong Zhang 480*bc011b1eSHong Zhang Notes: 481*bc011b1eSHong Zhang C will be created and must be destroyed by the user with MatDestroy(). 482*bc011b1eSHong Zhang 483*bc011b1eSHong Zhang This routine is currently only implemented for pairs of SeqAIJ matrices and classes 484*bc011b1eSHong Zhang which inherit from SeqAIJ. C will be of type MATSEQAIJ. 485*bc011b1eSHong Zhang 486*bc011b1eSHong Zhang Level: intermediate 487*bc011b1eSHong Zhang 488*bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 489*bc011b1eSHong Zhang @*/ 490*bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 491*bc011b1eSHong Zhang { 492*bc011b1eSHong Zhang PetscErrorCode ierr; 493*bc011b1eSHong Zhang PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 494*bc011b1eSHong Zhang PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 495*bc011b1eSHong Zhang 496*bc011b1eSHong Zhang PetscFunctionBegin; 497*bc011b1eSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 498*bc011b1eSHong Zhang PetscValidType(A,1); 499*bc011b1eSHong Zhang MatPreallocated(A); 500*bc011b1eSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 501*bc011b1eSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 502*bc011b1eSHong Zhang PetscValidHeaderSpecific(B,MAT_COOKIE,2); 503*bc011b1eSHong Zhang PetscValidType(B,2); 504*bc011b1eSHong Zhang MatPreallocated(B); 505*bc011b1eSHong Zhang if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 506*bc011b1eSHong Zhang if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 507*bc011b1eSHong Zhang PetscValidPointer(C,3); 508*bc011b1eSHong Zhang if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M); 509*bc011b1eSHong Zhang 510*bc011b1eSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 511*bc011b1eSHong Zhang 512*bc011b1eSHong Zhang fA = A->ops->matmulttranspose; 513*bc011b1eSHong Zhang if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 514*bc011b1eSHong Zhang fB = B->ops->matmulttranspose; 515*bc011b1eSHong Zhang if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 516*bc011b1eSHong Zhang if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 517*bc011b1eSHong Zhang 518*bc011b1eSHong Zhang ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 519*bc011b1eSHong Zhang ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 520*bc011b1eSHong Zhang ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 521*bc011b1eSHong Zhang 522*bc011b1eSHong Zhang PetscFunctionReturn(0); 523*bc011b1eSHong Zhang } 524*bc011b1eSHong Zhang 525*bc011b1eSHong Zhang #undef __FUNCT__ 526*bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 527*bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 528*bc011b1eSHong Zhang PetscErrorCode ierr; 529*bc011b1eSHong Zhang 530*bc011b1eSHong Zhang PetscFunctionBegin; 531*bc011b1eSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 532*bc011b1eSHong Zhang ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 533*bc011b1eSHong Zhang } 534*bc011b1eSHong Zhang ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 535*bc011b1eSHong Zhang PetscFunctionReturn(0); 536*bc011b1eSHong Zhang } 537*bc011b1eSHong Zhang 538*bc011b1eSHong Zhang #undef __FUNCT__ 539*bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 540*bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 541*bc011b1eSHong Zhang { 542*bc011b1eSHong Zhang PetscErrorCode ierr; 543*bc011b1eSHong Zhang Mat At; 544*bc011b1eSHong Zhang int *ati,*atj; 545*bc011b1eSHong Zhang 546*bc011b1eSHong Zhang PetscFunctionBegin; 547*bc011b1eSHong Zhang /* create symbolic At */ 548*bc011b1eSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 549*bc011b1eSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 550*bc011b1eSHong Zhang 551*bc011b1eSHong Zhang /* get symbolic C=At*B */ 552*bc011b1eSHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 553*bc011b1eSHong Zhang 554*bc011b1eSHong Zhang /* clean up */ 555*bc011b1eSHong Zhang ierr = MatDestroy(At);CHKERRQ(ierr); 556*bc011b1eSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 557*bc011b1eSHong Zhang 558*bc011b1eSHong Zhang PetscFunctionReturn(0); 559*bc011b1eSHong Zhang } 560*bc011b1eSHong Zhang 561*bc011b1eSHong Zhang #undef __FUNCT__ 562*bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 563*bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 564*bc011b1eSHong Zhang { 565*bc011b1eSHong Zhang PetscErrorCode ierr; 566*bc011b1eSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data, 567*bc011b1eSHong Zhang *b = (Mat_SeqAIJ*)B->data, 568*bc011b1eSHong Zhang *c = (Mat_SeqAIJ*)C->data;; 569*bc011b1eSHong Zhang int am=A->m,anzi,*ai=b->i,*aJ=a->j, 570*bc011b1eSHong Zhang *bi=b->i,*bj,bnzi,nextb,bcol, 571*bc011b1eSHong Zhang cn=C->n,cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj; 572*bc011b1eSHong Zhang int i,j,k,flops=0; 573*bc011b1eSHong Zhang MatScalar *aA=a->a,*ba,*ca=c->a,*caj; 574*bc011b1eSHong Zhang 575*bc011b1eSHong Zhang PetscFunctionBegin; 576*bc011b1eSHong Zhang /* clear old values in C */ 577*bc011b1eSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 578*bc011b1eSHong Zhang 579*bc011b1eSHong Zhang /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 580*bc011b1eSHong Zhang for (i=0;i<am;i++) { 581*bc011b1eSHong Zhang bj = b->j + bi[i]; 582*bc011b1eSHong Zhang ba = b->a + bi[i]; 583*bc011b1eSHong Zhang bnzi = bi[i+1] - bi[i]; 584*bc011b1eSHong Zhang anzi = ai[i+1] - ai[i]; 585*bc011b1eSHong Zhang for (j=0; j<anzi; j++) { 586*bc011b1eSHong Zhang nextb = 0; 587*bc011b1eSHong Zhang crow = *aJ++; 588*bc011b1eSHong Zhang cjj = cj + ci[crow]; 589*bc011b1eSHong Zhang caj = ca + ci[crow]; 590*bc011b1eSHong Zhang /* perform sparse axpy operation. Note cjj includes bj. */ 591*bc011b1eSHong Zhang for (k=0; nextb<bnzi; k++) { 592*bc011b1eSHong Zhang bcol = *(bj+nextb); 593*bc011b1eSHong Zhang if (cjj[k] == bcol) { /* ccol == bcol */ 594*bc011b1eSHong Zhang caj[k] += (*aA)*(*(ba+nextb)); 595*bc011b1eSHong Zhang nextb++; 596*bc011b1eSHong Zhang } 597*bc011b1eSHong Zhang } 598*bc011b1eSHong Zhang flops += 2*bnzi; 599*bc011b1eSHong Zhang aA++; 600*bc011b1eSHong Zhang } 601*bc011b1eSHong Zhang } 602*bc011b1eSHong Zhang 603*bc011b1eSHong Zhang /* Assemble the final matrix and clean up */ 604*bc011b1eSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 605*bc011b1eSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 606*bc011b1eSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 607*bc011b1eSHong Zhang 608*bc011b1eSHong Zhang PetscFunctionReturn(0); 609*bc011b1eSHong Zhang } 610