1d50806bdSBarry Smith /* 2a50f8bf6SHong Zhang Defines matrix-matrix product routines for pairs of AIJ matrices 3d50806bdSBarry Smith C = A * B 4d50806bdSBarry Smith */ 5d50806bdSBarry Smith 6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 9be0fcf8dSHong Zhang #include "petscbt.h" 10d50806bdSBarry Smith 11c1f4806aSKris Buschelman #undef __FUNCT__ 12c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult" 135c66b693SKris Buschelman /*@ 145c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 1594e3eecaSKris Buschelman 165c66b693SKris Buschelman Collective on Mat 17d50806bdSBarry Smith 185c66b693SKris Buschelman Input Parameters: 195c66b693SKris Buschelman + A - the left matrix 201c741599SHong Zhang . B - the right matrix 211c741599SHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 235c66b693SKris Buschelman 245c66b693SKris Buschelman Output Parameters: 255c66b693SKris Buschelman . C - the product matrix 265c66b693SKris Buschelman 275c66b693SKris Buschelman Notes: 285c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 295c66b693SKris Buschelman 30bc011b1eSHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 31bc011b1eSHong Zhang which inherit from AIJ. C will be of type MATAIJ. 325c66b693SKris Buschelman 335c66b693SKris Buschelman Level: intermediate 345c66b693SKris Buschelman 355c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 365c66b693SKris Buschelman @*/ 37dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 382d09714cSHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40cb00f407SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 41cb00f407SKris Buschelman PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 422d09714cSHong Zhang 43d50806bdSBarry Smith PetscFunctionBegin; 444482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45c9780b6fSBarry Smith PetscValidType(A,1); 465c66b693SKris Buschelman MatPreallocated(A); 475c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 485c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 494482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 50c9780b6fSBarry Smith PetscValidType(B,2); 515c66b693SKris Buschelman MatPreallocated(B); 525c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 535c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 544482741eSBarry Smith PetscValidPointer(C,3); 555c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 56d50806bdSBarry Smith 571c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 58c5db241fSHong Zhang 59cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 60cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 61cb00f407SKris Buschelman fA = A->ops->matmult; 62cb00f407SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 63cb00f407SKris Buschelman fB = B->ops->matmult; 64cb00f407SKris Buschelman if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 65cb00f407SKris Buschelman if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 66cb00f407SKris Buschelman 676284ec50SHong Zhang ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 681c741599SHong Zhang ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 696284ec50SHong Zhang ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 704d3841fdSKris Buschelman 716284ec50SHong Zhang PetscFunctionReturn(0); 726284ec50SHong Zhang } 736284ec50SHong Zhang 746284ec50SHong Zhang #undef __FUNCT__ 756284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 76dfbe8321SBarry Smith PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 772d09714cSHong Zhang { 78dfbe8321SBarry Smith PetscErrorCode ierr; 796284ec50SHong Zhang 806284ec50SHong Zhang PetscFunctionBegin; 8126be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 82d6bb3c2dSHong Zhang ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 83d6bb3c2dSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 8426be0446SHong Zhang ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr); 85d6bb3c2dSHong Zhang } else { 86d6bb3c2dSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 87d6bb3c2dSHong Zhang } 88d50806bdSBarry Smith PetscFunctionReturn(0); 89d50806bdSBarry Smith } 905c66b693SKris Buschelman 915c66b693SKris Buschelman #undef __FUNCT__ 925c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 93dfbe8321SBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 94dfbe8321SBarry Smith PetscErrorCode ierr; 955c66b693SKris Buschelman 965c66b693SKris Buschelman PetscFunctionBegin; 9726be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 9826be0446SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 9926be0446SHong Zhang } 10026be0446SHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 1015c66b693SKris Buschelman PetscFunctionReturn(0); 1025c66b693SKris Buschelman } 1035c66b693SKris Buschelman 104c1f4806aSKris Buschelman #undef __FUNCT__ 105c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 1065c66b693SKris Buschelman /*@ 1075c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 1085c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 1095c66b693SKris Buschelman 1105c66b693SKris Buschelman Collective on Mat 1115c66b693SKris Buschelman 1125c66b693SKris Buschelman Input Parameters: 1135c66b693SKris Buschelman + A - the left matrix 114c5db241fSHong Zhang . B - the right matrix 115c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 1165c66b693SKris Buschelman 1175c66b693SKris Buschelman Output Parameters: 1185c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 1195c66b693SKris Buschelman 1205c66b693SKris Buschelman Notes: 1214d3841fdSKris Buschelman C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 1225c66b693SKris Buschelman 1234d3841fdSKris Buschelman This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 1245c66b693SKris Buschelman 1255c66b693SKris Buschelman Level: intermediate 1265c66b693SKris Buschelman 1275c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 1285c66b693SKris Buschelman @*/ 129dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) { 130dfbe8321SBarry Smith PetscErrorCode ierr; 131cb00f407SKris Buschelman PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 132cb00f407SKris Buschelman PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 1335c66b693SKris Buschelman 1345c66b693SKris Buschelman PetscFunctionBegin; 1354482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 136c9780b6fSBarry Smith PetscValidType(A,1); 1375c66b693SKris Buschelman MatPreallocated(A); 1385c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1395c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1405c66b693SKris Buschelman 1414482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 142c9780b6fSBarry Smith PetscValidType(B,2); 1435c66b693SKris Buschelman MatPreallocated(B); 1445c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1455c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1464482741eSBarry Smith PetscValidPointer(C,3); 1474482741eSBarry Smith 1485c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 1491c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 1505c66b693SKris Buschelman 151cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 152cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 153cb00f407SKris Buschelman Asymbolic = A->ops->matmultsymbolic; 154cb00f407SKris Buschelman if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 155cb00f407SKris Buschelman Bsymbolic = B->ops->matmultsymbolic; 156cb00f407SKris Buschelman if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 157cb00f407SKris Buschelman if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 158cb00f407SKris Buschelman 159cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 160cb00f407SKris Buschelman ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 161cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1625c66b693SKris Buschelman 1635c66b693SKris Buschelman PetscFunctionReturn(0); 1645c66b693SKris Buschelman } 1651c24bd37SHong Zhang 166dfbe8321SBarry Smith EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 16726be0446SHong Zhang #undef __FUNCT__ 16826be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult" 169dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A) 17026be0446SHong Zhang { 171dfbe8321SBarry Smith PetscErrorCode ierr; 17226be0446SHong Zhang Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr; 17326be0446SHong Zhang 17426be0446SHong Zhang PetscFunctionBegin; 175d6bb3c2dSHong Zhang ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr); 176d6bb3c2dSHong Zhang ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr); 177d6bb3c2dSHong Zhang ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr); 178*97c2bf28SHong Zhang ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr); 179*97c2bf28SHong Zhang ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr); 180*97c2bf28SHong Zhang /* 181d6bb3c2dSHong Zhang ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr); 182d6bb3c2dSHong Zhang ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr); 183*97c2bf28SHong Zhang */ 184d6bb3c2dSHong Zhang ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr); 18526be0446SHong Zhang ierr = PetscFree(mult);CHKERRQ(ierr); 186d6bb3c2dSHong Zhang 18726be0446SHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 18826be0446SHong Zhang 18926be0446SHong Zhang PetscFunctionReturn(0); 19026be0446SHong Zhang } 19158c24d83SHong Zhang 19258c24d83SHong Zhang #undef __FUNCT__ 19326be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 194dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 19526be0446SHong Zhang { 196ff134f7aSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 197dfbe8321SBarry Smith PetscErrorCode ierr; 198ff134f7aSHong Zhang int *idx,i,start,end,ncols,nzA,nzB,*cmap; 19926be0446SHong Zhang Mat_MatMatMultMPI *mult; 20026be0446SHong Zhang 20126be0446SHong Zhang PetscFunctionBegin; 202ff134f7aSHong Zhang if (a->cstart != b->rstart || a->cend != b->rend){ 203ff134f7aSHong Zhang SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 204ff134f7aSHong Zhang } 205d6bb3c2dSHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr); 206d6bb3c2dSHong Zhang 20726be0446SHong Zhang /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 208*97c2bf28SHong Zhang ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr); 20926be0446SHong Zhang 21026be0446SHong Zhang /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 21126be0446SHong Zhang start = a->rstart; end = a->rend; 212d6bb3c2dSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr); 213*97c2bf28SHong Zhang ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr); 21426be0446SHong Zhang 21526be0446SHong Zhang /* compute C_seq = A_seq * B_seq */ 216*97c2bf28SHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr); 21726be0446SHong Zhang 21826be0446SHong Zhang /* create mpi matrix C by concatinating C_seq */ 219d6bb3c2dSHong Zhang ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */ 2200e36024fSHong Zhang ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 22126be0446SHong Zhang 22226be0446SHong Zhang /* attach the supporting struct to C for reuse of symbolic C */ 22326be0446SHong Zhang (*C)->spptr = (void*)mult; 22426be0446SHong Zhang (*C)->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; 22526be0446SHong Zhang 22626be0446SHong Zhang PetscFunctionReturn(0); 22726be0446SHong Zhang } 22826be0446SHong Zhang 22926be0446SHong Zhang #undef __FUNCT__ 23026be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 231dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 23258c24d83SHong Zhang { 233dfbe8321SBarry Smith PetscErrorCode ierr; 23458c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 23558c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 236be0fcf8dSHong Zhang int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 2375c66b693SKris Buschelman int am=A->M,bn=B->N,bm=B->M; 238be0fcf8dSHong Zhang int i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 23958c24d83SHong Zhang MatScalar *ca; 240be0fcf8dSHong Zhang PetscBT lnkbt; 24158c24d83SHong Zhang 24258c24d83SHong Zhang PetscFunctionBegin; 24358c24d83SHong Zhang /* Set up */ 24458c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 24558c24d83SHong Zhang /* free space for accumulating nonzero column info */ 24658c24d83SHong Zhang ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 24758c24d83SHong Zhang ci[0] = 0; 24858c24d83SHong Zhang 249be0fcf8dSHong Zhang /* create and initialize a linked list */ 250be0fcf8dSHong Zhang nlnk = bn+1; 251be0fcf8dSHong Zhang ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 25258c24d83SHong Zhang 253c5db241fSHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 254d6bb3c2dSHong Zhang ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 25558c24d83SHong Zhang current_space = free_space; 25658c24d83SHong Zhang 25758c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 25858c24d83SHong Zhang for (i=0;i<am;i++) { 25958c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 26058c24d83SHong Zhang cnzi = 0; 2612d09714cSHong Zhang j = anzi; 2622d09714cSHong Zhang aj = a->j + ai[i]; 2632d09714cSHong Zhang while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 2642d09714cSHong Zhang j--; 2652d09714cSHong Zhang brow = *(aj + j); 26658c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 26758c24d83SHong Zhang bjj = bj + bi[brow]; 2681c239cc6SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 269be0fcf8dSHong Zhang ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2701c239cc6SHong Zhang cnzi += nlnk; 27158c24d83SHong Zhang } 27258c24d83SHong Zhang 27358c24d83SHong Zhang /* If free space is not available, make more free space */ 27458c24d83SHong Zhang /* Double the amount of total space in the list */ 27558c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 27658c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 277c5db241fSHong Zhang nspacedouble++; 27858c24d83SHong Zhang } 27958c24d83SHong Zhang 280c5db241fSHong Zhang /* Copy data into free space, then initialize lnk */ 281be0fcf8dSHong Zhang ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 282c5db241fSHong Zhang current_space->array += cnzi; 28358c24d83SHong Zhang current_space->local_used += cnzi; 28458c24d83SHong Zhang current_space->local_remaining -= cnzi; 28558c24d83SHong Zhang 28658c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 28758c24d83SHong Zhang } 28858c24d83SHong Zhang 28958c24d83SHong Zhang /* Column indices are in the list of free space */ 29058c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 29158c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 29258c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 29358c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 294be0fcf8dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 29558c24d83SHong Zhang 29658c24d83SHong Zhang /* Allocate space for ca */ 29758c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 29858c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 29958c24d83SHong Zhang 30026be0446SHong Zhang /* put together the new symbolic matrix */ 30158c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 30258c24d83SHong Zhang 30358c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 30458c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 30558c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 30658c24d83SHong Zhang c->freedata = PETSC_TRUE; 30758c24d83SHong Zhang c->nonew = 0; 30858c24d83SHong Zhang 309be0fcf8dSHong Zhang if (nspacedouble){ 310be0fcf8dSHong Zhang PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]); 311be0fcf8dSHong Zhang } 31258c24d83SHong Zhang PetscFunctionReturn(0); 31358c24d83SHong Zhang } 314d50806bdSBarry Smith 315c1f4806aSKris Buschelman #undef __FUNCT__ 316c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 3175c66b693SKris Buschelman /*@ 3185c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 3195c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 3205c66b693SKris Buschelman 3215c66b693SKris Buschelman Collective on Mat 3225c66b693SKris Buschelman 3235c66b693SKris Buschelman Input Parameters: 3245c66b693SKris Buschelman + A - the left matrix 3255c66b693SKris Buschelman - B - the right matrix 3265c66b693SKris Buschelman 3275c66b693SKris Buschelman Output Parameters: 3285c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 3295c66b693SKris Buschelman 3305c66b693SKris Buschelman Notes: 3315c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 3325c66b693SKris Buschelman 3335c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 3345c66b693SKris Buschelman 3355c66b693SKris Buschelman Level: intermediate 3365c66b693SKris Buschelman 3375c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 3385c66b693SKris Buschelman @*/ 339dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){ 340dfbe8321SBarry Smith PetscErrorCode ierr; 341cb00f407SKris Buschelman PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 342cb00f407SKris Buschelman PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 3435c66b693SKris Buschelman 3445c66b693SKris Buschelman PetscFunctionBegin; 3455c66b693SKris Buschelman 3464482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 347c9780b6fSBarry Smith PetscValidType(A,1); 3485c66b693SKris Buschelman MatPreallocated(A); 3495c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3505c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3515c66b693SKris Buschelman 3524482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 353c9780b6fSBarry Smith PetscValidType(B,2); 3545c66b693SKris Buschelman MatPreallocated(B); 3555c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3565c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3575c66b693SKris Buschelman 3584482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 359c9780b6fSBarry Smith PetscValidType(C,3); 3605c66b693SKris Buschelman MatPreallocated(C); 3615c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3625c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3635c66b693SKris Buschelman 3645c66b693SKris Buschelman if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 3655c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 3665c66b693SKris Buschelman if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 3675c66b693SKris Buschelman 368cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 369cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 370cb00f407SKris Buschelman Anumeric = A->ops->matmultnumeric; 371cb00f407SKris Buschelman if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 372cb00f407SKris Buschelman Bnumeric = B->ops->matmultnumeric; 373cb00f407SKris Buschelman if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 374cb00f407SKris Buschelman if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 3754d3841fdSKris Buschelman 376cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 377cb00f407SKris Buschelman ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 378cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 3795c66b693SKris Buschelman 3805c66b693SKris Buschelman PetscFunctionReturn(0); 3815c66b693SKris Buschelman } 3825c66b693SKris Buschelman 383d6bb3c2dSHong Zhang /* This routine is called ONLY in the case of reusing previously computed symbolic C */ 384d50806bdSBarry Smith #undef __FUNCT__ 38526be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ" 386dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 38726be0446SHong Zhang { 388dfbe8321SBarry Smith PetscErrorCode ierr; 389d6bb3c2dSHong Zhang Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr; 390*97c2bf28SHong Zhang Mat *seq; 391d6bb3c2dSHong Zhang 39226be0446SHong Zhang PetscFunctionBegin; 393*97c2bf28SHong Zhang seq = &mult->B_seq; 394*97c2bf28SHong Zhang ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 395*97c2bf28SHong Zhang mult->B_seq = *seq; 396*97c2bf28SHong Zhang 397*97c2bf28SHong Zhang seq = &mult->A_loc; 398*97c2bf28SHong Zhang ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 399*97c2bf28SHong Zhang mult->A_loc = *seq; 400*97c2bf28SHong Zhang 401*97c2bf28SHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr); 402d6bb3c2dSHong Zhang 403d6bb3c2dSHong Zhang ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); 4040e36024fSHong Zhang ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 405d6bb3c2dSHong Zhang 40626be0446SHong Zhang PetscFunctionReturn(0); 40726be0446SHong Zhang } 40826be0446SHong Zhang 40926be0446SHong Zhang #undef __FUNCT__ 41026be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 411dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 412d50806bdSBarry Smith { 413dfbe8321SBarry Smith PetscErrorCode ierr; 414dfbe8321SBarry Smith int flops=0; 415d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 416d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 417d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 418d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 4195c66b693SKris Buschelman int am=A->M,cn=C->N; 42094e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 421d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 422d50806bdSBarry Smith 423d50806bdSBarry Smith PetscFunctionBegin; 424d50806bdSBarry Smith 425d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 426d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 427d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 428d50806bdSBarry Smith /* Traverse A row-wise. */ 429d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 430d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 431d50806bdSBarry Smith for (i=0;i<am;i++) { 432d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 433d50806bdSBarry Smith for (j=0;j<anzi;j++) { 434d50806bdSBarry Smith brow = *aj++; 435d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 436d50806bdSBarry Smith bjj = bj + bi[brow]; 437d50806bdSBarry Smith baj = ba + bi[brow]; 438d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 439d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 440d50806bdSBarry Smith } 441d50806bdSBarry Smith flops += 2*bnzi; 442d50806bdSBarry Smith aa++; 443d50806bdSBarry Smith } 444d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 445d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 446d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 447d50806bdSBarry Smith ca[j] = temp[cj[j]]; 448d50806bdSBarry Smith temp[cj[j]] = 0.0; 449d50806bdSBarry Smith } 450d50806bdSBarry Smith ca += cnzi; 451d50806bdSBarry Smith cj += cnzi; 452d50806bdSBarry Smith } 453716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 454716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 455716bacf3SKris Buschelman 456d50806bdSBarry Smith /* Free temp */ 457d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 458d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 459d50806bdSBarry Smith PetscFunctionReturn(0); 460d50806bdSBarry Smith } 461bc011b1eSHong Zhang 462bc011b1eSHong Zhang #undef __FUNCT__ 463bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose" 464bc011b1eSHong Zhang /*@ 465bc011b1eSHong Zhang MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 466bc011b1eSHong Zhang 467bc011b1eSHong Zhang Collective on Mat 468bc011b1eSHong Zhang 469bc011b1eSHong Zhang Input Parameters: 470bc011b1eSHong Zhang + A - the left matrix 471bc011b1eSHong Zhang . B - the right matrix 472bc011b1eSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 473bc011b1eSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 474bc011b1eSHong Zhang 475bc011b1eSHong Zhang Output Parameters: 476bc011b1eSHong Zhang . C - the product matrix 477bc011b1eSHong Zhang 478bc011b1eSHong Zhang Notes: 479bc011b1eSHong Zhang C will be created and must be destroyed by the user with MatDestroy(). 480bc011b1eSHong Zhang 481bc011b1eSHong Zhang This routine is currently only implemented for pairs of SeqAIJ matrices and classes 482bc011b1eSHong Zhang which inherit from SeqAIJ. C will be of type MATSEQAIJ. 483bc011b1eSHong Zhang 484bc011b1eSHong Zhang Level: intermediate 485bc011b1eSHong Zhang 486bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 487bc011b1eSHong Zhang @*/ 488bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 489bc011b1eSHong Zhang { 490bc011b1eSHong Zhang PetscErrorCode ierr; 491bc011b1eSHong Zhang PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 492bc011b1eSHong Zhang PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 493bc011b1eSHong Zhang 494bc011b1eSHong Zhang PetscFunctionBegin; 495bc011b1eSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 496bc011b1eSHong Zhang PetscValidType(A,1); 497bc011b1eSHong Zhang MatPreallocated(A); 498bc011b1eSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 499bc011b1eSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 500bc011b1eSHong Zhang PetscValidHeaderSpecific(B,MAT_COOKIE,2); 501bc011b1eSHong Zhang PetscValidType(B,2); 502bc011b1eSHong Zhang MatPreallocated(B); 503bc011b1eSHong Zhang if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 504bc011b1eSHong Zhang if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 505bc011b1eSHong Zhang PetscValidPointer(C,3); 506bc011b1eSHong Zhang if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M); 507bc011b1eSHong Zhang 508bc011b1eSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 509bc011b1eSHong Zhang 510bc011b1eSHong Zhang fA = A->ops->matmulttranspose; 511bc011b1eSHong Zhang if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 512bc011b1eSHong Zhang fB = B->ops->matmulttranspose; 513bc011b1eSHong Zhang if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 514bc011b1eSHong Zhang if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 515bc011b1eSHong Zhang 516bc011b1eSHong Zhang ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 517bc011b1eSHong Zhang ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 518bc011b1eSHong Zhang ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 519bc011b1eSHong Zhang 520bc011b1eSHong Zhang PetscFunctionReturn(0); 521bc011b1eSHong Zhang } 522bc011b1eSHong Zhang 523bc011b1eSHong Zhang #undef __FUNCT__ 524bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 525bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 526bc011b1eSHong Zhang PetscErrorCode ierr; 527bc011b1eSHong Zhang 528bc011b1eSHong Zhang PetscFunctionBegin; 529bc011b1eSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 530bc011b1eSHong Zhang ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 531bc011b1eSHong Zhang } 532bc011b1eSHong Zhang ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 533bc011b1eSHong Zhang PetscFunctionReturn(0); 534bc011b1eSHong Zhang } 535bc011b1eSHong Zhang 536bc011b1eSHong Zhang #undef __FUNCT__ 537bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 538bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 539bc011b1eSHong Zhang { 540bc011b1eSHong Zhang PetscErrorCode ierr; 541bc011b1eSHong Zhang Mat At; 542bc011b1eSHong Zhang int *ati,*atj; 543bc011b1eSHong Zhang 544bc011b1eSHong Zhang PetscFunctionBegin; 545bc011b1eSHong Zhang /* create symbolic At */ 546bc011b1eSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 547bc011b1eSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 548bc011b1eSHong Zhang 549bc011b1eSHong Zhang /* get symbolic C=At*B */ 550bc011b1eSHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 551bc011b1eSHong Zhang 552bc011b1eSHong Zhang /* clean up */ 553bc011b1eSHong Zhang ierr = MatDestroy(At);CHKERRQ(ierr); 554bc011b1eSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 555bc011b1eSHong Zhang 556bc011b1eSHong Zhang PetscFunctionReturn(0); 557bc011b1eSHong Zhang } 558bc011b1eSHong Zhang 559bc011b1eSHong Zhang #undef __FUNCT__ 560bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 561bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 562bc011b1eSHong Zhang { 563bc011b1eSHong Zhang PetscErrorCode ierr; 5640fbc74f4SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 565170ef064SHong Zhang int am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 5660fbc74f4SHong Zhang int cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0; 5670fbc74f4SHong Zhang MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 568bc011b1eSHong Zhang 569bc011b1eSHong Zhang PetscFunctionBegin; 570bc011b1eSHong Zhang /* clear old values in C */ 571bc011b1eSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 572bc011b1eSHong Zhang 573bc011b1eSHong Zhang /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 574bc011b1eSHong Zhang for (i=0;i<am;i++) { 575bc011b1eSHong Zhang bj = b->j + bi[i]; 576bc011b1eSHong Zhang ba = b->a + bi[i]; 577bc011b1eSHong Zhang bnzi = bi[i+1] - bi[i]; 578bc011b1eSHong Zhang anzi = ai[i+1] - ai[i]; 579bc011b1eSHong Zhang for (j=0; j<anzi; j++) { 580bc011b1eSHong Zhang nextb = 0; 5810fbc74f4SHong Zhang crow = *aj++; 582bc011b1eSHong Zhang cjj = cj + ci[crow]; 583bc011b1eSHong Zhang caj = ca + ci[crow]; 584bc011b1eSHong Zhang /* perform sparse axpy operation. Note cjj includes bj. */ 585bc011b1eSHong Zhang for (k=0; nextb<bnzi; k++) { 5860fbc74f4SHong Zhang if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 5870fbc74f4SHong Zhang caj[k] += (*aa)*(*(ba+nextb)); 588bc011b1eSHong Zhang nextb++; 589bc011b1eSHong Zhang } 590bc011b1eSHong Zhang } 591bc011b1eSHong Zhang flops += 2*bnzi; 5920fbc74f4SHong Zhang aa++; 593bc011b1eSHong Zhang } 594bc011b1eSHong Zhang } 595bc011b1eSHong Zhang 596bc011b1eSHong Zhang /* Assemble the final matrix and clean up */ 597bc011b1eSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 598bc011b1eSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 599bc011b1eSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 600bc011b1eSHong Zhang PetscFunctionReturn(0); 601bc011b1eSHong Zhang } 602