1d50806bdSBarry Smith /* 2a50f8bf6SHong Zhang Defines matrix-matrix product routines for pairs of AIJ matrices 3d50806bdSBarry Smith C = A * B 4d50806bdSBarry Smith */ 5d50806bdSBarry Smith 6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 9be0fcf8dSHong Zhang #include "petscbt.h" 10d50806bdSBarry Smith 11c1f4806aSKris Buschelman #undef __FUNCT__ 12c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult" 135c66b693SKris Buschelman /*@ 145c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 1594e3eecaSKris Buschelman 165c66b693SKris Buschelman Collective on Mat 17d50806bdSBarry Smith 185c66b693SKris Buschelman Input Parameters: 195c66b693SKris Buschelman + A - the left matrix 201c741599SHong Zhang . B - the right matrix 211c741599SHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 235c66b693SKris Buschelman 245c66b693SKris Buschelman Output Parameters: 255c66b693SKris Buschelman . C - the product matrix 265c66b693SKris Buschelman 275c66b693SKris Buschelman Notes: 285c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 295c66b693SKris Buschelman 30bc011b1eSHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 31bc011b1eSHong Zhang which inherit from AIJ. C will be of type MATAIJ. 325c66b693SKris Buschelman 335c66b693SKris Buschelman Level: intermediate 345c66b693SKris Buschelman 355c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 365c66b693SKris Buschelman @*/ 37dfbe8321SBarry Smith PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 382d09714cSHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40cb00f407SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 41cb00f407SKris Buschelman PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 422d09714cSHong Zhang 43d50806bdSBarry Smith PetscFunctionBegin; 444482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45c9780b6fSBarry Smith PetscValidType(A,1); 465c66b693SKris Buschelman MatPreallocated(A); 475c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 485c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 494482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 50c9780b6fSBarry Smith PetscValidType(B,2); 515c66b693SKris Buschelman MatPreallocated(B); 525c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 535c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 544482741eSBarry Smith PetscValidPointer(C,3); 555c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 56d50806bdSBarry Smith 571c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 58c5db241fSHong Zhang 59cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 60cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 61cb00f407SKris Buschelman fA = A->ops->matmult; 62cb00f407SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name); 63cb00f407SKris Buschelman fB = B->ops->matmult; 64cb00f407SKris Buschelman if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name); 65cb00f407SKris Buschelman if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 66cb00f407SKris Buschelman 676284ec50SHong Zhang ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 681c741599SHong Zhang ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 696284ec50SHong Zhang ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 704d3841fdSKris Buschelman 716284ec50SHong Zhang PetscFunctionReturn(0); 726284ec50SHong Zhang } 736284ec50SHong Zhang 746284ec50SHong Zhang #undef __FUNCT__ 756284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 76dfbe8321SBarry Smith PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 772d09714cSHong Zhang { 78dfbe8321SBarry Smith PetscErrorCode ierr; 796284ec50SHong Zhang 806284ec50SHong Zhang PetscFunctionBegin; 8126be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 82d6bb3c2dSHong Zhang ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 83d6bb3c2dSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 8426be0446SHong Zhang ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr); 85d6bb3c2dSHong Zhang } else { 86d6bb3c2dSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 87d6bb3c2dSHong Zhang } 88d50806bdSBarry Smith PetscFunctionReturn(0); 89d50806bdSBarry Smith } 905c66b693SKris Buschelman 915c66b693SKris Buschelman #undef __FUNCT__ 925c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 93*38baddfdSBarry Smith PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 94*38baddfdSBarry Smith { 95dfbe8321SBarry Smith PetscErrorCode ierr; 965c66b693SKris Buschelman 975c66b693SKris Buschelman PetscFunctionBegin; 9826be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 9926be0446SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 10026be0446SHong Zhang } 10126be0446SHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 1025c66b693SKris Buschelman PetscFunctionReturn(0); 1035c66b693SKris Buschelman } 1045c66b693SKris Buschelman 105c1f4806aSKris Buschelman #undef __FUNCT__ 106c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 1075c66b693SKris Buschelman /*@ 1085c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 1095c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 1105c66b693SKris Buschelman 1115c66b693SKris Buschelman Collective on Mat 1125c66b693SKris Buschelman 1135c66b693SKris Buschelman Input Parameters: 1145c66b693SKris Buschelman + A - the left matrix 115c5db241fSHong Zhang . B - the right matrix 116c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 1175c66b693SKris Buschelman 1185c66b693SKris Buschelman Output Parameters: 1195c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 1205c66b693SKris Buschelman 1215c66b693SKris Buschelman Notes: 1224d3841fdSKris Buschelman C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 1235c66b693SKris Buschelman 1244d3841fdSKris Buschelman This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 1255c66b693SKris Buschelman 1265c66b693SKris Buschelman Level: intermediate 1275c66b693SKris Buschelman 1285c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 1295c66b693SKris Buschelman @*/ 130*38baddfdSBarry Smith PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) 131*38baddfdSBarry Smith { 132dfbe8321SBarry Smith PetscErrorCode ierr; 133cb00f407SKris Buschelman PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *); 134cb00f407SKris Buschelman PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *); 1355c66b693SKris Buschelman 1365c66b693SKris Buschelman PetscFunctionBegin; 1374482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 138c9780b6fSBarry Smith PetscValidType(A,1); 1395c66b693SKris Buschelman MatPreallocated(A); 1405c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1415c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1425c66b693SKris Buschelman 1434482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 144c9780b6fSBarry Smith PetscValidType(B,2); 1455c66b693SKris Buschelman MatPreallocated(B); 1465c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1475c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1484482741eSBarry Smith PetscValidPointer(C,3); 1494482741eSBarry Smith 1505c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 1511c24bd37SHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 1525c66b693SKris Buschelman 153cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 154cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 155cb00f407SKris Buschelman Asymbolic = A->ops->matmultsymbolic; 156cb00f407SKris Buschelman if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 157cb00f407SKris Buschelman Bsymbolic = B->ops->matmultsymbolic; 158cb00f407SKris Buschelman if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 159cb00f407SKris Buschelman if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 160cb00f407SKris Buschelman 161cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 162cb00f407SKris Buschelman ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr); 163cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1645c66b693SKris Buschelman 1655c66b693SKris Buschelman PetscFunctionReturn(0); 1665c66b693SKris Buschelman } 1671c24bd37SHong Zhang 168dfbe8321SBarry Smith EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 16926be0446SHong Zhang #undef __FUNCT__ 17026be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult" 171dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A) 17226be0446SHong Zhang { 173dfbe8321SBarry Smith PetscErrorCode ierr; 1747f93b363SHong Zhang Mat_MatMatMultMPI *mult; 1757f93b363SHong Zhang PetscObjectContainer container; 17626be0446SHong Zhang 17726be0446SHong Zhang PetscFunctionBegin; 1787f93b363SHong Zhang ierr = PetscObjectQuery((PetscObject)A,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr); 1797f93b363SHong Zhang if (container) { 1807f79fd58SMatthew Knepley ierr = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr); 181d6bb3c2dSHong Zhang ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr); 182d6bb3c2dSHong Zhang ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr); 183d6bb3c2dSHong Zhang ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr); 18497c2bf28SHong Zhang ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr); 18597c2bf28SHong Zhang ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr); 186d6bb3c2dSHong Zhang ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr); 187d6bb3c2dSHong Zhang 1887f93b363SHong Zhang ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 1897f93b363SHong Zhang ierr = PetscObjectCompose((PetscObject)A,"MatMatMultMPI",0);CHKERRQ(ierr); 1907f93b363SHong Zhang } 1917f93b363SHong Zhang ierr = PetscFree(mult);CHKERRQ(ierr); 19226be0446SHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 19326be0446SHong Zhang 19426be0446SHong Zhang PetscFunctionReturn(0); 19526be0446SHong Zhang } 19658c24d83SHong Zhang 19758c24d83SHong Zhang #undef __FUNCT__ 19826be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 199dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 20026be0446SHong Zhang { 201ff134f7aSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 202dfbe8321SBarry Smith PetscErrorCode ierr; 203*38baddfdSBarry Smith PetscInt start,end; 20426be0446SHong Zhang Mat_MatMatMultMPI *mult; 2057f93b363SHong Zhang PetscObjectContainer container; 20626be0446SHong Zhang 20726be0446SHong Zhang PetscFunctionBegin; 208ff134f7aSHong Zhang if (a->cstart != b->rstart || a->cend != b->rend){ 209ff134f7aSHong Zhang SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 210ff134f7aSHong Zhang } 211d6bb3c2dSHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr); 212d6bb3c2dSHong Zhang 21326be0446SHong Zhang /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 21497c2bf28SHong Zhang ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr); 21526be0446SHong Zhang 21626be0446SHong Zhang /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 21726be0446SHong Zhang start = a->rstart; end = a->rend; 218d6bb3c2dSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr); 21997c2bf28SHong Zhang ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr); 22026be0446SHong Zhang 22126be0446SHong Zhang /* compute C_seq = A_seq * B_seq */ 22297c2bf28SHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr); 22326be0446SHong Zhang 22426be0446SHong Zhang /* create mpi matrix C by concatinating C_seq */ 225d6bb3c2dSHong Zhang ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */ 2260e36024fSHong Zhang ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 22726be0446SHong Zhang 22826be0446SHong Zhang /* attach the supporting struct to C for reuse of symbolic C */ 2297f93b363SHong Zhang ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2307f93b363SHong Zhang ierr = PetscObjectContainerSetPointer(container,mult);CHKERRQ(ierr); 2317f93b363SHong Zhang ierr = PetscObjectCompose((PetscObject)(*C),"MatMatMultMPI",(PetscObject)container);CHKERRQ(ierr); 2327f93b363SHong Zhang 23326be0446SHong Zhang (*C)->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; 23426be0446SHong Zhang 23526be0446SHong Zhang PetscFunctionReturn(0); 23626be0446SHong Zhang } 23726be0446SHong Zhang 23826be0446SHong Zhang #undef __FUNCT__ 23926be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 240dfbe8321SBarry Smith PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 24158c24d83SHong Zhang { 242dfbe8321SBarry Smith PetscErrorCode ierr; 24358c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 24458c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 245*38baddfdSBarry Smith PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj; 246*38baddfdSBarry Smith PetscInt am=A->M,bn=B->N,bm=B->M; 247*38baddfdSBarry Smith PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0; 24858c24d83SHong Zhang MatScalar *ca; 249be0fcf8dSHong Zhang PetscBT lnkbt; 25058c24d83SHong Zhang 25158c24d83SHong Zhang PetscFunctionBegin; 25258c24d83SHong Zhang /* Set up */ 25358c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 25458c24d83SHong Zhang /* free space for accumulating nonzero column info */ 255*38baddfdSBarry Smith ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 25658c24d83SHong Zhang ci[0] = 0; 25758c24d83SHong Zhang 258be0fcf8dSHong Zhang /* create and initialize a linked list */ 259be0fcf8dSHong Zhang nlnk = bn+1; 260be0fcf8dSHong Zhang ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 26158c24d83SHong Zhang 262c5db241fSHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 263*38baddfdSBarry Smith ierr = GetMoreSpace((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 26458c24d83SHong Zhang current_space = free_space; 26558c24d83SHong Zhang 26658c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 26758c24d83SHong Zhang for (i=0;i<am;i++) { 26858c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 26958c24d83SHong Zhang cnzi = 0; 2702d09714cSHong Zhang j = anzi; 2712d09714cSHong Zhang aj = a->j + ai[i]; 2722d09714cSHong Zhang while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 2732d09714cSHong Zhang j--; 2742d09714cSHong Zhang brow = *(aj + j); 27558c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 27658c24d83SHong Zhang bjj = bj + bi[brow]; 2771c239cc6SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 278be0fcf8dSHong Zhang ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2791c239cc6SHong Zhang cnzi += nlnk; 28058c24d83SHong Zhang } 28158c24d83SHong Zhang 28258c24d83SHong Zhang /* If free space is not available, make more free space */ 28358c24d83SHong Zhang /* Double the amount of total space in the list */ 28458c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 28558c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 286c5db241fSHong Zhang nspacedouble++; 28758c24d83SHong Zhang } 28858c24d83SHong Zhang 289c5db241fSHong Zhang /* Copy data into free space, then initialize lnk */ 290be0fcf8dSHong Zhang ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 291c5db241fSHong Zhang current_space->array += cnzi; 29258c24d83SHong Zhang current_space->local_used += cnzi; 29358c24d83SHong Zhang current_space->local_remaining -= cnzi; 29458c24d83SHong Zhang 29558c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 29658c24d83SHong Zhang } 29758c24d83SHong Zhang 29858c24d83SHong Zhang /* Column indices are in the list of free space */ 29958c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 30058c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 301*38baddfdSBarry Smith ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 30258c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 303be0fcf8dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 30458c24d83SHong Zhang 30558c24d83SHong Zhang /* Allocate space for ca */ 30658c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 30758c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 30858c24d83SHong Zhang 30926be0446SHong Zhang /* put together the new symbolic matrix */ 31058c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 31158c24d83SHong Zhang 31258c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 31358c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 31458c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 31558c24d83SHong Zhang c->freedata = PETSC_TRUE; 31658c24d83SHong Zhang c->nonew = 0; 31758c24d83SHong Zhang 318be0fcf8dSHong Zhang if (nspacedouble){ 319be0fcf8dSHong Zhang PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]); 320be0fcf8dSHong Zhang } 32158c24d83SHong Zhang PetscFunctionReturn(0); 32258c24d83SHong Zhang } 323d50806bdSBarry Smith 324c1f4806aSKris Buschelman #undef __FUNCT__ 325c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 3265c66b693SKris Buschelman /*@ 3275c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 3285c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 3295c66b693SKris Buschelman 3305c66b693SKris Buschelman Collective on Mat 3315c66b693SKris Buschelman 3325c66b693SKris Buschelman Input Parameters: 3335c66b693SKris Buschelman + A - the left matrix 3345c66b693SKris Buschelman - B - the right matrix 3355c66b693SKris Buschelman 3365c66b693SKris Buschelman Output Parameters: 3375c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 3385c66b693SKris Buschelman 3395c66b693SKris Buschelman Notes: 3405c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 3415c66b693SKris Buschelman 3425c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 3435c66b693SKris Buschelman 3445c66b693SKris Buschelman Level: intermediate 3455c66b693SKris Buschelman 3465c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 3475c66b693SKris Buschelman @*/ 348*38baddfdSBarry Smith PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C) 349*38baddfdSBarry Smith { 350dfbe8321SBarry Smith PetscErrorCode ierr; 351cb00f407SKris Buschelman PetscErrorCode (*Anumeric)(Mat,Mat,Mat); 352cb00f407SKris Buschelman PetscErrorCode (*Bnumeric)(Mat,Mat,Mat); 3535c66b693SKris Buschelman 3545c66b693SKris Buschelman PetscFunctionBegin; 3555c66b693SKris Buschelman 3564482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 357c9780b6fSBarry Smith PetscValidType(A,1); 3585c66b693SKris Buschelman MatPreallocated(A); 3595c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3605c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3615c66b693SKris Buschelman 3624482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 363c9780b6fSBarry Smith PetscValidType(B,2); 3645c66b693SKris Buschelman MatPreallocated(B); 3655c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3665c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3675c66b693SKris Buschelman 3684482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 369c9780b6fSBarry Smith PetscValidType(C,3); 3705c66b693SKris Buschelman MatPreallocated(C); 3715c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3725c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3735c66b693SKris Buschelman 3745c66b693SKris Buschelman if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 3755c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 3765c66b693SKris Buschelman if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 3775c66b693SKris Buschelman 378cb00f407SKris Buschelman /* For now, we do not dispatch based on the type of A and B */ 379cb00f407SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 380cb00f407SKris Buschelman Anumeric = A->ops->matmultnumeric; 381cb00f407SKris Buschelman if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name); 382cb00f407SKris Buschelman Bnumeric = B->ops->matmultnumeric; 383cb00f407SKris Buschelman if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name); 384cb00f407SKris Buschelman if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 3854d3841fdSKris Buschelman 386cb00f407SKris Buschelman ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 387cb00f407SKris Buschelman ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr); 388cb00f407SKris Buschelman ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 3895c66b693SKris Buschelman 3905c66b693SKris Buschelman PetscFunctionReturn(0); 3915c66b693SKris Buschelman } 3925c66b693SKris Buschelman 393d6bb3c2dSHong Zhang /* This routine is called ONLY in the case of reusing previously computed symbolic C */ 394d50806bdSBarry Smith #undef __FUNCT__ 39526be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ" 396dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 39726be0446SHong Zhang { 398dfbe8321SBarry Smith PetscErrorCode ierr; 39997c2bf28SHong Zhang Mat *seq; 4007f93b363SHong Zhang Mat_MatMatMultMPI *mult; 4017f93b363SHong Zhang PetscObjectContainer container; 402d6bb3c2dSHong Zhang 40326be0446SHong Zhang PetscFunctionBegin; 4047f93b363SHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr); 4057f93b363SHong Zhang if (container) { 4067f79fd58SMatthew Knepley ierr = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr); 4077f93b363SHong Zhang } 4087f93b363SHong Zhang 40997c2bf28SHong Zhang seq = &mult->B_seq; 41097c2bf28SHong Zhang ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 41197c2bf28SHong Zhang mult->B_seq = *seq; 41297c2bf28SHong Zhang 41397c2bf28SHong Zhang seq = &mult->A_loc; 41497c2bf28SHong Zhang ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr); 41597c2bf28SHong Zhang mult->A_loc = *seq; 41697c2bf28SHong Zhang 41797c2bf28SHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr); 418d6bb3c2dSHong Zhang 419d6bb3c2dSHong Zhang ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); 4200e36024fSHong Zhang ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 421d6bb3c2dSHong Zhang 42226be0446SHong Zhang PetscFunctionReturn(0); 42326be0446SHong Zhang } 42426be0446SHong Zhang 42526be0446SHong Zhang #undef __FUNCT__ 42626be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 427dfbe8321SBarry Smith PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 428d50806bdSBarry Smith { 429dfbe8321SBarry Smith PetscErrorCode ierr; 430*38baddfdSBarry Smith PetscInt flops=0; 431d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 432d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 433d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 434*38baddfdSBarry Smith PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 435*38baddfdSBarry Smith PetscInt am=A->M,cn=C->N; 436*38baddfdSBarry Smith PetscInt i,j,k,anzi,bnzi,cnzi,brow; 437d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 438d50806bdSBarry Smith 439d50806bdSBarry Smith PetscFunctionBegin; 440d50806bdSBarry Smith 441d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 442d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 443d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 444d50806bdSBarry Smith /* Traverse A row-wise. */ 445d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 446d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 447d50806bdSBarry Smith for (i=0;i<am;i++) { 448d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 449d50806bdSBarry Smith for (j=0;j<anzi;j++) { 450d50806bdSBarry Smith brow = *aj++; 451d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 452d50806bdSBarry Smith bjj = bj + bi[brow]; 453d50806bdSBarry Smith baj = ba + bi[brow]; 454d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 455d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 456d50806bdSBarry Smith } 457d50806bdSBarry Smith flops += 2*bnzi; 458d50806bdSBarry Smith aa++; 459d50806bdSBarry Smith } 460d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 461d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 462d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 463d50806bdSBarry Smith ca[j] = temp[cj[j]]; 464d50806bdSBarry Smith temp[cj[j]] = 0.0; 465d50806bdSBarry Smith } 466d50806bdSBarry Smith ca += cnzi; 467d50806bdSBarry Smith cj += cnzi; 468d50806bdSBarry Smith } 469716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 471716bacf3SKris Buschelman 472d50806bdSBarry Smith /* Free temp */ 473d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 474d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 475d50806bdSBarry Smith PetscFunctionReturn(0); 476d50806bdSBarry Smith } 477bc011b1eSHong Zhang 478bc011b1eSHong Zhang #undef __FUNCT__ 479bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose" 480bc011b1eSHong Zhang /*@ 481bc011b1eSHong Zhang MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B. 482bc011b1eSHong Zhang 483bc011b1eSHong Zhang Collective on Mat 484bc011b1eSHong Zhang 485bc011b1eSHong Zhang Input Parameters: 486bc011b1eSHong Zhang + A - the left matrix 487bc011b1eSHong Zhang . B - the right matrix 488bc011b1eSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 489bc011b1eSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 490bc011b1eSHong Zhang 491bc011b1eSHong Zhang Output Parameters: 492bc011b1eSHong Zhang . C - the product matrix 493bc011b1eSHong Zhang 494bc011b1eSHong Zhang Notes: 495bc011b1eSHong Zhang C will be created and must be destroyed by the user with MatDestroy(). 496bc011b1eSHong Zhang 497bc011b1eSHong Zhang This routine is currently only implemented for pairs of SeqAIJ matrices and classes 498bc011b1eSHong Zhang which inherit from SeqAIJ. C will be of type MATSEQAIJ. 499bc011b1eSHong Zhang 500bc011b1eSHong Zhang Level: intermediate 501bc011b1eSHong Zhang 502bc011b1eSHong Zhang .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric() 503bc011b1eSHong Zhang @*/ 504bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 505bc011b1eSHong Zhang { 506bc011b1eSHong Zhang PetscErrorCode ierr; 507bc011b1eSHong Zhang PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*); 508bc011b1eSHong Zhang PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*); 509bc011b1eSHong Zhang 510bc011b1eSHong Zhang PetscFunctionBegin; 511bc011b1eSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 512bc011b1eSHong Zhang PetscValidType(A,1); 513bc011b1eSHong Zhang MatPreallocated(A); 514bc011b1eSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 515bc011b1eSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 516bc011b1eSHong Zhang PetscValidHeaderSpecific(B,MAT_COOKIE,2); 517bc011b1eSHong Zhang PetscValidType(B,2); 518bc011b1eSHong Zhang MatPreallocated(B); 519bc011b1eSHong Zhang if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 520bc011b1eSHong Zhang if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 521bc011b1eSHong Zhang PetscValidPointer(C,3); 522bc011b1eSHong Zhang if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M); 523bc011b1eSHong Zhang 524bc011b1eSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 525bc011b1eSHong Zhang 526bc011b1eSHong Zhang fA = A->ops->matmulttranspose; 527bc011b1eSHong Zhang if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name); 528bc011b1eSHong Zhang fB = B->ops->matmulttranspose; 529bc011b1eSHong Zhang if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name); 530bc011b1eSHong Zhang if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name); 531bc011b1eSHong Zhang 532bc011b1eSHong Zhang ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 533bc011b1eSHong Zhang ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr); 534bc011b1eSHong Zhang ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr); 535bc011b1eSHong Zhang 536bc011b1eSHong Zhang PetscFunctionReturn(0); 537bc011b1eSHong Zhang } 538bc011b1eSHong Zhang 539bc011b1eSHong Zhang #undef __FUNCT__ 540bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ" 541bc011b1eSHong Zhang PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 542bc011b1eSHong Zhang PetscErrorCode ierr; 543bc011b1eSHong Zhang 544bc011b1eSHong Zhang PetscFunctionBegin; 545bc011b1eSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 546bc011b1eSHong Zhang ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 547bc011b1eSHong Zhang } 548bc011b1eSHong Zhang ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 549bc011b1eSHong Zhang PetscFunctionReturn(0); 550bc011b1eSHong Zhang } 551bc011b1eSHong Zhang 552bc011b1eSHong Zhang #undef __FUNCT__ 553bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ" 554bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 555bc011b1eSHong Zhang { 556bc011b1eSHong Zhang PetscErrorCode ierr; 557bc011b1eSHong Zhang Mat At; 558*38baddfdSBarry Smith PetscInt *ati,*atj; 559bc011b1eSHong Zhang 560bc011b1eSHong Zhang PetscFunctionBegin; 561bc011b1eSHong Zhang /* create symbolic At */ 562bc011b1eSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 563bc011b1eSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 564bc011b1eSHong Zhang 565bc011b1eSHong Zhang /* get symbolic C=At*B */ 566bc011b1eSHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 567bc011b1eSHong Zhang 568bc011b1eSHong Zhang /* clean up */ 569bc011b1eSHong Zhang ierr = MatDestroy(At);CHKERRQ(ierr); 570bc011b1eSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 571bc011b1eSHong Zhang 572bc011b1eSHong Zhang PetscFunctionReturn(0); 573bc011b1eSHong Zhang } 574bc011b1eSHong Zhang 575bc011b1eSHong Zhang #undef __FUNCT__ 576bc011b1eSHong Zhang #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ" 577bc011b1eSHong Zhang PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 578bc011b1eSHong Zhang { 579bc011b1eSHong Zhang PetscErrorCode ierr; 5800fbc74f4SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 581*38baddfdSBarry Smith PetscInt am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 582*38baddfdSBarry Smith PetscInt cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0; 5830fbc74f4SHong Zhang MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 584bc011b1eSHong Zhang 585bc011b1eSHong Zhang PetscFunctionBegin; 586bc011b1eSHong Zhang /* clear old values in C */ 587bc011b1eSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 588bc011b1eSHong Zhang 589bc011b1eSHong Zhang /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 590bc011b1eSHong Zhang for (i=0;i<am;i++) { 591bc011b1eSHong Zhang bj = b->j + bi[i]; 592bc011b1eSHong Zhang ba = b->a + bi[i]; 593bc011b1eSHong Zhang bnzi = bi[i+1] - bi[i]; 594bc011b1eSHong Zhang anzi = ai[i+1] - ai[i]; 595bc011b1eSHong Zhang for (j=0; j<anzi; j++) { 596bc011b1eSHong Zhang nextb = 0; 5970fbc74f4SHong Zhang crow = *aj++; 598bc011b1eSHong Zhang cjj = cj + ci[crow]; 599bc011b1eSHong Zhang caj = ca + ci[crow]; 600bc011b1eSHong Zhang /* perform sparse axpy operation. Note cjj includes bj. */ 601bc011b1eSHong Zhang for (k=0; nextb<bnzi; k++) { 6020fbc74f4SHong Zhang if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 6030fbc74f4SHong Zhang caj[k] += (*aa)*(*(ba+nextb)); 604bc011b1eSHong Zhang nextb++; 605bc011b1eSHong Zhang } 606bc011b1eSHong Zhang } 607bc011b1eSHong Zhang flops += 2*bnzi; 6080fbc74f4SHong Zhang aa++; 609bc011b1eSHong Zhang } 610bc011b1eSHong Zhang } 611bc011b1eSHong Zhang 612bc011b1eSHong Zhang /* Assemble the final matrix and clean up */ 613bc011b1eSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 614bc011b1eSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 615bc011b1eSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 616bc011b1eSHong Zhang PetscFunctionReturn(0); 617bc011b1eSHong Zhang } 618