1d50806bdSBarry Smith /* 2a50f8bf6SHong Zhang Defines matrix-matrix product routines for pairs of AIJ matrices 3d50806bdSBarry Smith C = A * B 4d50806bdSBarry Smith */ 5d50806bdSBarry Smith 6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 9d50806bdSBarry Smith 101c239cc6SHong Zhang /* 11c5db241fSHong Zhang Initialize a linked list 12c5db241fSHong Zhang Input Parameters: 13c5db241fSHong Zhang lnk_init - the initial index value indicating the entry in the list is not set yet 14c5db241fSHong Zhang nlnk - max length of the list 15c5db241fSHong Zhang lnk - linked list(an integer array) that is allocated 16c5db241fSHong Zhang output Parameters: 17c5db241fSHong Zhang lnk - the linked list with all values set as lnk_int 18c5db241fSHong Zhang */ 19c5db241fSHong Zhang #define LNKLISTINITIALIZE(lnk_init,nlnk,lnk){\ 20c5db241fSHong Zhang int _i;\ 21c5db241fSHong Zhang for (_i=0; _i<nlnk; _i++) lnk[_i] = lnk_init;\ 22c5db241fSHong Zhang } 23c5db241fSHong Zhang 24c5db241fSHong Zhang /* 251c239cc6SHong Zhang Add a index set into a sorted linked list 26a50f8bf6SHong Zhang Input Parameters: 271c239cc6SHong Zhang nidx - number of input indices 281c239cc6SHong Zhang indices - interger array 29c5db241fSHong Zhang lnk_head - the header of the list 30c5db241fSHong Zhang lnk_init - the initial index value indicating the entry in the list is not set yet 311c239cc6SHong Zhang lnk - linked list(an integer array) that is created 32a50f8bf6SHong Zhang output Parameters: 331c239cc6SHong Zhang nlnk - number of newly added indices 341c239cc6SHong Zhang lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices 351c239cc6SHong Zhang */ 36c5db241fSHong Zhang #define LNKLISTADD(nidx,indices,lnk_head,lnk_init,nlnk,lnk){\ 37c5db241fSHong Zhang int _k,_entry,_lidx=lnk_head,_idx;\ 38a50f8bf6SHong Zhang nlnk = 0;\ 39a50f8bf6SHong Zhang _k=nidx;\ 40a50f8bf6SHong Zhang while (_k){/* assume indices are almost in increasing order, starting from its end saves computation */\ 41a50f8bf6SHong Zhang _entry = indices[--_k];\ 42c5db241fSHong Zhang if (lnk[_entry] == lnk_init) { /* new col */\ 43a50f8bf6SHong Zhang do {\ 44a50f8bf6SHong Zhang _idx = _lidx;\ 45a50f8bf6SHong Zhang _lidx = lnk[_idx];\ 46a50f8bf6SHong Zhang } while (_entry > _lidx);\ 47a50f8bf6SHong Zhang lnk[_idx] = _entry;\ 48a50f8bf6SHong Zhang lnk[_entry] = _lidx;\ 49a50f8bf6SHong Zhang nlnk++;\ 50a50f8bf6SHong Zhang }\ 51a50f8bf6SHong Zhang }\ 521c239cc6SHong Zhang } 53c5db241fSHong Zhang /* 54c5db241fSHong Zhang Copy data on the list into an array, then initialize the list 55c5db241fSHong Zhang Input Parameters: 56c5db241fSHong Zhang lnk_head - the header of the list 57c5db241fSHong Zhang lnk_init - the initial index value indicating the entry in the list is not set yet 58c5db241fSHong Zhang nlnk - number of data on the list to be copied 59c5db241fSHong Zhang lnk - linked list 60c5db241fSHong Zhang output Parameters: 61c5db241fSHong Zhang indices - array that contains the copied data 62c5db241fSHong Zhang */ 63c5db241fSHong Zhang #define LNKLISTCLEAR(lnk_head,lnk_init,nlnk,lnk,indices){\ 64c5db241fSHong Zhang int _j,_idx=lnk_head,_idx0;\ 65c5db241fSHong Zhang for (_j=0; _j<nlnk; _j++){\ 66c5db241fSHong Zhang _idx0 = _idx; _idx = lnk[_idx0];\ 67c5db241fSHong Zhang *(indices+_j) = _idx;\ 68c5db241fSHong Zhang lnk[_idx0] = lnk_init;\ 69c5db241fSHong Zhang }\ 70c5db241fSHong Zhang lnk[_idx] = lnk_init;\ 71c5db241fSHong Zhang } 721c239cc6SHong Zhang 73*26be0446SHong Zhang typedef struct { /* used by MatMatMult_MPIAIJ_MPIAIJ for reusing symbolic mat product */ 74*26be0446SHong Zhang IS isrowa,isrowb,iscolb; 75*26be0446SHong Zhang Mat *aseq,*bseq,C_seq; 76*26be0446SHong Zhang } Mat_MatMatMultMPI; 77*26be0446SHong Zhang 782216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0; 792216b3a4SKris Buschelman static int logkey_matmatmult_numeric = 0; 802216b3a4SKris Buschelman 81c1f4806aSKris Buschelman #undef __FUNCT__ 82c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult" 835c66b693SKris Buschelman /*@ 845c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 8594e3eecaSKris Buschelman 865c66b693SKris Buschelman Collective on Mat 87d50806bdSBarry Smith 885c66b693SKris Buschelman Input Parameters: 895c66b693SKris Buschelman + A - the left matrix 901c741599SHong Zhang . B - the right matrix 911c741599SHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 92c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 935c66b693SKris Buschelman 945c66b693SKris Buschelman Output Parameters: 955c66b693SKris Buschelman . C - the product matrix 965c66b693SKris Buschelman 975c66b693SKris Buschelman Notes: 985c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 995c66b693SKris Buschelman 1004d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 1014d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 1025c66b693SKris Buschelman 1035c66b693SKris Buschelman Level: intermediate 1045c66b693SKris Buschelman 1055c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 1065c66b693SKris Buschelman @*/ 1071c741599SHong Zhang int MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1082d09714cSHong Zhang { 109d50806bdSBarry Smith int ierr; 1102d09714cSHong Zhang 111d50806bdSBarry Smith PetscFunctionBegin; 1124482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 113c9780b6fSBarry Smith PetscValidType(A,1); 1145c66b693SKris Buschelman MatPreallocated(A); 1155c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1165c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1174482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 118c9780b6fSBarry Smith PetscValidType(B,2); 1195c66b693SKris Buschelman MatPreallocated(B); 1205c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1215c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1224482741eSBarry Smith PetscValidPointer(C,3); 1235c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 124d50806bdSBarry Smith 125c5db241fSHong Zhang if (fill <=0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%d must be > 0",fill); 126c5db241fSHong Zhang 1276284ec50SHong Zhang ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 1281c741599SHong Zhang ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 1296284ec50SHong Zhang ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 1304d3841fdSKris Buschelman 1316284ec50SHong Zhang PetscFunctionReturn(0); 1326284ec50SHong Zhang } 1336284ec50SHong Zhang 1346284ec50SHong Zhang #undef __FUNCT__ 1356284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 1361c741599SHong Zhang int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 1372d09714cSHong Zhang { 138*26be0446SHong Zhang int ierr; 1396284ec50SHong Zhang 1406284ec50SHong Zhang PetscFunctionBegin; 141*26be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 142*26be0446SHong Zhang ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 1432d09714cSHong Zhang } 144*26be0446SHong Zhang ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr); 145d50806bdSBarry Smith PetscFunctionReturn(0); 146d50806bdSBarry Smith } 1475c66b693SKris Buschelman 1485c66b693SKris Buschelman #undef __FUNCT__ 1495c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 1501c741599SHong Zhang int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 1515c66b693SKris Buschelman int ierr; 1525c66b693SKris Buschelman 1535c66b693SKris Buschelman PetscFunctionBegin; 154*26be0446SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 155*26be0446SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 156*26be0446SHong Zhang } 157*26be0446SHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 1585c66b693SKris Buschelman PetscFunctionReturn(0); 1595c66b693SKris Buschelman } 1605c66b693SKris Buschelman 161c1f4806aSKris Buschelman #undef __FUNCT__ 162c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 1635c66b693SKris Buschelman /*@ 1645c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 1655c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 1665c66b693SKris Buschelman 1675c66b693SKris Buschelman Collective on Mat 1685c66b693SKris Buschelman 1695c66b693SKris Buschelman Input Parameters: 1705c66b693SKris Buschelman + A - the left matrix 171c5db241fSHong Zhang . B - the right matrix 172c5db241fSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B)) 1735c66b693SKris Buschelman 1745c66b693SKris Buschelman Output Parameters: 1755c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 1765c66b693SKris Buschelman 1775c66b693SKris Buschelman Notes: 1784d3841fdSKris Buschelman C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 1795c66b693SKris Buschelman 1804d3841fdSKris Buschelman This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 1815c66b693SKris Buschelman 1825c66b693SKris Buschelman Level: intermediate 1835c66b693SKris Buschelman 1845c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 1855c66b693SKris Buschelman @*/ 186c5db241fSHong Zhang int MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) { 1875c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 1885c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 1895c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */ 1905c66b693SKris Buschelman int ierr; 1914d3841fdSKris Buschelman char symfunct[80]; 192c5db241fSHong Zhang int (*symbolic)(Mat,Mat,PetscReal,Mat *); 1935c66b693SKris Buschelman 1945c66b693SKris Buschelman PetscFunctionBegin; 1954482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 196c9780b6fSBarry Smith PetscValidType(A,1); 1975c66b693SKris Buschelman MatPreallocated(A); 1985c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1995c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2005c66b693SKris Buschelman 2014482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 202c9780b6fSBarry Smith PetscValidType(B,2); 2035c66b693SKris Buschelman MatPreallocated(B); 2045c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 2055c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2064482741eSBarry Smith PetscValidPointer(C,3); 2074482741eSBarry Smith 2085c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 209c5db241fSHong Zhang if (fill <=0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%d must be > 0",fill); 2105c66b693SKris Buschelman 2114d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 2124d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 2134d3841fdSKris Buschelman ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr); 2144d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2154d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 2164d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2174d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 218c5db241fSHong Zhang ierr = (*symbolic)(A,B,fill,C);CHKERRQ(ierr); 2195c66b693SKris Buschelman 2205c66b693SKris Buschelman PetscFunctionReturn(0); 2215c66b693SKris Buschelman } 222*26be0446SHong Zhang #undef __FUNCT__ 223*26be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult" 224*26be0446SHong Zhang int MatDestroy_MPIAIJ_MatMatMult(Mat A) 225*26be0446SHong Zhang { 226*26be0446SHong Zhang int ierr; 227*26be0446SHong Zhang Mat_MatMatMultMPI *mult = ( Mat_MatMatMultMPI*)A->spptr; 228*26be0446SHong Zhang 229*26be0446SHong Zhang PetscFunctionBegin; 230*26be0446SHong Zhang /* printf(" ...MatDestroy_MPIAIJ_MatMatMult is called\n"); */ 231*26be0446SHong Zhang ierr = PetscFree(mult);CHKERRQ(ierr); 232*26be0446SHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 233*26be0446SHong Zhang 234*26be0446SHong Zhang PetscFunctionReturn(0); 235*26be0446SHong Zhang } 23658c24d83SHong Zhang 23758c24d83SHong Zhang #undef __FUNCT__ 238*26be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 239*26be0446SHong Zhang int MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 240*26be0446SHong Zhang { 241*26be0446SHong Zhang Mat *aseq,*bseq,A_seq=PETSC_NULL,B_seq=PETSC_NULL,C_seq; 242*26be0446SHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 243*26be0446SHong Zhang int ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap; 244*26be0446SHong Zhang IS isrow,iscol; 245*26be0446SHong Zhang Mat_MatMatMultMPI *mult; 246*26be0446SHong Zhang 247*26be0446SHong Zhang PetscFunctionBegin; 248*26be0446SHong Zhang /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 249*26be0446SHong Zhang start = a->cstart; 250*26be0446SHong Zhang cmap = a->garray; 251*26be0446SHong Zhang nzA = a->A->n; 252*26be0446SHong Zhang nzB = a->B->n; 253*26be0446SHong Zhang ierr = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr); 254*26be0446SHong Zhang ncols = 0; 255*26be0446SHong Zhang for (i=0; i<nzB; i++) { 256*26be0446SHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 257*26be0446SHong Zhang else break; 258*26be0446SHong Zhang } 259*26be0446SHong Zhang imark = i; 260*26be0446SHong Zhang for (i=0; i<nzA; i++) idx[ncols++] = start + i; 261*26be0446SHong Zhang for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 262*26be0446SHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrow);CHKERRQ(ierr); /* isrow of B = iscol of A! */ 263*26be0446SHong Zhang ierr = PetscFree(idx);CHKERRQ(ierr); 264*26be0446SHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscol);CHKERRQ(ierr); 265*26be0446SHong Zhang ierr = MatGetSubMatrices(B,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&bseq);CHKERRQ(ierr); 266*26be0446SHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 267*26be0446SHong Zhang B_seq = bseq[0]; 268*26be0446SHong Zhang 269*26be0446SHong Zhang /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 270*26be0446SHong Zhang start = a->rstart; end = a->rend; 271*26be0446SHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&iscol);CHKERRQ(ierr); /* isrow of A = iscol */ 272*26be0446SHong Zhang ierr = MatGetSubMatrices(A,1,&iscol,&isrow,MAT_INITIAL_MATRIX,&aseq);CHKERRQ(ierr); 273*26be0446SHong Zhang ierr = ISDestroy(isrow);CHKERRQ(ierr); 274*26be0446SHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 275*26be0446SHong Zhang A_seq = aseq[0]; 276*26be0446SHong Zhang 277*26be0446SHong Zhang /* compute C_seq = A_seq * B_seq */ 278*26be0446SHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(A_seq,B_seq,MAT_INITIAL_MATRIX,fill,&C_seq);CHKERRQ(ierr); 279*26be0446SHong Zhang /* 280*26be0446SHong Zhang int rank; 281*26be0446SHong Zhang ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 282*26be0446SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_seq: %d, %d; B_seq: %d, %d; C_seq: %d, %d;\n",rank,A_seq->m,A_seq->n,B_seq->m,B_seq->n,C_seq->m,C_seq->n); 283*26be0446SHong Zhang */ 284*26be0446SHong Zhang ierr = MatDestroyMatrices(1,&aseq);CHKERRQ(ierr); 285*26be0446SHong Zhang ierr = MatDestroyMatrices(1,&bseq);CHKERRQ(ierr); 286*26be0446SHong Zhang 287*26be0446SHong Zhang /* create mpi matrix C by concatinating C_seq */ 288*26be0446SHong Zhang ierr = MatMerge(A->comm,C_seq,C);CHKERRQ(ierr); 289*26be0446SHong Zhang 290*26be0446SHong Zhang /* attach the supporting struct to C for reuse of symbolic C */ 291*26be0446SHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr); 292*26be0446SHong Zhang (*C)->spptr = (void*)mult; 293*26be0446SHong Zhang (*C)->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; 294*26be0446SHong Zhang 295*26be0446SHong Zhang PetscFunctionReturn(0); 296*26be0446SHong Zhang } 297*26be0446SHong Zhang 298*26be0446SHong Zhang #undef __FUNCT__ 299*26be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 300*26be0446SHong Zhang int MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 30158c24d83SHong Zhang { 30258c24d83SHong Zhang int ierr; 30358c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 30458c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 30558c24d83SHong Zhang int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 3061c239cc6SHong Zhang int *ci,*cj,*lnk,idx0,idx; 3075c66b693SKris Buschelman int am=A->M,bn=B->N,bm=B->M; 308c5db241fSHong Zhang int i,j,anzi,brow,bnzj,cnzi,nlnk,lnk_init=-1,nspacedouble=0; 30958c24d83SHong Zhang MatScalar *ca; 31058c24d83SHong Zhang 31158c24d83SHong Zhang PetscFunctionBegin; 3125c66b693SKris Buschelman /* Start timers */ 31358c24d83SHong Zhang ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 31458c24d83SHong Zhang /* Set up */ 31558c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 31658c24d83SHong Zhang /* free space for accumulating nonzero column info */ 31758c24d83SHong Zhang ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 31858c24d83SHong Zhang ci[0] = 0; 31958c24d83SHong Zhang 32058c24d83SHong Zhang ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 321c5db241fSHong Zhang LNKLISTINITIALIZE(lnk_init,bn,lnk); 32258c24d83SHong Zhang 323c5db241fSHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 324c5db241fSHong Zhang ierr = GetMoreSpace(fill*(ai[am]+bi[bm]),&free_space);CHKERRQ(ierr); 32558c24d83SHong Zhang current_space = free_space; 32658c24d83SHong Zhang 32758c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 32858c24d83SHong Zhang for (i=0;i<am;i++) { 32958c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 33058c24d83SHong Zhang cnzi = 0; 33158c24d83SHong Zhang lnk[bn] = bn; 3322d09714cSHong Zhang j = anzi; 3332d09714cSHong Zhang aj = a->j + ai[i]; 3342d09714cSHong Zhang while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 3352d09714cSHong Zhang j--; 3362d09714cSHong Zhang brow = *(aj + j); 33758c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 33858c24d83SHong Zhang bjj = bj + bi[brow]; 3391c239cc6SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 340c5db241fSHong Zhang LNKLISTADD(bnzj,bjj,bn,lnk_init,nlnk,lnk); 3411c239cc6SHong Zhang cnzi += nlnk; 34258c24d83SHong Zhang } 34358c24d83SHong Zhang 34458c24d83SHong Zhang /* If free space is not available, make more free space */ 34558c24d83SHong Zhang /* Double the amount of total space in the list */ 34658c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 347*26be0446SHong Zhang /* printf("MatMatMultSymbolic_SeqAIJ_SeqAIJ()...%d -th row, double space ...\n",i);*/ 34858c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 349c5db241fSHong Zhang nspacedouble++; 35058c24d83SHong Zhang } 35158c24d83SHong Zhang 352c5db241fSHong Zhang /* Copy data into free space, then initialize lnk */ 353c5db241fSHong Zhang LNKLISTCLEAR(bn,lnk_init,cnzi,lnk,current_space->array); 354c5db241fSHong Zhang current_space->array += cnzi; 35558c24d83SHong Zhang 35658c24d83SHong Zhang current_space->local_used += cnzi; 35758c24d83SHong Zhang current_space->local_remaining -= cnzi; 35858c24d83SHong Zhang 35958c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 36058c24d83SHong Zhang } 36158c24d83SHong Zhang 36258c24d83SHong Zhang /* Column indices are in the list of free space */ 36358c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 36458c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 36558c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 36658c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 36758c24d83SHong Zhang ierr = PetscFree(lnk);CHKERRQ(ierr); 36858c24d83SHong Zhang 36958c24d83SHong Zhang /* Allocate space for ca */ 37058c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 37158c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 37258c24d83SHong Zhang 373*26be0446SHong Zhang /* put together the new symbolic matrix */ 37458c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 37558c24d83SHong Zhang 37658c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 37758c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 37858c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 37958c24d83SHong Zhang c->freedata = PETSC_TRUE; 38058c24d83SHong Zhang c->nonew = 0; 38158c24d83SHong Zhang 38258c24d83SHong Zhang ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 383c5db241fSHong Zhang PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble); 38458c24d83SHong Zhang PetscFunctionReturn(0); 38558c24d83SHong Zhang } 386d50806bdSBarry Smith 387c1f4806aSKris Buschelman #undef __FUNCT__ 388c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 3895c66b693SKris Buschelman /*@ 3905c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 3915c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 3925c66b693SKris Buschelman 3935c66b693SKris Buschelman Collective on Mat 3945c66b693SKris Buschelman 3955c66b693SKris Buschelman Input Parameters: 3965c66b693SKris Buschelman + A - the left matrix 3975c66b693SKris Buschelman - B - the right matrix 3985c66b693SKris Buschelman 3995c66b693SKris Buschelman Output Parameters: 4005c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 4015c66b693SKris Buschelman 4025c66b693SKris Buschelman Notes: 4035c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 4045c66b693SKris Buschelman 4055c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 4065c66b693SKris Buschelman 4075c66b693SKris Buschelman Level: intermediate 4085c66b693SKris Buschelman 4095c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 4105c66b693SKris Buschelman @*/ 4115c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){ 4125c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 4135c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 4145c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */ 4155c66b693SKris Buschelman int ierr; 4164d3841fdSKris Buschelman char numfunct[80]; 4175c66b693SKris Buschelman int (*numeric)(Mat,Mat,Mat); 4185c66b693SKris Buschelman 4195c66b693SKris Buschelman PetscFunctionBegin; 4205c66b693SKris Buschelman 4214482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 422c9780b6fSBarry Smith PetscValidType(A,1); 4235c66b693SKris Buschelman MatPreallocated(A); 4245c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 4255c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4265c66b693SKris Buschelman 4274482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 428c9780b6fSBarry Smith PetscValidType(B,2); 4295c66b693SKris Buschelman MatPreallocated(B); 4305c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 4315c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4325c66b693SKris Buschelman 4334482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 434c9780b6fSBarry Smith PetscValidType(C,3); 4355c66b693SKris Buschelman MatPreallocated(C); 4365c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 4375c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4385c66b693SKris Buschelman 4395c66b693SKris Buschelman if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 4405c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 4415c66b693SKris Buschelman if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 4425c66b693SKris Buschelman 4434d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 4444d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 4454d3841fdSKris Buschelman ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr); 4464d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 4474d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 4484d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 4494d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 4504d3841fdSKris Buschelman 4515c66b693SKris Buschelman ierr = (*numeric)(A,B,C);CHKERRQ(ierr); 4525c66b693SKris Buschelman 4535c66b693SKris Buschelman PetscFunctionReturn(0); 4545c66b693SKris Buschelman } 4555c66b693SKris Buschelman 456d50806bdSBarry Smith #undef __FUNCT__ 457*26be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ" 458*26be0446SHong Zhang int MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 459*26be0446SHong Zhang { 460*26be0446SHong Zhang PetscFunctionBegin; 461*26be0446SHong Zhang PetscFunctionReturn(0); 462*26be0446SHong Zhang } 463*26be0446SHong Zhang 464*26be0446SHong Zhang #undef __FUNCT__ 465*26be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 466*26be0446SHong Zhang int MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 467d50806bdSBarry Smith { 46894e3eecaSKris Buschelman int ierr,flops=0; 469d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 470d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 471d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 472d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 4735c66b693SKris Buschelman int am=A->M,cn=C->N; 47494e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 475d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 476d50806bdSBarry Smith 477d50806bdSBarry Smith PetscFunctionBegin; 478d50806bdSBarry Smith 4795c66b693SKris Buschelman /* Start timers */ 480d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 48194e3eecaSKris Buschelman 482d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 483d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 484d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 485d50806bdSBarry Smith /* Traverse A row-wise. */ 486d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 487d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 488d50806bdSBarry Smith for (i=0;i<am;i++) { 489d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 490d50806bdSBarry Smith for (j=0;j<anzi;j++) { 491d50806bdSBarry Smith brow = *aj++; 492d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 493d50806bdSBarry Smith bjj = bj + bi[brow]; 494d50806bdSBarry Smith baj = ba + bi[brow]; 495d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 496d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 497d50806bdSBarry Smith } 498d50806bdSBarry Smith flops += 2*bnzi; 499d50806bdSBarry Smith aa++; 500d50806bdSBarry Smith } 501d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 502d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 503d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 504d50806bdSBarry Smith ca[j] = temp[cj[j]]; 505d50806bdSBarry Smith temp[cj[j]] = 0.0; 506d50806bdSBarry Smith } 507d50806bdSBarry Smith ca += cnzi; 508d50806bdSBarry Smith cj += cnzi; 509d50806bdSBarry Smith } 510716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 511716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 512716bacf3SKris Buschelman 513d50806bdSBarry Smith /* Free temp */ 514d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 515d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 516d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 517d50806bdSBarry Smith PetscFunctionReturn(0); 518d50806bdSBarry Smith } 519d50806bdSBarry Smith 520d50806bdSBarry Smith #undef __FUNCT__ 5215c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private" 5225c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) { 523d50806bdSBarry Smith int ierr; 524d50806bdSBarry Smith 525d50806bdSBarry Smith PetscFunctionBegin; 5265c66b693SKris Buschelman if (!logkey_matmatmult_symbolic) { 527*26be0446SHong Zhang ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMultSymbolic",MAT_COOKIE);CHKERRQ(ierr); 528d50806bdSBarry Smith } 529*26be0446SHong Zhang /* 5305c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij", 531*26be0446SHong Zhang "MatMatMultSymbolic_SeqAIJ_SeqAIJ", 532*26be0446SHong Zhang MatMatMultSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);*/ 5335c66b693SKris Buschelman if (!logkey_matmatmult_numeric) { 534*26be0446SHong Zhang ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMultNumeric",MAT_COOKIE);CHKERRQ(ierr); 53594e3eecaSKris Buschelman } 536*26be0446SHong Zhang /* 5375c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij", 538*26be0446SHong Zhang "MatMatMultNumeric_SeqAIJ_SeqAIJ", 539*26be0446SHong Zhang MatMatMultNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);*/ 54094e3eecaSKris Buschelman PetscFunctionReturn(0); 54194e3eecaSKris Buschelman } 542