1d50806bdSBarry Smith /* 22c9ce0e5SKris Buschelman Defines matrix-matrix product routines for pairs of SeqAIJ matrices 3d50806bdSBarry Smith C = A * B 4d50806bdSBarry Smith */ 5d50806bdSBarry Smith 6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 9d50806bdSBarry Smith 101c239cc6SHong Zhang /* 111c239cc6SHong Zhang Add a index set into a sorted linked list 121c239cc6SHong Zhang input: 131c239cc6SHong Zhang nidx - number of input indices 141c239cc6SHong Zhang indices - interger array 151c239cc6SHong Zhang idx_head - the header of the list 161c239cc6SHong Zhang idx_unset - the value indicating the entry in the list is not set yet 171c239cc6SHong Zhang lnk - linked list(an integer array) that is created 181c239cc6SHong Zhang output: 191c239cc6SHong Zhang nlnk - number of newly added indices 201c239cc6SHong Zhang lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices 211c239cc6SHong Zhang */ 221c239cc6SHong Zhang #undef __FUNCT__ 231c239cc6SHong Zhang #define __FUNCT__ "LnklistAdd" 241c239cc6SHong Zhang int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk) 251c239cc6SHong Zhang { 261c239cc6SHong Zhang int i,idx,lidx,entry,n; 271c239cc6SHong Zhang 281c239cc6SHong Zhang PetscFunctionBegin; 291c239cc6SHong Zhang n = 0; 301c239cc6SHong Zhang lidx = idx_head; 311c239cc6SHong Zhang i = nidx; 321c239cc6SHong Zhang while (i){ 332d09714cSHong Zhang /* assume indices are almost in increasing order, starting from its end saves computation */ 341c239cc6SHong Zhang entry = indices[--i]; 351c239cc6SHong Zhang if (lnk[entry] == idx_unset) { /* new entry */ 361c239cc6SHong Zhang do { 371c239cc6SHong Zhang idx = lidx; 381c239cc6SHong Zhang lidx = lnk[idx]; 391c239cc6SHong Zhang } while (entry > lidx); 401c239cc6SHong Zhang lnk[idx] = entry; 411c239cc6SHong Zhang lnk[entry] = lidx; 421c239cc6SHong Zhang n++; 431c239cc6SHong Zhang } 441c239cc6SHong Zhang } 451c239cc6SHong Zhang *nlnk = n; 461c239cc6SHong Zhang PetscFunctionReturn(0); 471c239cc6SHong Zhang } 481c239cc6SHong Zhang 491c239cc6SHong Zhang 502216b3a4SKris Buschelman static int logkey_matmatmult = 0; 512216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0; 522216b3a4SKris Buschelman static int logkey_matmatmult_numeric = 0; 532216b3a4SKris Buschelman 54c1f4806aSKris Buschelman #undef __FUNCT__ 55c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult" 565c66b693SKris Buschelman /*@ 575c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 5894e3eecaSKris Buschelman 595c66b693SKris Buschelman Collective on Mat 60d50806bdSBarry Smith 615c66b693SKris Buschelman Input Parameters: 625c66b693SKris Buschelman + A - the left matrix 63*1c741599SHong Zhang . B - the right matrix 64*1c741599SHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 65*1c741599SHong Zhang - fill - expected fill as ratio of nonzeros in product matrix/nonzeros in original matrix 665c66b693SKris Buschelman 675c66b693SKris Buschelman Output Parameters: 685c66b693SKris Buschelman . C - the product matrix 695c66b693SKris Buschelman 705c66b693SKris Buschelman Notes: 715c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 725c66b693SKris Buschelman 734d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 744d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 755c66b693SKris Buschelman 765c66b693SKris Buschelman Level: intermediate 775c66b693SKris Buschelman 785c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 795c66b693SKris Buschelman @*/ 80*1c741599SHong Zhang int MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 812d09714cSHong Zhang { 82d50806bdSBarry Smith int ierr; 832d09714cSHong Zhang 84d50806bdSBarry Smith PetscFunctionBegin; 854482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 86c9780b6fSBarry Smith PetscValidType(A,1); 875c66b693SKris Buschelman MatPreallocated(A); 885c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 895c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 90d50806bdSBarry Smith 914482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 92c9780b6fSBarry Smith PetscValidType(B,2); 935c66b693SKris Buschelman MatPreallocated(B); 945c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 955c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 96d50806bdSBarry Smith 974482741eSBarry Smith PetscValidPointer(C,3); 984482741eSBarry Smith 995c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 100d50806bdSBarry Smith 1016284ec50SHong Zhang ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 102*1c741599SHong Zhang ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr); 1036284ec50SHong Zhang ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr); 1044d3841fdSKris Buschelman 1056284ec50SHong Zhang PetscFunctionReturn(0); 1066284ec50SHong Zhang } 1076284ec50SHong Zhang 1086284ec50SHong Zhang #undef __FUNCT__ 1096284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ" 110*1c741599SHong Zhang int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) 1112d09714cSHong Zhang { 1122d09714cSHong Zhang Mat *aseq,*bseq,A_seq=PETSC_NULL,B_seq=PETSC_NULL,*cseq,C_seq,C_mpi; 1132d09714cSHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1142d09714cSHong Zhang Mat_SeqAIJ *c; 1152d09714cSHong Zhang MatScalar *ca; 1162d09714cSHong Zhang int ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap,*ci,*cj,grow,*d_nnz,*o_nnz; 1172d09714cSHong Zhang IS isrow,iscol; 1186284ec50SHong Zhang 1196284ec50SHong Zhang PetscFunctionBegin; 1202d09714cSHong Zhang /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */ 1212d09714cSHong Zhang start = a->cstart; 1222d09714cSHong Zhang cmap = a->garray; 1232d09714cSHong Zhang nzA = a->A->n; 1242d09714cSHong Zhang nzB = a->B->n; 1252d09714cSHong Zhang ierr = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr); 1262d09714cSHong Zhang ncols = 0; 1272d09714cSHong Zhang for (i=0; i<nzB; i++) { 1282d09714cSHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 1292d09714cSHong Zhang else break; 1302d09714cSHong Zhang } 1312d09714cSHong Zhang imark = i; 1322d09714cSHong Zhang for (i=0; i<nzA; i++) idx[ncols++] = start + i; 1332d09714cSHong Zhang for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 1342d09714cSHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrow);CHKERRQ(ierr); /* isrow of B = iscol of A! */ 1352d09714cSHong Zhang ierr = PetscFree(idx);CHKERRQ(ierr); 1362d09714cSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscol);CHKERRQ(ierr); 137*1c741599SHong Zhang ierr = MatGetSubMatrices(B,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&bseq);CHKERRQ(ierr);/* reuse */ 1382d09714cSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 1392d09714cSHong Zhang B_seq = bseq[0]; 1406284ec50SHong Zhang 1412d09714cSHong Zhang /* create a seq matrix A_seq = submatrix of A by taking all local rows of A */ 1422d09714cSHong Zhang start = a->rstart; end = a->rend; 1432d09714cSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&iscol);CHKERRQ(ierr); /* isrow of A = iscol */ 144*1c741599SHong Zhang ierr = MatGetSubMatrices(A,1,&iscol,&isrow,MAT_INITIAL_MATRIX,&aseq);CHKERRQ(ierr); /* reuse */ 1452d09714cSHong Zhang ierr = ISDestroy(isrow);CHKERRQ(ierr); 1462d09714cSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 1472d09714cSHong Zhang A_seq = aseq[0]; 1482d09714cSHong Zhang 1492d09714cSHong Zhang /* compute C_seq = A_seq * B_seq */ 150*1c741599SHong Zhang ierr = MatMatMult_SeqAIJ_SeqAIJ(A_seq, B_seq, scall, fill,&C_seq);CHKERRQ(ierr); 1512d09714cSHong Zhang /* 1522d09714cSHong Zhang int rank; 1532d09714cSHong Zhang ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 1542d09714cSHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_seq: %d, %d; B_seq: %d, %d; C_seq: %d, %d;\n",rank,A_seq->m,A_seq->n,B_seq->m,B_seq->n,C_seq->m,C_seq->n); 1552d09714cSHong Zhang */ 1562d09714cSHong Zhang ierr = MatDestroyMatrices(1,&aseq);CHKERRQ(ierr); 1572d09714cSHong Zhang ierr = MatDestroyMatrices(1,&bseq);CHKERRQ(ierr); 1582d09714cSHong Zhang 1592d09714cSHong Zhang /* create a mpi matrix C_mpi that has C_seq as its local entries */ 1602d09714cSHong Zhang ierr = MatCreate(A->comm,C_seq->m,PETSC_DECIDE,PETSC_DECIDE,C_seq->N,&C_mpi);CHKERRQ(ierr); 1612d09714cSHong Zhang ierr = MatSetType(C_mpi,MATMPIAIJ);CHKERRQ(ierr); 1622d09714cSHong Zhang 1632d09714cSHong Zhang c = (Mat_SeqAIJ*)C_seq->data; 1642d09714cSHong Zhang ci = c->i; cj = c->j; ca = c->a; 1652d09714cSHong Zhang ierr = PetscMalloc((2*C_seq->m+1)*sizeof(int),&d_nnz);CHKERRQ(ierr); 1662d09714cSHong Zhang o_nnz = d_nnz + C_seq->m; 1672d09714cSHong Zhang nzA = end-start; /* max nonezero cols of the local diagonal part of C_mpi */ 1682d09714cSHong Zhang nzB = C_seq->n-nzA; /* max nonezero cols of the local off-diagonal part of C_mpi */ 1692d09714cSHong Zhang for (i=0; i< C_seq->m; i++){ 1702d09714cSHong Zhang ncols = ci[i+1] - ci[i]; 1712d09714cSHong Zhang d_nnz[i] = PetscMin(ncols,nzA); 1722d09714cSHong Zhang o_nnz[i] = PetscMin(ncols,nzB); 1732d09714cSHong Zhang } 1742d09714cSHong Zhang ierr = MatMPIAIJSetPreallocation(C_mpi,PETSC_DECIDE,d_nnz,PETSC_DECIDE,o_nnz);CHKERRQ(ierr); 1752d09714cSHong Zhang ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1762d09714cSHong Zhang 1772d09714cSHong Zhang /* set row values of C_mpi */ 1782d09714cSHong Zhang for (i=0; i< C_seq->m; i++){ 1792d09714cSHong Zhang grow = start + i; 1802d09714cSHong Zhang ncols = ci[i+1] - ci[i]; 181*1c741599SHong Zhang ierr = MatSetValues_MPIAIJ(C_mpi,1,&grow,ncols,cj+ci[i],ca+ci[i],INSERT_VALUES);CHKERRQ(ierr); 1822d09714cSHong Zhang } 1832d09714cSHong Zhang ierr = MatAssemblyBegin(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1842d09714cSHong Zhang ierr = MatAssemblyEnd(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1852d09714cSHong Zhang ierr = MatDestroy(C_seq);CHKERRQ(ierr); 1862d09714cSHong Zhang 1872d09714cSHong Zhang *C = C_mpi; 188d50806bdSBarry Smith PetscFunctionReturn(0); 189d50806bdSBarry Smith } 1905c66b693SKris Buschelman 1915c66b693SKris Buschelman #undef __FUNCT__ 1925c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 193*1c741599SHong Zhang int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 1945c66b693SKris Buschelman int ierr; 1954d3841fdSKris Buschelman char symfunct[80],numfunct[80]; 1965c66b693SKris Buschelman int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat); 1975c66b693SKris Buschelman 1985c66b693SKris Buschelman PetscFunctionBegin; 1994d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 2004d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 2014d3841fdSKris Buschelman ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr); 2024d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2034d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 2045c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2054d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 2064d3841fdSKris Buschelman 2074d3841fdSKris Buschelman ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr); 2085c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 2094d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 2104d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 2114d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 2124d3841fdSKris Buschelman 2135c66b693SKris Buschelman ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 2145c66b693SKris Buschelman ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 2155c66b693SKris Buschelman ierr = (*numeric)(A,B,*C);CHKERRQ(ierr); 2165c66b693SKris Buschelman ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 2175c66b693SKris Buschelman PetscFunctionReturn(0); 2185c66b693SKris Buschelman } 2195c66b693SKris Buschelman 220c1f4806aSKris Buschelman #undef __FUNCT__ 221c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 2225c66b693SKris Buschelman /*@ 2235c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 2245c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 2255c66b693SKris Buschelman 2265c66b693SKris Buschelman Collective on Mat 2275c66b693SKris Buschelman 2285c66b693SKris Buschelman Input Parameters: 2295c66b693SKris Buschelman + A - the left matrix 2305c66b693SKris Buschelman - B - the right matrix 2315c66b693SKris Buschelman 2325c66b693SKris Buschelman Output Parameters: 2335c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 2345c66b693SKris Buschelman 2355c66b693SKris Buschelman Notes: 2364d3841fdSKris Buschelman C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 2375c66b693SKris Buschelman 2384d3841fdSKris Buschelman This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 2395c66b693SKris Buschelman 2405c66b693SKris Buschelman Level: intermediate 2415c66b693SKris Buschelman 2425c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 2435c66b693SKris Buschelman @*/ 2445c66b693SKris Buschelman int MatMatMultSymbolic(Mat A,Mat B,Mat *C) { 2455c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 2465c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 2475c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */ 2485c66b693SKris Buschelman int ierr; 2494d3841fdSKris Buschelman char symfunct[80]; 2505c66b693SKris Buschelman int (*symbolic)(Mat,Mat,Mat *); 2515c66b693SKris Buschelman 2525c66b693SKris Buschelman PetscFunctionBegin; 2534482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 254c9780b6fSBarry Smith PetscValidType(A,1); 2555c66b693SKris Buschelman MatPreallocated(A); 2565c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 2575c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2585c66b693SKris Buschelman 2594482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 260c9780b6fSBarry Smith PetscValidType(B,2); 2615c66b693SKris Buschelman MatPreallocated(B); 2625c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 2635c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2644482741eSBarry Smith PetscValidPointer(C,3); 2654482741eSBarry Smith 2665c66b693SKris Buschelman 2675c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 2685c66b693SKris Buschelman 2694d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 2704d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 2714d3841fdSKris Buschelman ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr); 2724d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2734d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 2744d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 2754d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 2765c66b693SKris Buschelman ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 2775c66b693SKris Buschelman 2785c66b693SKris Buschelman PetscFunctionReturn(0); 2795c66b693SKris Buschelman } 28058c24d83SHong Zhang 28158c24d83SHong Zhang #undef __FUNCT__ 28258c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 28358c24d83SHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 28458c24d83SHong Zhang { 28558c24d83SHong Zhang int ierr; 28658c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 28758c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 28858c24d83SHong Zhang int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 2891c239cc6SHong Zhang int *ci,*cj,*lnk,idx0,idx; 2905c66b693SKris Buschelman int am=A->M,bn=B->N,bm=B->M; 291bf812060SBarry Smith int i,j,anzi,brow,bnzj,cnzi,nlnk; 29258c24d83SHong Zhang MatScalar *ca; 29358c24d83SHong Zhang 29458c24d83SHong Zhang PetscFunctionBegin; 2955c66b693SKris Buschelman /* Start timers */ 29658c24d83SHong Zhang ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 29758c24d83SHong Zhang /* Set up */ 29858c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 29958c24d83SHong Zhang /* free space for accumulating nonzero column info */ 30058c24d83SHong Zhang ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 30158c24d83SHong Zhang ci[0] = 0; 30258c24d83SHong Zhang 30358c24d83SHong Zhang ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 30458c24d83SHong Zhang for (i=0; i<bn; i++) lnk[i] = -1; 30558c24d83SHong Zhang 30658c24d83SHong Zhang /* Initial FreeSpace size is nnz(B)=4*bi[bm] */ 30758c24d83SHong Zhang ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr); 30858c24d83SHong Zhang current_space = free_space; 30958c24d83SHong Zhang 31058c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 31158c24d83SHong Zhang for (i=0;i<am;i++) { 31258c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 31358c24d83SHong Zhang cnzi = 0; 31458c24d83SHong Zhang lnk[bn] = bn; 3152d09714cSHong Zhang j = anzi; 3162d09714cSHong Zhang aj = a->j + ai[i]; 3172d09714cSHong Zhang while (j){/* assume cols are almost in increasing order, starting from its end saves computation */ 3182d09714cSHong Zhang j--; 3192d09714cSHong Zhang brow = *(aj + j); 32058c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 32158c24d83SHong Zhang bjj = bj + bi[brow]; 3221c239cc6SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 3231c239cc6SHong Zhang ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr); 3241c239cc6SHong Zhang cnzi += nlnk; 32558c24d83SHong Zhang } 32658c24d83SHong Zhang 32758c24d83SHong Zhang /* If free space is not available, make more free space */ 32858c24d83SHong Zhang /* Double the amount of total space in the list */ 32958c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 330*1c741599SHong Zhang printf("MatMatMult_Symbolic_SeqAIJ_SeqAIJ()...%d -th row, double space ...\n",i); 33158c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 33258c24d83SHong Zhang } 33358c24d83SHong Zhang 33458c24d83SHong Zhang /* Copy data into free space, and zero out denserow and lnk */ 33558c24d83SHong Zhang idx = bn; 33658c24d83SHong Zhang for (j=0; j<cnzi; j++){ 33758c24d83SHong Zhang idx0 = idx; 33858c24d83SHong Zhang idx = lnk[idx0]; 33958c24d83SHong Zhang *current_space->array++ = idx; 34058c24d83SHong Zhang lnk[idx0] = -1; 34158c24d83SHong Zhang } 34258c24d83SHong Zhang lnk[idx] = -1; 34358c24d83SHong Zhang 34458c24d83SHong Zhang current_space->local_used += cnzi; 34558c24d83SHong Zhang current_space->local_remaining -= cnzi; 34658c24d83SHong Zhang 34758c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 34858c24d83SHong Zhang } 34958c24d83SHong Zhang 35058c24d83SHong Zhang /* Column indices are in the list of free space */ 35158c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 35258c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 35358c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 35458c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 35558c24d83SHong Zhang ierr = PetscFree(lnk);CHKERRQ(ierr); 35658c24d83SHong Zhang 35758c24d83SHong Zhang /* Allocate space for ca */ 35858c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 35958c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 36058c24d83SHong Zhang 36158c24d83SHong Zhang /* put together the new matrix */ 36258c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 36358c24d83SHong Zhang 36458c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 36558c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 36658c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 36758c24d83SHong Zhang c->freedata = PETSC_TRUE; 36858c24d83SHong Zhang c->nonew = 0; 36958c24d83SHong Zhang 37058c24d83SHong Zhang ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 37158c24d83SHong Zhang PetscFunctionReturn(0); 37258c24d83SHong Zhang } 373d50806bdSBarry Smith 374c1f4806aSKris Buschelman #undef __FUNCT__ 375c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 3765c66b693SKris Buschelman /*@ 3775c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 3785c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 3795c66b693SKris Buschelman 3805c66b693SKris Buschelman Collective on Mat 3815c66b693SKris Buschelman 3825c66b693SKris Buschelman Input Parameters: 3835c66b693SKris Buschelman + A - the left matrix 3845c66b693SKris Buschelman - B - the right matrix 3855c66b693SKris Buschelman 3865c66b693SKris Buschelman Output Parameters: 3875c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 3885c66b693SKris Buschelman 3895c66b693SKris Buschelman Notes: 3905c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 3915c66b693SKris Buschelman 3925c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 3935c66b693SKris Buschelman 3945c66b693SKris Buschelman Level: intermediate 3955c66b693SKris Buschelman 3965c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 3975c66b693SKris Buschelman @*/ 3985c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){ 3995c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 4005c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 4015c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */ 4025c66b693SKris Buschelman int ierr; 4034d3841fdSKris Buschelman char numfunct[80]; 4045c66b693SKris Buschelman int (*numeric)(Mat,Mat,Mat); 4055c66b693SKris Buschelman 4065c66b693SKris Buschelman PetscFunctionBegin; 4075c66b693SKris Buschelman 4084482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 409c9780b6fSBarry Smith PetscValidType(A,1); 4105c66b693SKris Buschelman MatPreallocated(A); 4115c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 4125c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4135c66b693SKris Buschelman 4144482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 415c9780b6fSBarry Smith PetscValidType(B,2); 4165c66b693SKris Buschelman MatPreallocated(B); 4175c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 4185c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4195c66b693SKris Buschelman 4204482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 421c9780b6fSBarry Smith PetscValidType(C,3); 4225c66b693SKris Buschelman MatPreallocated(C); 4235c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 4245c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4255c66b693SKris Buschelman 4265c66b693SKris Buschelman if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 4275c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 4285c66b693SKris Buschelman if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 4295c66b693SKris Buschelman 4304d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 4314d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 4324d3841fdSKris Buschelman ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr); 4334d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 4344d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 4354d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 4364d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 4374d3841fdSKris Buschelman 4385c66b693SKris Buschelman ierr = (*numeric)(A,B,C);CHKERRQ(ierr); 4395c66b693SKris Buschelman 4405c66b693SKris Buschelman PetscFunctionReturn(0); 4415c66b693SKris Buschelman } 4425c66b693SKris Buschelman 443d50806bdSBarry Smith #undef __FUNCT__ 44494e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 44594e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 446d50806bdSBarry Smith { 44794e3eecaSKris Buschelman int ierr,flops=0; 448d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 449d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 450d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 451d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 4525c66b693SKris Buschelman int am=A->M,cn=C->N; 45394e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 454d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 455d50806bdSBarry Smith 456d50806bdSBarry Smith PetscFunctionBegin; 457d50806bdSBarry Smith 4585c66b693SKris Buschelman /* Start timers */ 459d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 46094e3eecaSKris Buschelman 461d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 462d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 463d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 464d50806bdSBarry Smith /* Traverse A row-wise. */ 465d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 466d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 467d50806bdSBarry Smith for (i=0;i<am;i++) { 468d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 469d50806bdSBarry Smith for (j=0;j<anzi;j++) { 470d50806bdSBarry Smith brow = *aj++; 471d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 472d50806bdSBarry Smith bjj = bj + bi[brow]; 473d50806bdSBarry Smith baj = ba + bi[brow]; 474d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 475d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 476d50806bdSBarry Smith } 477d50806bdSBarry Smith flops += 2*bnzi; 478d50806bdSBarry Smith aa++; 479d50806bdSBarry Smith } 480d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 481d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 482d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 483d50806bdSBarry Smith ca[j] = temp[cj[j]]; 484d50806bdSBarry Smith temp[cj[j]] = 0.0; 485d50806bdSBarry Smith } 486d50806bdSBarry Smith ca += cnzi; 487d50806bdSBarry Smith cj += cnzi; 488d50806bdSBarry Smith } 489716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 490716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 491716bacf3SKris Buschelman 492d50806bdSBarry Smith /* Free temp */ 493d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 494d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 495d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 496d50806bdSBarry Smith PetscFunctionReturn(0); 497d50806bdSBarry Smith } 498d50806bdSBarry Smith 499d50806bdSBarry Smith #undef __FUNCT__ 5005c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private" 5015c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) { 502d50806bdSBarry Smith int ierr; 503d50806bdSBarry Smith 504d50806bdSBarry Smith PetscFunctionBegin; 505*1c741599SHong Zhang #ifndef OLD 5062216b3a4SKris Buschelman if (!logkey_matmatmult) { 5072216b3a4SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 5082216b3a4SKris Buschelman } 5096284ec50SHong Zhang #endif 5105c66b693SKris Buschelman if (!logkey_matmatmult_symbolic) { 5115c66b693SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 512d50806bdSBarry Smith } 5135c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij", 5145c66b693SKris Buschelman "MatMatMult_Symbolic_SeqAIJ_SeqAIJ", 5155c66b693SKris Buschelman MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 5165c66b693SKris Buschelman if (!logkey_matmatmult_numeric) { 5175c66b693SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 51894e3eecaSKris Buschelman } 5195c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij", 5205c66b693SKris Buschelman "MatMatMult_Numeric_SeqAIJ_SeqAIJ", 5215c66b693SKris Buschelman MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 52294e3eecaSKris Buschelman PetscFunctionReturn(0); 52394e3eecaSKris Buschelman } 524