1d50806bdSBarry Smith /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/ 2d50806bdSBarry Smith /* 32c9ce0e5SKris Buschelman Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4d50806bdSBarry Smith C = A * B 5d50806bdSBarry Smith */ 6d50806bdSBarry Smith 7d50806bdSBarry Smith #include "src/mat/impls/aij/seq/aij.h" 870f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 9d50806bdSBarry Smith 102216b3a4SKris Buschelman static int logkey_matmatmult = 0; 112216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0; 122216b3a4SKris Buschelman static int logkey_matmatmult_numeric = 0; 132216b3a4SKris Buschelman 14*5c66b693SKris Buschelman /*@ 15*5c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 1694e3eecaSKris Buschelman 17*5c66b693SKris Buschelman Collective on Mat 18d50806bdSBarry Smith 19*5c66b693SKris Buschelman Input Parameters: 20*5c66b693SKris Buschelman + A - the left matrix 21*5c66b693SKris Buschelman - B - the right matrix 22*5c66b693SKris Buschelman 23*5c66b693SKris Buschelman Output Parameters: 24*5c66b693SKris Buschelman . C - the product matrix 25*5c66b693SKris Buschelman 26*5c66b693SKris Buschelman Notes: 27*5c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 28*5c66b693SKris Buschelman 29*5c66b693SKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices. 30*5c66b693SKris Buschelman 31*5c66b693SKris Buschelman Level: intermediate 32*5c66b693SKris Buschelman 33*5c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 34*5c66b693SKris Buschelman @*/ 35d50806bdSBarry Smith #undef __FUNCT__ 36*5c66b693SKris Buschelman #define __FUNCT__ "MatMatMult" 37*5c66b693SKris Buschelman int MatMatMult(Mat A,Mat B, Mat *C) { 38*5c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 39*5c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 40*5c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMult_<type of A><type of B>". */ 41d50806bdSBarry Smith int ierr; 42*5c66b693SKris Buschelman char funct[80]; 43*5c66b693SKris Buschelman int (*mult)(Mat,Mat,Mat*); 44d50806bdSBarry Smith 45d50806bdSBarry Smith PetscFunctionBegin; 46*5c66b693SKris Buschelman PetscValidPointer(C); 47d50806bdSBarry Smith 48*5c66b693SKris Buschelman PetscValidHeaderSpecific(A,MAT_COOKIE); 49*5c66b693SKris Buschelman PetscValidType(A); 50*5c66b693SKris Buschelman MatPreallocated(A); 51*5c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 52*5c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 53d50806bdSBarry Smith 54*5c66b693SKris Buschelman PetscValidHeaderSpecific(B,MAT_COOKIE); 55*5c66b693SKris Buschelman PetscValidType(B); 56*5c66b693SKris Buschelman MatPreallocated(B); 57*5c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 58*5c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 59d50806bdSBarry Smith 60*5c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 61d50806bdSBarry Smith 62*5c66b693SKris Buschelman ierr = PetscStrcpy(funct,"MatMatMult_");CHKERRQ(ierr); 63*5c66b693SKris Buschelman ierr = PetscStrcat(funct,A->type_name);CHKERRQ(ierr); 64*5c66b693SKris Buschelman ierr = PetscStrcat(funct,B->type_name);CHKERRQ(ierr); 65*5c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&mult);CHKERRQ(ierr); 66*5c66b693SKris Buschelman if (!mult) SETERRQ2(PETSC_ERR_SUP, 67*5c66b693SKris Buschelman "C=A*B not implemented for A of type %s and B of type %s", 68*5c66b693SKris Buschelman A->type_name,B->type_name); 69*5c66b693SKris Buschelman ierr = (*mult)(A,B,C);CHKERRQ(ierr); 70d50806bdSBarry Smith PetscFunctionReturn(0); 71d50806bdSBarry Smith } 72*5c66b693SKris Buschelman 73*5c66b693SKris Buschelman #undef __FUNCT__ 74*5c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 75*5c66b693SKris Buschelman int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B, Mat *C) { 76*5c66b693SKris Buschelman int ierr; 77*5c66b693SKris Buschelman char symfunct[80],numfunct[80],types[80]; 78*5c66b693SKris Buschelman int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat); 79*5c66b693SKris Buschelman 80*5c66b693SKris Buschelman PetscFunctionBegin; 81*5c66b693SKris Buschelman ierr = PetscStrcpy(types,A->type_name);CHKERRQ(ierr); 82*5c66b693SKris Buschelman ierr = PetscStrcat(types,B->type_name);CHKERRQ(ierr); 83*5c66b693SKris Buschelman ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_");CHKERRQ(ierr); 84*5c66b693SKris Buschelman ierr = PetscStrcat(symfunct,types);CHKERRQ(ierr); 85*5c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 86*5c66b693SKris Buschelman if (!symbolic) SETERRQ2(PETSC_ERR_SUP, 87*5c66b693SKris Buschelman "C=A*B not implemented for A of type %s and B of type %s", 88*5c66b693SKris Buschelman A->type_name,B->type_name); 89*5c66b693SKris Buschelman ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_");CHKERRQ(ierr); 90*5c66b693SKris Buschelman ierr = PetscStrcat(numfunct,types);CHKERRQ(ierr); 91*5c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 92*5c66b693SKris Buschelman if (!numeric) SETERRQ2(PETSC_ERR_SUP, 93*5c66b693SKris Buschelman "C=A*B not implemented for A of type %s and B of type %s", 94*5c66b693SKris Buschelman A->type_name,B->type_name); 95*5c66b693SKris Buschelman ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 96*5c66b693SKris Buschelman ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 97*5c66b693SKris Buschelman ierr = (*numeric)(A,B,*C);CHKERRQ(ierr); 98*5c66b693SKris Buschelman ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 99*5c66b693SKris Buschelman PetscFunctionReturn(0); 100*5c66b693SKris Buschelman } 101*5c66b693SKris Buschelman 102*5c66b693SKris Buschelman /*@ 103*5c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 104*5c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 105*5c66b693SKris Buschelman 106*5c66b693SKris Buschelman Collective on Mat 107*5c66b693SKris Buschelman 108*5c66b693SKris Buschelman Input Parameters: 109*5c66b693SKris Buschelman + A - the left matrix 110*5c66b693SKris Buschelman - B - the right matrix 111*5c66b693SKris Buschelman 112*5c66b693SKris Buschelman Output Parameters: 113*5c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 114*5c66b693SKris Buschelman 115*5c66b693SKris Buschelman Notes: 116*5c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 117*5c66b693SKris Buschelman 118*5c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 119*5c66b693SKris Buschelman 120*5c66b693SKris Buschelman Level: intermediate 121*5c66b693SKris Buschelman 122*5c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 123*5c66b693SKris Buschelman @*/ 124*5c66b693SKris Buschelman #undef __FUNCT__ 125*5c66b693SKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 126*5c66b693SKris Buschelman int MatMatMultSymbolic(Mat A,Mat B,Mat *C) { 127*5c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 128*5c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 129*5c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */ 130*5c66b693SKris Buschelman int ierr; 131*5c66b693SKris Buschelman char funct[80]; 132*5c66b693SKris Buschelman int (*symbolic)(Mat,Mat,Mat *); 133*5c66b693SKris Buschelman 134*5c66b693SKris Buschelman PetscFunctionBegin; 135*5c66b693SKris Buschelman PetscValidPointer(C); 136*5c66b693SKris Buschelman 137*5c66b693SKris Buschelman PetscValidHeaderSpecific(A,MAT_COOKIE); 138*5c66b693SKris Buschelman PetscValidType(A); 139*5c66b693SKris Buschelman MatPreallocated(A); 140*5c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 141*5c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 142*5c66b693SKris Buschelman 143*5c66b693SKris Buschelman PetscValidHeaderSpecific(B,MAT_COOKIE); 144*5c66b693SKris Buschelman PetscValidType(B); 145*5c66b693SKris Buschelman MatPreallocated(B); 146*5c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 147*5c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 148*5c66b693SKris Buschelman 149*5c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 150*5c66b693SKris Buschelman 151*5c66b693SKris Buschelman ierr = PetscStrcpy(funct,"MatMatMultSymbolic_");CHKERRQ(ierr); 152*5c66b693SKris Buschelman ierr = PetscStrcat(funct,A->type_name);CHKERRQ(ierr); 153*5c66b693SKris Buschelman ierr = PetscStrcat(funct,B->type_name);CHKERRQ(ierr); 154*5c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 155*5c66b693SKris Buschelman if (!symbolic) SETERRQ2(PETSC_ERR_SUP, 156*5c66b693SKris Buschelman "C=A*B not implemented for A of type %s and B of type %s", 157*5c66b693SKris Buschelman A->type_name,B->type_name); 158*5c66b693SKris Buschelman ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 159*5c66b693SKris Buschelman 160*5c66b693SKris Buschelman PetscFunctionReturn(0); 161*5c66b693SKris Buschelman } 16258c24d83SHong Zhang 16358c24d83SHong Zhang #undef __FUNCT__ 16458c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 16558c24d83SHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 16658c24d83SHong Zhang { 16758c24d83SHong Zhang int ierr; 16858c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 16958c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 17058c24d83SHong Zhang int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 17158c24d83SHong Zhang int *ci,*cj,*lnk,idx0,idx,bcol; 172*5c66b693SKris Buschelman int am=A->M,bn=B->N,bm=B->M; 17358c24d83SHong Zhang int i,j,k,anzi,brow,bnzj,cnzi; 17458c24d83SHong Zhang MatScalar *ca; 17558c24d83SHong Zhang 17658c24d83SHong Zhang PetscFunctionBegin; 177*5c66b693SKris Buschelman /* Start timers */ 17858c24d83SHong Zhang ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 17958c24d83SHong Zhang 18058c24d83SHong Zhang /* Set up */ 18158c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 18258c24d83SHong Zhang /* free space for accumulating nonzero column info */ 18358c24d83SHong Zhang ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 18458c24d83SHong Zhang ci[0] = 0; 18558c24d83SHong Zhang 18658c24d83SHong Zhang ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 18758c24d83SHong Zhang for (i=0; i<bn; i++) lnk[i] = -1; 18858c24d83SHong Zhang 18958c24d83SHong Zhang /* Initial FreeSpace size is nnz(B)=4*bi[bm] */ 19058c24d83SHong Zhang ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr); 19158c24d83SHong Zhang current_space = free_space; 19258c24d83SHong Zhang 19358c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 19458c24d83SHong Zhang for (i=0;i<am;i++) { 19558c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 19658c24d83SHong Zhang cnzi = 0; 19758c24d83SHong Zhang lnk[bn] = bn; 19858c24d83SHong Zhang for (j=0;j<anzi;j++) { 19958c24d83SHong Zhang brow = *aj++; 20058c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 20158c24d83SHong Zhang bjj = bj + bi[brow]; 20258c24d83SHong Zhang idx = bn; 20358c24d83SHong Zhang for (k=0;k<bnzj;k++) { 20458c24d83SHong Zhang bcol = bjj[k]; 20558c24d83SHong Zhang if (lnk[bcol] == -1) { /* new col */ 20658c24d83SHong Zhang if (k>0) idx = bjj[k-1]; 20758c24d83SHong Zhang do { 20858c24d83SHong Zhang idx0 = idx; 20958c24d83SHong Zhang idx = lnk[idx0]; 21058c24d83SHong Zhang } while (bcol > idx); 21158c24d83SHong Zhang lnk[idx0] = bcol; 21258c24d83SHong Zhang lnk[bcol] = idx; 21358c24d83SHong Zhang cnzi++; 21458c24d83SHong Zhang } 21558c24d83SHong Zhang } 21658c24d83SHong Zhang } 21758c24d83SHong Zhang 21858c24d83SHong Zhang /* If free space is not available, make more free space */ 21958c24d83SHong Zhang /* Double the amount of total space in the list */ 22058c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 22158c24d83SHong Zhang printf("...%d -th row, double space ...\n",i); 22258c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 22358c24d83SHong Zhang } 22458c24d83SHong Zhang 22558c24d83SHong Zhang /* Copy data into free space, and zero out denserow and lnk */ 22658c24d83SHong Zhang idx = bn; 22758c24d83SHong Zhang for (j=0; j<cnzi; j++){ 22858c24d83SHong Zhang idx0 = idx; 22958c24d83SHong Zhang idx = lnk[idx0]; 23058c24d83SHong Zhang *current_space->array++ = idx; 23158c24d83SHong Zhang lnk[idx0] = -1; 23258c24d83SHong Zhang } 23358c24d83SHong Zhang lnk[idx] = -1; 23458c24d83SHong Zhang 23558c24d83SHong Zhang current_space->local_used += cnzi; 23658c24d83SHong Zhang current_space->local_remaining -= cnzi; 23758c24d83SHong Zhang 23858c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 23958c24d83SHong Zhang } 24058c24d83SHong Zhang 24158c24d83SHong Zhang /* Column indices are in the list of free space */ 24258c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 24358c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 24458c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 24558c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 24658c24d83SHong Zhang ierr = PetscFree(lnk);CHKERRQ(ierr); 24758c24d83SHong Zhang 24858c24d83SHong Zhang /* Allocate space for ca */ 24958c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 25058c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 25158c24d83SHong Zhang 25258c24d83SHong Zhang /* put together the new matrix */ 25358c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 25458c24d83SHong Zhang 25558c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 25658c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 25758c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 25858c24d83SHong Zhang c->freedata = PETSC_TRUE; 25958c24d83SHong Zhang c->nonew = 0; 26058c24d83SHong Zhang 26158c24d83SHong Zhang ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 26258c24d83SHong Zhang PetscFunctionReturn(0); 26358c24d83SHong Zhang } 264d50806bdSBarry Smith 265*5c66b693SKris Buschelman /*@ 266*5c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 267*5c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 268*5c66b693SKris Buschelman 269*5c66b693SKris Buschelman Collective on Mat 270*5c66b693SKris Buschelman 271*5c66b693SKris Buschelman Input Parameters: 272*5c66b693SKris Buschelman + A - the left matrix 273*5c66b693SKris Buschelman - B - the right matrix 274*5c66b693SKris Buschelman 275*5c66b693SKris Buschelman Output Parameters: 276*5c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 277*5c66b693SKris Buschelman 278*5c66b693SKris Buschelman Notes: 279*5c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 280*5c66b693SKris Buschelman 281*5c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 282*5c66b693SKris Buschelman 283*5c66b693SKris Buschelman Level: intermediate 284*5c66b693SKris Buschelman 285*5c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 286*5c66b693SKris Buschelman @*/ 287*5c66b693SKris Buschelman #undef __FUNCT__ 288*5c66b693SKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 289*5c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){ 290*5c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 291*5c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 292*5c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */ 293*5c66b693SKris Buschelman int ierr; 294*5c66b693SKris Buschelman char funct[80]; 295*5c66b693SKris Buschelman int (*numeric)(Mat,Mat,Mat); 296*5c66b693SKris Buschelman 297*5c66b693SKris Buschelman PetscFunctionBegin; 298*5c66b693SKris Buschelman 299*5c66b693SKris Buschelman PetscValidHeaderSpecific(A,MAT_COOKIE); 300*5c66b693SKris Buschelman PetscValidType(A); 301*5c66b693SKris Buschelman MatPreallocated(A); 302*5c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 303*5c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 304*5c66b693SKris Buschelman 305*5c66b693SKris Buschelman PetscValidHeaderSpecific(B,MAT_COOKIE); 306*5c66b693SKris Buschelman PetscValidType(B); 307*5c66b693SKris Buschelman MatPreallocated(B); 308*5c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 309*5c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 310*5c66b693SKris Buschelman 311*5c66b693SKris Buschelman PetscValidHeaderSpecific(C,MAT_COOKIE); 312*5c66b693SKris Buschelman PetscValidType(C); 313*5c66b693SKris Buschelman MatPreallocated(C); 314*5c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 315*5c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 316*5c66b693SKris Buschelman 317*5c66b693SKris Buschelman if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 318*5c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 319*5c66b693SKris Buschelman if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 320*5c66b693SKris Buschelman 321*5c66b693SKris Buschelman /* Query A for ApplyPtAP implementation based on types of P */ 322*5c66b693SKris Buschelman ierr = PetscStrcpy(funct,"MatMatMultNumeric_");CHKERRQ(ierr); 323*5c66b693SKris Buschelman ierr = PetscStrcat(funct,A->type_name);CHKERRQ(ierr); 324*5c66b693SKris Buschelman ierr = PetscStrcat(funct,B->type_name);CHKERRQ(ierr); 325*5c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 326*5c66b693SKris Buschelman if (!numeric) SETERRQ2(PETSC_ERR_SUP, 327*5c66b693SKris Buschelman "C=A*B not implemented for A of type %s and B of type %s", 328*5c66b693SKris Buschelman A->type_name,B->type_name); 329*5c66b693SKris Buschelman ierr = (*numeric)(A,B,C);CHKERRQ(ierr); 330*5c66b693SKris Buschelman 331*5c66b693SKris Buschelman PetscFunctionReturn(0); 332*5c66b693SKris Buschelman } 333*5c66b693SKris Buschelman 334d50806bdSBarry Smith #undef __FUNCT__ 33594e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 33694e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 337d50806bdSBarry Smith { 33894e3eecaSKris Buschelman int ierr,flops=0; 339d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 340d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 341d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 342d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 343*5c66b693SKris Buschelman int am=A->M,cn=C->N; 34494e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 345d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 346d50806bdSBarry Smith 347d50806bdSBarry Smith PetscFunctionBegin; 348d50806bdSBarry Smith 349*5c66b693SKris Buschelman /* Start timers */ 350d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 35194e3eecaSKris Buschelman 352d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 353d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 354d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 355d50806bdSBarry Smith /* Traverse A row-wise. */ 356d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 357d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 358d50806bdSBarry Smith for (i=0;i<am;i++) { 359d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 360d50806bdSBarry Smith for (j=0;j<anzi;j++) { 361d50806bdSBarry Smith brow = *aj++; 362d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 363d50806bdSBarry Smith bjj = bj + bi[brow]; 364d50806bdSBarry Smith baj = ba + bi[brow]; 365d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 366d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 367d50806bdSBarry Smith } 368d50806bdSBarry Smith flops += 2*bnzi; 369d50806bdSBarry Smith aa++; 370d50806bdSBarry Smith } 371d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 372d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 373d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 374d50806bdSBarry Smith ca[j] = temp[cj[j]]; 375d50806bdSBarry Smith temp[cj[j]] = 0.0; 376d50806bdSBarry Smith } 377d50806bdSBarry Smith ca += cnzi; 378d50806bdSBarry Smith cj += cnzi; 379d50806bdSBarry Smith } 380716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 381716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 382716bacf3SKris Buschelman 383d50806bdSBarry Smith /* Free temp */ 384d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 385d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 386d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 387d50806bdSBarry Smith PetscFunctionReturn(0); 388d50806bdSBarry Smith } 389d50806bdSBarry Smith 390d50806bdSBarry Smith #undef __FUNCT__ 391*5c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private" 392*5c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) { 393d50806bdSBarry Smith int ierr; 394d50806bdSBarry Smith 395d50806bdSBarry Smith PetscFunctionBegin; 3962216b3a4SKris Buschelman if (!logkey_matmatmult) { 3972216b3a4SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 3982216b3a4SKris Buschelman } 399*5c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMult_seqaijseqaij", 400*5c66b693SKris Buschelman "MatMatMult_SeqAIJ_SeqAIJ", 401*5c66b693SKris Buschelman MatMatMult_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 402*5c66b693SKris Buschelman if (!logkey_matmatmult_symbolic) { 403*5c66b693SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 404d50806bdSBarry Smith } 405*5c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij", 406*5c66b693SKris Buschelman "MatMatMult_Symbolic_SeqAIJ_SeqAIJ", 407*5c66b693SKris Buschelman MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 408*5c66b693SKris Buschelman if (!logkey_matmatmult_numeric) { 409*5c66b693SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 41094e3eecaSKris Buschelman } 411*5c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij", 412*5c66b693SKris Buschelman "MatMatMult_Numeric_SeqAIJ_SeqAIJ", 413*5c66b693SKris Buschelman MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 41494e3eecaSKris Buschelman PetscFunctionReturn(0); 41594e3eecaSKris Buschelman } 416