1*807171c5SHong Zhang 2*807171c5SHong Zhang /* 3*807171c5SHong Zhang Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4*807171c5SHong Zhang C = P * A * P^T 5*807171c5SHong Zhang */ 6*807171c5SHong Zhang 7*807171c5SHong Zhang #include <../src/mat/impls/aij/seq/aij.h> 8*807171c5SHong Zhang #include <../src/mat/utils/freespace.h> 9*807171c5SHong Zhang #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 10*807171c5SHong Zhang 11*807171c5SHong Zhang /* 12*807171c5SHong Zhang MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 13*807171c5SHong Zhang C = P * A * P^T; 14*807171c5SHong Zhang 15*807171c5SHong Zhang Note: C is assumed to be uncreated. 16*807171c5SHong Zhang If this is not the case, Destroy C before calling this routine. 17*807171c5SHong Zhang */ 18*807171c5SHong Zhang #undef __FUNCT__ 19*807171c5SHong Zhang #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ" 20*807171c5SHong Zhang PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) 21*807171c5SHong Zhang { 22*807171c5SHong Zhang /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ 23*807171c5SHong Zhang /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ 24*807171c5SHong Zhang PetscErrorCode ierr; 25*807171c5SHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 26*807171c5SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 27*807171c5SHong Zhang PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; 28*807171c5SHong Zhang PetscInt *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; 29*807171c5SHong Zhang PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N; 30*807171c5SHong Zhang PetscInt i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; 31*807171c5SHong Zhang MatScalar *ca; 32*807171c5SHong Zhang 33*807171c5SHong Zhang PetscFunctionBegin; 34*807171c5SHong Zhang /* some error checking which could be moved into interface layer */ 35*807171c5SHong Zhang if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am); 36*807171c5SHong Zhang if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an); 37*807171c5SHong Zhang 38*807171c5SHong Zhang /* Set up timers */ 39*807171c5SHong Zhang ierr = PetscLogEventBegin(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 40*807171c5SHong Zhang 41*807171c5SHong Zhang /* Create ij structure of P^T */ 42*807171c5SHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 43*807171c5SHong Zhang 44*807171c5SHong Zhang /* Allocate ci array, arrays for fill computation and */ 45*807171c5SHong Zhang /* free space for accumulating nonzero column info */ 46*807171c5SHong Zhang ierr = PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 47*807171c5SHong Zhang ci[0] = 0; 48*807171c5SHong Zhang 49*807171c5SHong Zhang ierr = PetscMalloc4(an,PetscInt,&padenserow,an,PetscInt,&pasparserow,pm,PetscInt,&denserow,pm,PetscInt,&sparserow);CHKERRQ(ierr); 50*807171c5SHong Zhang ierr = PetscMemzero(padenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 51*807171c5SHong Zhang ierr = PetscMemzero(pasparserow,an*sizeof(PetscInt));CHKERRQ(ierr); 52*807171c5SHong Zhang ierr = PetscMemzero(denserow,pm*sizeof(PetscInt));CHKERRQ(ierr); 53*807171c5SHong Zhang ierr = PetscMemzero(sparserow,pm*sizeof(PetscInt));CHKERRQ(ierr); 54*807171c5SHong Zhang 55*807171c5SHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ 56*807171c5SHong Zhang /* This should be reasonable if sparsity of PAPt is similar to that of A. */ 57*807171c5SHong Zhang ierr = PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space); 58*807171c5SHong Zhang current_space = free_space; 59*807171c5SHong Zhang 60*807171c5SHong Zhang /* Determine fill for each row of C: */ 61*807171c5SHong Zhang for (i=0;i<pm;i++) { 62*807171c5SHong Zhang pnzi = pi[i+1] - pi[i]; 63*807171c5SHong Zhang panzi = 0; 64*807171c5SHong Zhang /* Get symbolic sparse row of PA: */ 65*807171c5SHong Zhang for (j=0;j<pnzi;j++) { 66*807171c5SHong Zhang arow = *pj++; 67*807171c5SHong Zhang anzj = ai[arow+1] - ai[arow]; 68*807171c5SHong Zhang ajj = aj + ai[arow]; 69*807171c5SHong Zhang for (k=0;k<anzj;k++) { 70*807171c5SHong Zhang if (!padenserow[ajj[k]]) { 71*807171c5SHong Zhang padenserow[ajj[k]] = -1; 72*807171c5SHong Zhang pasparserow[panzi++] = ajj[k]; 73*807171c5SHong Zhang } 74*807171c5SHong Zhang } 75*807171c5SHong Zhang } 76*807171c5SHong Zhang /* Using symbolic row of PA, determine symbolic row of C: */ 77*807171c5SHong Zhang paj = pasparserow; 78*807171c5SHong Zhang cnzi = 0; 79*807171c5SHong Zhang for (j=0;j<panzi;j++) { 80*807171c5SHong Zhang ptrow = *paj++; 81*807171c5SHong Zhang ptnzj = pti[ptrow+1] - pti[ptrow]; 82*807171c5SHong Zhang ptjj = ptj + pti[ptrow]; 83*807171c5SHong Zhang for (k=0;k<ptnzj;k++) { 84*807171c5SHong Zhang if (!denserow[ptjj[k]]) { 85*807171c5SHong Zhang denserow[ptjj[k]] = -1; 86*807171c5SHong Zhang sparserow[cnzi++] = ptjj[k]; 87*807171c5SHong Zhang } 88*807171c5SHong Zhang } 89*807171c5SHong Zhang } 90*807171c5SHong Zhang 91*807171c5SHong Zhang /* sort sparse representation */ 92*807171c5SHong Zhang ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 93*807171c5SHong Zhang 94*807171c5SHong Zhang /* If free space is not available, make more free space */ 95*807171c5SHong Zhang /* Double the amount of total space in the list */ 96*807171c5SHong Zhang if (current_space->local_remaining<cnzi) { 97*807171c5SHong Zhang ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 98*807171c5SHong Zhang } 99*807171c5SHong Zhang 100*807171c5SHong Zhang /* Copy data into free space, and zero out dense row */ 101*807171c5SHong Zhang ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 102*807171c5SHong Zhang current_space->array += cnzi; 103*807171c5SHong Zhang current_space->local_used += cnzi; 104*807171c5SHong Zhang current_space->local_remaining -= cnzi; 105*807171c5SHong Zhang 106*807171c5SHong Zhang for (j=0;j<panzi;j++) { 107*807171c5SHong Zhang padenserow[pasparserow[j]] = 0; 108*807171c5SHong Zhang } 109*807171c5SHong Zhang for (j=0;j<cnzi;j++) { 110*807171c5SHong Zhang denserow[sparserow[j]] = 0; 111*807171c5SHong Zhang } 112*807171c5SHong Zhang ci[i+1] = ci[i] + cnzi; 113*807171c5SHong Zhang } 114*807171c5SHong Zhang /* column indices are in the list of free space */ 115*807171c5SHong Zhang /* Allocate space for cj, initialize cj, and */ 116*807171c5SHong Zhang /* destroy list of free space and other temporary array(s) */ 117*807171c5SHong Zhang ierr = PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 118*807171c5SHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 119*807171c5SHong Zhang ierr = PetscFree4(padenserow,pasparserow,denserow,sparserow);CHKERRQ(ierr); 120*807171c5SHong Zhang 121*807171c5SHong Zhang /* Allocate space for ca */ 122*807171c5SHong Zhang ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 123*807171c5SHong Zhang ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); 124*807171c5SHong Zhang 125*807171c5SHong Zhang /* put together the new matrix */ 126*807171c5SHong Zhang ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); 127*807171c5SHong Zhang 128*807171c5SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 129*807171c5SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 130*807171c5SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 131*807171c5SHong Zhang c->free_a = PETSC_TRUE; 132*807171c5SHong Zhang c->free_ij = PETSC_TRUE; 133*807171c5SHong Zhang c->nonew = 0; 134*807171c5SHong Zhang 135*807171c5SHong Zhang /* Clean up. */ 136*807171c5SHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 137*807171c5SHong Zhang 138*807171c5SHong Zhang ierr = PetscLogEventEnd(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 139*807171c5SHong Zhang PetscFunctionReturn(0); 140*807171c5SHong Zhang } 141*807171c5SHong Zhang 142*807171c5SHong Zhang /* 143*807171c5SHong Zhang MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 144*807171c5SHong Zhang C = P * A * P^T; 145*807171c5SHong Zhang Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ. 146*807171c5SHong Zhang */ 147*807171c5SHong Zhang #undef __FUNCT__ 148*807171c5SHong Zhang #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ" 149*807171c5SHong Zhang PetscErrorCode MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 150*807171c5SHong Zhang { 151*807171c5SHong Zhang PetscErrorCode ierr; 152*807171c5SHong Zhang PetscInt flops=0; 153*807171c5SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 154*807171c5SHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 155*807171c5SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 156*807171c5SHong Zhang PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj; 157*807171c5SHong Zhang PetscInt *ci=c->i,*cj=c->j; 158*807171c5SHong Zhang PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,cn=C->cmap->N,cm=C->rmap->N; 159*807171c5SHong Zhang PetscInt i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi; 160*807171c5SHong Zhang MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum; 161*807171c5SHong Zhang 162*807171c5SHong Zhang PetscFunctionBegin; 163*807171c5SHong Zhang /* This error checking should be unnecessary if the symbolic was performed */ 164*807171c5SHong Zhang if (pm!=cm) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm,cm); 165*807171c5SHong Zhang if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am); 166*807171c5SHong Zhang if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an); 167*807171c5SHong Zhang if (pm!=cn) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm, cn); 168*807171c5SHong Zhang 169*807171c5SHong Zhang /* Set up timers */ 170*807171c5SHong Zhang ierr = PetscLogEventBegin(MAT_Applypapt_numeric,A,P,C,0);CHKERRQ(ierr); 171*807171c5SHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 172*807171c5SHong Zhang 173*807171c5SHong Zhang ierr = PetscMalloc3(an,MatScalar,&paa,an,PetscInt,&paj,an,PetscInt,&pajdense);CHKERRQ(ierr); 174*807171c5SHong Zhang ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 175*807171c5SHong Zhang 176*807171c5SHong Zhang for (i=0;i<pm;i++) { 177*807171c5SHong Zhang /* Form sparse row of P*A */ 178*807171c5SHong Zhang pnzi = pi[i+1] - pi[i]; 179*807171c5SHong Zhang panzj = 0; 180*807171c5SHong Zhang for (j=0;j<pnzi;j++) { 181*807171c5SHong Zhang arow = *pj++; 182*807171c5SHong Zhang anzj = ai[arow+1] - ai[arow]; 183*807171c5SHong Zhang ajj = aj + ai[arow]; 184*807171c5SHong Zhang aaj = aa + ai[arow]; 185*807171c5SHong Zhang for (k=0;k<anzj;k++) { 186*807171c5SHong Zhang if (!pajdense[ajj[k]]) { 187*807171c5SHong Zhang pajdense[ajj[k]] = -1; 188*807171c5SHong Zhang paj[panzj++] = ajj[k]; 189*807171c5SHong Zhang } 190*807171c5SHong Zhang paa[ajj[k]] += (*pa)*aaj[k]; 191*807171c5SHong Zhang } 192*807171c5SHong Zhang flops += 2*anzj; 193*807171c5SHong Zhang pa++; 194*807171c5SHong Zhang } 195*807171c5SHong Zhang 196*807171c5SHong Zhang /* Sort the j index array for quick sparse axpy. */ 197*807171c5SHong Zhang ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr); 198*807171c5SHong Zhang 199*807171c5SHong Zhang /* Compute P*A*P^T using sparse inner products. */ 200*807171c5SHong Zhang /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */ 201*807171c5SHong Zhang cnzi = ci[i+1] - ci[i]; 202*807171c5SHong Zhang for (j=0;j<cnzi;j++) { 203*807171c5SHong Zhang /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */ 204*807171c5SHong Zhang ptcol = *cj++; 205*807171c5SHong Zhang ptnzj = pi[ptcol+1] - pi[ptcol]; 206*807171c5SHong Zhang ptj = pjj + pi[ptcol]; 207*807171c5SHong Zhang ptaj = pta + pi[ptcol]; 208*807171c5SHong Zhang sum = 0.; 209*807171c5SHong Zhang k1 = 0; 210*807171c5SHong Zhang k2 = 0; 211*807171c5SHong Zhang while ((k1<panzj) && (k2<ptnzj)) { 212*807171c5SHong Zhang if (paj[k1]==ptj[k2]) { 213*807171c5SHong Zhang sum += paa[paj[k1++]]*ptaj[k2++]; 214*807171c5SHong Zhang } else if (paj[k1] < ptj[k2]) { 215*807171c5SHong Zhang k1++; 216*807171c5SHong Zhang } else /* if (paj[k1] > ptj[k2]) */ { 217*807171c5SHong Zhang k2++; 218*807171c5SHong Zhang } 219*807171c5SHong Zhang } 220*807171c5SHong Zhang *ca++ = sum; 221*807171c5SHong Zhang } 222*807171c5SHong Zhang 223*807171c5SHong Zhang /* Zero the current row info for P*A */ 224*807171c5SHong Zhang for (j=0;j<panzj;j++) { 225*807171c5SHong Zhang paa[paj[j]] = 0.; 226*807171c5SHong Zhang pajdense[paj[j]] = 0; 227*807171c5SHong Zhang } 228*807171c5SHong Zhang } 229*807171c5SHong Zhang 230*807171c5SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231*807171c5SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 232*807171c5SHong Zhang ierr = PetscFree3(paa,paj,pajdense);CHKERRQ(ierr); 233*807171c5SHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 234*807171c5SHong Zhang ierr = PetscLogEventEnd(MAT_Applypapt_numeric,A,P,C,0);CHKERRQ(ierr); 235*807171c5SHong Zhang PetscFunctionReturn(0); 236*807171c5SHong Zhang } 237*807171c5SHong Zhang 238*807171c5SHong Zhang #undef __FUNCT__ 239*807171c5SHong Zhang #define __FUNCT__ "MatApplyPAPt_SeqAIJ_SeqAIJ" 240*807171c5SHong Zhang PetscErrorCode MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) 241*807171c5SHong Zhang { 242*807171c5SHong Zhang PetscErrorCode ierr; 243*807171c5SHong Zhang 244*807171c5SHong Zhang PetscFunctionBegin; 245*807171c5SHong Zhang ierr = PetscLogEventBegin(MAT_Applypapt,A,P,0,0);CHKERRQ(ierr); 246*807171c5SHong Zhang ierr = MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 247*807171c5SHong Zhang ierr = MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 248*807171c5SHong Zhang ierr = PetscLogEventEnd(MAT_Applypapt,A,P,0,0);CHKERRQ(ierr); 249*807171c5SHong Zhang PetscFunctionReturn(0); 250*807171c5SHong Zhang } 251*807171c5SHong Zhang 252*807171c5SHong Zhang /*--------------------------------------------------*/ 253*807171c5SHong Zhang /* 254*807171c5SHong Zhang Defines projective product routines where A is a SeqAIJ matrix 255*807171c5SHong Zhang C = R * A * R^T 256*807171c5SHong Zhang */ 257*807171c5SHong Zhang 258*807171c5SHong Zhang #undef __FUNCT__ 259*807171c5SHong Zhang #define __FUNCT__ "PetscContainerDestroy_Mat_RARt" 260*807171c5SHong Zhang PetscErrorCode PetscContainerDestroy_Mat_RARt(void *ptr) 261*807171c5SHong Zhang { 262*807171c5SHong Zhang PetscErrorCode ierr; 263*807171c5SHong Zhang Mat_RARt *rart=(Mat_RARt*)ptr; 264*807171c5SHong Zhang 265*807171c5SHong Zhang PetscFunctionBegin; 266*807171c5SHong Zhang ierr = MatTransposeColoringDestroy(&rart->matcoloring);CHKERRQ(ierr); 267*807171c5SHong Zhang ierr = MatDestroy(&rart->Rt);CHKERRQ(ierr); 268*807171c5SHong Zhang ierr = MatDestroy(&rart->RARt);CHKERRQ(ierr); 269*807171c5SHong Zhang ierr = PetscFree(rart->work);CHKERRQ(ierr); 270*807171c5SHong Zhang ierr = PetscFree(rart);CHKERRQ(ierr); 271*807171c5SHong Zhang PetscFunctionReturn(0); 272*807171c5SHong Zhang } 273*807171c5SHong Zhang 274*807171c5SHong Zhang #undef __FUNCT__ 275*807171c5SHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_RARt" 276*807171c5SHong Zhang PetscErrorCode MatDestroy_SeqAIJ_RARt(Mat A) 277*807171c5SHong Zhang { 278*807171c5SHong Zhang PetscErrorCode ierr; 279*807171c5SHong Zhang PetscContainer container; 280*807171c5SHong Zhang Mat_RARt *rart=PETSC_NULL; 281*807171c5SHong Zhang 282*807171c5SHong Zhang PetscFunctionBegin; 283*807171c5SHong Zhang ierr = PetscObjectQuery((PetscObject)A,"Mat_RARt",(PetscObject *)&container);CHKERRQ(ierr); 284*807171c5SHong Zhang if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 285*807171c5SHong Zhang ierr = PetscContainerGetPointer(container,(void **)&rart);CHKERRQ(ierr); 286*807171c5SHong Zhang A->ops->destroy = rart->destroy; 287*807171c5SHong Zhang if (A->ops->destroy) { 288*807171c5SHong Zhang ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 289*807171c5SHong Zhang } 290*807171c5SHong Zhang ierr = PetscObjectCompose((PetscObject)A,"Mat_RARt",0);CHKERRQ(ierr); 291*807171c5SHong Zhang PetscFunctionReturn(0); 292*807171c5SHong Zhang } 293*807171c5SHong Zhang 294*807171c5SHong Zhang #undef __FUNCT__ 295*807171c5SHong Zhang #define __FUNCT__ "MatRARtSymbolic_SeqAIJ_SeqAIJ" 296*807171c5SHong Zhang PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat *C) 297*807171c5SHong Zhang { 298*807171c5SHong Zhang PetscErrorCode ierr; 299*807171c5SHong Zhang Mat P; 300*807171c5SHong Zhang PetscInt *rti,*rtj; 301*807171c5SHong Zhang Mat_RARt *rart; 302*807171c5SHong Zhang PetscContainer container; 303*807171c5SHong Zhang MatTransposeColoring matcoloring; 304*807171c5SHong Zhang ISColoring iscoloring; 305*807171c5SHong Zhang Mat Rt_dense,ARt_dense,RARt_dense; 306*807171c5SHong Zhang PetscLogDouble GColor=0.0,MCCreate=0.0,MDenCreate=0.0,t0,tf,etime=0.0; 307*807171c5SHong Zhang Mat_SeqAIJ *c; 308*807171c5SHong Zhang 309*807171c5SHong Zhang PetscFunctionBegin; 310*807171c5SHong Zhang ierr = PetscGetTime(&t0);CHKERRQ(ierr); 311*807171c5SHong Zhang /* create symbolic P=Rt */ 312*807171c5SHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); 313*807171c5SHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,PETSC_NULL,&P);CHKERRQ(ierr); 314*807171c5SHong Zhang 315*807171c5SHong Zhang /* get symbolic C=Pt*A*P */ 316*807171c5SHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 317*807171c5SHong Zhang 318*807171c5SHong Zhang /* create a supporting struct */ 319*807171c5SHong Zhang ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr); 320*807171c5SHong Zhang 321*807171c5SHong Zhang /* attach the supporting struct to C */ 322*807171c5SHong Zhang ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 323*807171c5SHong Zhang ierr = PetscContainerSetPointer(container,rart);CHKERRQ(ierr); 324*807171c5SHong Zhang ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_RARt);CHKERRQ(ierr); 325*807171c5SHong Zhang ierr = PetscObjectCompose((PetscObject)(*C),"Mat_RARt",(PetscObject)container);CHKERRQ(ierr); 326*807171c5SHong Zhang ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 327*807171c5SHong Zhang ierr = PetscGetTime(&tf);CHKERRQ(ierr); 328*807171c5SHong Zhang etime += tf - t0; 329*807171c5SHong Zhang 330*807171c5SHong Zhang /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 331*807171c5SHong Zhang c=(Mat_SeqAIJ*)(*C)->data; 332*807171c5SHong Zhang ierr = PetscGetTime(&t0);CHKERRQ(ierr); 333*807171c5SHong Zhang ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 334*807171c5SHong Zhang ierr = PetscGetTime(&tf);CHKERRQ(ierr); 335*807171c5SHong Zhang GColor += tf - t0; 336*807171c5SHong Zhang 337*807171c5SHong Zhang ierr = PetscGetTime(&t0);CHKERRQ(ierr); 338*807171c5SHong Zhang ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 339*807171c5SHong Zhang rart->matcoloring = matcoloring; 340*807171c5SHong Zhang ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 341*807171c5SHong Zhang ierr = PetscGetTime(&tf);CHKERRQ(ierr); 342*807171c5SHong Zhang MCCreate += tf - t0; 343*807171c5SHong Zhang 344*807171c5SHong Zhang ierr = PetscGetTime(&t0);CHKERRQ(ierr); 345*807171c5SHong Zhang /* Create Rt_dense */ 346*807171c5SHong Zhang ierr = MatCreate(PETSC_COMM_SELF,&Rt_dense);CHKERRQ(ierr); 347*807171c5SHong Zhang ierr = MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 348*807171c5SHong Zhang ierr = MatSetType(Rt_dense,MATSEQDENSE);CHKERRQ(ierr); 349*807171c5SHong Zhang ierr = MatSeqDenseSetPreallocation(Rt_dense,PETSC_NULL);CHKERRQ(ierr); 350*807171c5SHong Zhang Rt_dense->assembled = PETSC_TRUE; 351*807171c5SHong Zhang rart->Rt = Rt_dense; 352*807171c5SHong Zhang 353*807171c5SHong Zhang /* Create RARt_dense = R*A*Rt_dense */ 354*807171c5SHong Zhang ierr = MatCreate(PETSC_COMM_SELF,&RARt_dense);CHKERRQ(ierr); 355*807171c5SHong Zhang ierr = MatSetSizes(RARt_dense,(*C)->rmap->n,matcoloring->ncolors,(*C)->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 356*807171c5SHong Zhang ierr = MatSetType(RARt_dense,MATSEQDENSE);CHKERRQ(ierr); 357*807171c5SHong Zhang ierr = MatSeqDenseSetPreallocation(RARt_dense,PETSC_NULL);CHKERRQ(ierr); 358*807171c5SHong Zhang rart->RARt = RARt_dense; 359*807171c5SHong Zhang 360*807171c5SHong Zhang /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 361*807171c5SHong Zhang ierr = PetscMalloc(A->rmap->n*4*sizeof(PetscScalar),&rart->work);CHKERRQ(ierr); 362*807171c5SHong Zhang 363*807171c5SHong Zhang ierr = PetscGetTime(&tf);CHKERRQ(ierr); 364*807171c5SHong Zhang MDenCreate += tf - t0; 365*807171c5SHong Zhang 366*807171c5SHong Zhang rart->destroy = (*C)->ops->destroy; 367*807171c5SHong Zhang (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt; 368*807171c5SHong Zhang 369*807171c5SHong Zhang /* clean up */ 370*807171c5SHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); 371*807171c5SHong Zhang ierr = MatDestroy(&P);CHKERRQ(ierr); 372*807171c5SHong Zhang 373*807171c5SHong Zhang #if defined(PETSC_USE_INFO) 374*807171c5SHong Zhang PetscReal density= (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n); 375*807171c5SHong Zhang ierr = PetscInfo6(*C,"RARt_den %D %D; Rt_den %D %D, (RARt->nz %D)/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,Rt_dense->rmap->n,Rt_dense->cmap->n,c->nz,density); 376*807171c5SHong Zhang ierr = PetscInfo5(*C,"Sym = GetColor %g + MColorCreate %g + MDenCreate %g + other %g = %g\n",GColor,MCCreate,MDenCreate,etime,GColor+MCCreate+MDenCreate+etime); 377*807171c5SHong Zhang #endif 378*807171c5SHong Zhang PetscFunctionReturn(0); 379*807171c5SHong Zhang } 380*807171c5SHong Zhang 381*807171c5SHong Zhang /* 382*807171c5SHong Zhang RAB = R * A * B, R and A in seqaij format, B in dense format; 383*807171c5SHong Zhang */ 384*807171c5SHong Zhang #undef __FUNCT__ 385*807171c5SHong Zhang #define __FUNCT__ "MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense" 386*807171c5SHong Zhang PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work) 387*807171c5SHong Zhang { 388*807171c5SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data; 389*807171c5SHong Zhang PetscErrorCode ierr; 390*807171c5SHong Zhang PetscScalar *b,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 391*807171c5SHong Zhang MatScalar *aa,*ra; 392*807171c5SHong Zhang PetscInt cn=B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n; 393*807171c5SHong Zhang PetscInt am2=2*am,am3=3*am,bm4=4*bm; 394*807171c5SHong Zhang PetscScalar *d,*c,*c2,*c3,*c4; 395*807171c5SHong Zhang PetscInt *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n; 396*807171c5SHong Zhang PetscInt rm2=2*rm,rm3=3*rm,colrm; 397*807171c5SHong Zhang 398*807171c5SHong Zhang PetscFunctionBegin; 399*807171c5SHong Zhang if (!dm || !dn) PetscFunctionReturn(0); 400*807171c5SHong Zhang if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 401*807171c5SHong Zhang if (am != R->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %D not equal rows in A %D\n",R->cmap->n,am); 402*807171c5SHong Zhang if (R->rmap->n != RAB->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %D not equal rows in R %D\n",RAB->rmap->n,R->rmap->n); 403*807171c5SHong Zhang if (B->cmap->n != RAB->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %D not equal columns in B %D\n",RAB->cmap->n,B->cmap->n); 404*807171c5SHong Zhang 405*807171c5SHong Zhang ierr = MatGetArray(B,&b);CHKERRQ(ierr); 406*807171c5SHong Zhang ierr = MatGetArray(RAB,&d);CHKERRQ(ierr); 407*807171c5SHong Zhang b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 408*807171c5SHong Zhang c = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am; 409*807171c5SHong Zhang for (col=0; col<cn-4; col += 4){ /* over columns of C */ 410*807171c5SHong Zhang for (i=0; i<am; i++) { /* over rows of A in those columns */ 411*807171c5SHong Zhang r1 = r2 = r3 = r4 = 0.0; 412*807171c5SHong Zhang n = ai[i+1] - ai[i]; 413*807171c5SHong Zhang aj = a->j + ai[i]; 414*807171c5SHong Zhang aa = a->a + ai[i]; 415*807171c5SHong Zhang for (j=0; j<n; j++) { 416*807171c5SHong Zhang r1 += (*aa)*b1[*aj]; 417*807171c5SHong Zhang r2 += (*aa)*b2[*aj]; 418*807171c5SHong Zhang r3 += (*aa)*b3[*aj]; 419*807171c5SHong Zhang r4 += (*aa++)*b4[*aj++]; 420*807171c5SHong Zhang } 421*807171c5SHong Zhang c[i] = r1; 422*807171c5SHong Zhang c[am + i] = r2; 423*807171c5SHong Zhang c[am2 + i] = r3; 424*807171c5SHong Zhang c[am3 + i] = r4; 425*807171c5SHong Zhang } 426*807171c5SHong Zhang b1 += bm4; 427*807171c5SHong Zhang b2 += bm4; 428*807171c5SHong Zhang b3 += bm4; 429*807171c5SHong Zhang b4 += bm4; 430*807171c5SHong Zhang 431*807171c5SHong Zhang /* RAB[:,col] = R*C[:,col] */ 432*807171c5SHong Zhang colrm = col*rm; 433*807171c5SHong Zhang for (i=0; i<rm; i++) { /* over rows of R in those columns */ 434*807171c5SHong Zhang r1 = r2 = r3 = r4 = 0.0; 435*807171c5SHong Zhang n = r->i[i+1] - r->i[i]; 436*807171c5SHong Zhang rj = r->j + r->i[i]; 437*807171c5SHong Zhang ra = r->a + r->i[i]; 438*807171c5SHong Zhang for (j=0; j<n; j++) { 439*807171c5SHong Zhang r1 += (*ra)*c[*rj]; 440*807171c5SHong Zhang r2 += (*ra)*c2[*rj]; 441*807171c5SHong Zhang r3 += (*ra)*c3[*rj]; 442*807171c5SHong Zhang r4 += (*ra++)*c4[*rj++]; 443*807171c5SHong Zhang } 444*807171c5SHong Zhang d[colrm + i] = r1; 445*807171c5SHong Zhang d[colrm + rm + i] = r2; 446*807171c5SHong Zhang d[colrm + rm2 + i] = r3; 447*807171c5SHong Zhang d[colrm + rm3 + i] = r4; 448*807171c5SHong Zhang } 449*807171c5SHong Zhang } 450*807171c5SHong Zhang for (;col<cn; col++){ /* over extra columns of C */ 451*807171c5SHong Zhang for (i=0; i<am; i++) { /* over rows of A in those columns */ 452*807171c5SHong Zhang r1 = 0.0; 453*807171c5SHong Zhang n = a->i[i+1] - a->i[i]; 454*807171c5SHong Zhang aj = a->j + a->i[i]; 455*807171c5SHong Zhang aa = a->a + a->i[i]; 456*807171c5SHong Zhang for (j=0; j<n; j++) { 457*807171c5SHong Zhang r1 += (*aa++)*b1[*aj++]; 458*807171c5SHong Zhang } 459*807171c5SHong Zhang c[i] = r1; 460*807171c5SHong Zhang } 461*807171c5SHong Zhang b1 += bm; 462*807171c5SHong Zhang 463*807171c5SHong Zhang for (i=0; i<rm; i++) { /* over rows of R in those columns */ 464*807171c5SHong Zhang r1 = 0.0; 465*807171c5SHong Zhang n = r->i[i+1] - r->i[i]; 466*807171c5SHong Zhang rj = r->j + r->i[i]; 467*807171c5SHong Zhang ra = r->a + r->i[i]; 468*807171c5SHong Zhang for (j=0; j<n; j++) { 469*807171c5SHong Zhang r1 += (*ra++)*c[*rj++]; 470*807171c5SHong Zhang } 471*807171c5SHong Zhang d[col*rm + i] = r1; 472*807171c5SHong Zhang } 473*807171c5SHong Zhang } 474*807171c5SHong Zhang ierr = PetscLogFlops(cn*2.0*(a->nz + r->nz));CHKERRQ(ierr); 475*807171c5SHong Zhang 476*807171c5SHong Zhang ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 477*807171c5SHong Zhang ierr = MatRestoreArray(RAB,&d);CHKERRQ(ierr); 478*807171c5SHong Zhang ierr = MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 479*807171c5SHong Zhang ierr = MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 480*807171c5SHong Zhang PetscFunctionReturn(0); 481*807171c5SHong Zhang } 482*807171c5SHong Zhang 483*807171c5SHong Zhang #undef __FUNCT__ 484*807171c5SHong Zhang #define __FUNCT__ "MatRARtNumeric_SeqAIJ_SeqAIJ" 485*807171c5SHong Zhang PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C) 486*807171c5SHong Zhang { 487*807171c5SHong Zhang PetscErrorCode ierr; 488*807171c5SHong Zhang Mat_RARt *rart; 489*807171c5SHong Zhang PetscContainer container; 490*807171c5SHong Zhang MatTransposeColoring matcoloring; 491*807171c5SHong Zhang Mat Rt,ARt,RARt; 492*807171c5SHong Zhang PetscLogDouble Mult_sp_den=0.0,app1=0.0,app2=0.0,t0,tf; 493*807171c5SHong Zhang 494*807171c5SHong Zhang PetscFunctionBegin; 495*807171c5SHong Zhang ierr = PetscObjectQuery((PetscObject)C,"Mat_RARt",(PetscObject *)&container);CHKERRQ(ierr); 496*807171c5SHong Zhang if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 497*807171c5SHong Zhang ierr = PetscContainerGetPointer(container,(void **)&rart);CHKERRQ(ierr); 498*807171c5SHong Zhang 499*807171c5SHong Zhang /* Get dense Rt by Apply MatTransposeColoring to R */ 500*807171c5SHong Zhang matcoloring = rart->matcoloring; 501*807171c5SHong Zhang Rt = rart->Rt; 502*807171c5SHong Zhang ierr = PetscGetTime(&t0);CHKERRQ(ierr); 503*807171c5SHong Zhang ierr = MatTransColoringApplySpToDen(matcoloring,R,Rt);CHKERRQ(ierr); 504*807171c5SHong Zhang ierr = PetscGetTime(&tf);CHKERRQ(ierr); 505*807171c5SHong Zhang app1 += tf - t0; 506*807171c5SHong Zhang 507*807171c5SHong Zhang /* Get dense RARt = R*A*Rt */ 508*807171c5SHong Zhang ierr = PetscGetTime(&t0);CHKERRQ(ierr); 509*807171c5SHong Zhang RARt = rart->RARt; 510*807171c5SHong Zhang ierr = MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work);CHKERRQ(ierr); 511*807171c5SHong Zhang ierr = PetscGetTime(&tf);CHKERRQ(ierr); 512*807171c5SHong Zhang Mult_sp_den += tf - t0; 513*807171c5SHong Zhang 514*807171c5SHong Zhang /* Recover C from C_dense */ 515*807171c5SHong Zhang ierr = PetscGetTime(&t0);CHKERRQ(ierr); 516*807171c5SHong Zhang ierr = MatTransColoringApplyDenToSp(matcoloring,RARt,C);CHKERRQ(ierr); 517*807171c5SHong Zhang ierr = PetscGetTime(&tf);CHKERRQ(ierr); 518*807171c5SHong Zhang app2 += tf - t0; 519*807171c5SHong Zhang 520*807171c5SHong Zhang #if defined(PETSC_USE_INFO) 521*807171c5SHong Zhang ierr = PetscInfo4(C,"Num = ColorApp %g + %g + Mult_sp_den %g = %g\n",app1,app2,Mult_sp_den,app1+app2+Mult_sp_den); 522*807171c5SHong Zhang #endif 523*807171c5SHong Zhang PetscFunctionReturn(0); 524*807171c5SHong Zhang } 525*807171c5SHong Zhang 526*807171c5SHong Zhang EXTERN_C_BEGIN 527*807171c5SHong Zhang #undef __FUNCT__ 528*807171c5SHong Zhang #define __FUNCT__ "MatRARt_SeqAIJ_SeqAIJ" 529*807171c5SHong Zhang PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C) 530*807171c5SHong Zhang { 531*807171c5SHong Zhang PetscErrorCode ierr; 532*807171c5SHong Zhang 533*807171c5SHong Zhang PetscFunctionBegin; 534*807171c5SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 535*807171c5SHong Zhang ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C);CHKERRQ(ierr); 536*807171c5SHong Zhang } 537*807171c5SHong Zhang ierr = MatRARtNumeric_SeqAIJ_SeqAIJ(A,R,*C);CHKERRQ(ierr); 538*807171c5SHong Zhang PetscFunctionReturn(0); 539*807171c5SHong Zhang } 540*807171c5SHong Zhang EXTERN_C_END 541