1807171c5SHong Zhang 23b1b9624SHong Zhang /* 33b1b9624SHong Zhang Defines projective product routines where A is a SeqAIJ matrix 43b1b9624SHong Zhang C = R * A * R^T 53b1b9624SHong Zhang */ 63b1b9624SHong Zhang 7807171c5SHong Zhang #include <../src/mat/impls/aij/seq/aij.h> 8807171c5SHong Zhang #include <../src/mat/utils/freespace.h> 9807171c5SHong Zhang #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 108563dfccSBarry Smith #include <petsctime.h> 11807171c5SHong Zhang 123b1b9624SHong Zhang /* #define RART_PROFILE */ 13807171c5SHong Zhang 14807171c5SHong Zhang #undef __FUNCT__ 15257c235dSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_RARt" 16257c235dSHong Zhang PetscErrorCode MatDestroy_SeqAIJ_RARt(Mat A) 17807171c5SHong Zhang { 18807171c5SHong Zhang PetscErrorCode ierr; 19257c235dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 20257c235dSHong Zhang Mat_RARt *rart = a->rart; 21807171c5SHong Zhang 22807171c5SHong Zhang PetscFunctionBegin; 23807171c5SHong Zhang ierr = MatTransposeColoringDestroy(&rart->matcoloring);CHKERRQ(ierr); 24807171c5SHong Zhang ierr = MatDestroy(&rart->Rt);CHKERRQ(ierr); 25807171c5SHong Zhang ierr = MatDestroy(&rart->RARt);CHKERRQ(ierr); 263b1b9624SHong Zhang ierr = MatDestroy(&rart->ARt);CHKERRQ(ierr); 27807171c5SHong Zhang ierr = PetscFree(rart->work);CHKERRQ(ierr); 28807171c5SHong Zhang 29807171c5SHong Zhang A->ops->destroy = rart->destroy; 30807171c5SHong Zhang if (A->ops->destroy) { 31807171c5SHong Zhang ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 32807171c5SHong Zhang } 33257c235dSHong Zhang ierr = PetscFree(rart);CHKERRQ(ierr); 34807171c5SHong Zhang PetscFunctionReturn(0); 35807171c5SHong Zhang } 36807171c5SHong Zhang 37807171c5SHong Zhang #undef __FUNCT__ 3855bea0ebSHong Zhang #define __FUNCT__ "MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart" 3955bea0ebSHong Zhang PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat *C) 40807171c5SHong Zhang { 41807171c5SHong Zhang PetscErrorCode ierr; 42807171c5SHong Zhang Mat P; 43807171c5SHong Zhang PetscInt *rti,*rtj; 44807171c5SHong Zhang Mat_RARt *rart; 45807171c5SHong Zhang MatTransposeColoring matcoloring; 46807171c5SHong Zhang ISColoring iscoloring; 478d0a38e4SHong Zhang Mat Rt_dense,RARt_dense; 483b1b9624SHong Zhang #if defined(RART_PROFILE) 49807171c5SHong Zhang PetscLogDouble GColor=0.0,MCCreate=0.0,MDenCreate=0.0,t0,tf,etime=0.0; 503b1b9624SHong Zhang #endif 51807171c5SHong Zhang Mat_SeqAIJ *c; 52807171c5SHong Zhang 53807171c5SHong Zhang PetscFunctionBegin; 543b1b9624SHong Zhang #if defined(RART_PROFILE) 558563dfccSBarry Smith ierr = PetscTime(&t0);CHKERRQ(ierr); 563b1b9624SHong Zhang #endif 57807171c5SHong Zhang /* create symbolic P=Rt */ 58807171c5SHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); 590298fd71SBarry Smith ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,NULL,&P);CHKERRQ(ierr); 60807171c5SHong Zhang 61807171c5SHong Zhang /* get symbolic C=Pt*A*P */ 62807171c5SHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 63fd8e3dafSHong Zhang (*C)->rmap->bs = R->rmap->bs; 64fd8e3dafSHong Zhang (*C)->cmap->bs = R->rmap->bs; 6555bea0ebSHong Zhang (*C)->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart; 66807171c5SHong Zhang 67807171c5SHong Zhang /* create a supporting struct */ 68807171c5SHong Zhang ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr); 69257c235dSHong Zhang c = (Mat_SeqAIJ*)(*C)->data; 70257c235dSHong Zhang c->rart = rart; 713b1b9624SHong Zhang #if defined(RART_PROFILE) 728563dfccSBarry Smith ierr = PetscTime(&tf);CHKERRQ(ierr); 73807171c5SHong Zhang etime += tf - t0; 743b1b9624SHong Zhang #endif 75807171c5SHong Zhang 7622e94b5dSHong Zhang /* ------ Use coloring ---------- */ 77*4d478ae7SHong Zhang /* inode causes memory problem, don't know why */ 78*4d478ae7SHong Zhang if (c->inode.use) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MAT_USE_INODES is not supported. Use '-mat_no_inode'"); 79*4d478ae7SHong Zhang 80807171c5SHong Zhang /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 813b1b9624SHong Zhang #if defined(RART_PROFILE) 828563dfccSBarry Smith ierr = PetscTime(&t0);CHKERRQ(ierr); 833b1b9624SHong Zhang #endif 84807171c5SHong Zhang ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 853b1b9624SHong Zhang #if defined(RART_PROFILE) 868563dfccSBarry Smith ierr = PetscTime(&tf);CHKERRQ(ierr); 87807171c5SHong Zhang GColor += tf - t0; 888563dfccSBarry Smith ierr = PetscTime(&t0);CHKERRQ(ierr); 893b1b9624SHong Zhang #endif 90807171c5SHong Zhang ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 912205254eSKarl Rupp 92807171c5SHong Zhang rart->matcoloring = matcoloring; 932205254eSKarl Rupp 94807171c5SHong Zhang ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 953b1b9624SHong Zhang #if defined(RART_PROFILE) 968563dfccSBarry Smith ierr = PetscTime(&tf);CHKERRQ(ierr); 97807171c5SHong Zhang MCCreate += tf - t0; 988563dfccSBarry Smith ierr = PetscTime(&t0);CHKERRQ(ierr); 993b1b9624SHong Zhang #endif 1003b1b9624SHong Zhang 101807171c5SHong Zhang /* Create Rt_dense */ 102807171c5SHong Zhang ierr = MatCreate(PETSC_COMM_SELF,&Rt_dense);CHKERRQ(ierr); 103807171c5SHong Zhang ierr = MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 104807171c5SHong Zhang ierr = MatSetType(Rt_dense,MATSEQDENSE);CHKERRQ(ierr); 1050298fd71SBarry Smith ierr = MatSeqDenseSetPreallocation(Rt_dense,NULL);CHKERRQ(ierr); 1062205254eSKarl Rupp 107807171c5SHong Zhang Rt_dense->assembled = PETSC_TRUE; 108807171c5SHong Zhang rart->Rt = Rt_dense; 109807171c5SHong Zhang 110807171c5SHong Zhang /* Create RARt_dense = R*A*Rt_dense */ 111807171c5SHong Zhang ierr = MatCreate(PETSC_COMM_SELF,&RARt_dense);CHKERRQ(ierr); 112807171c5SHong Zhang ierr = MatSetSizes(RARt_dense,(*C)->rmap->n,matcoloring->ncolors,(*C)->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 113807171c5SHong Zhang ierr = MatSetType(RARt_dense,MATSEQDENSE);CHKERRQ(ierr); 1140298fd71SBarry Smith ierr = MatSeqDenseSetPreallocation(RARt_dense,NULL);CHKERRQ(ierr); 1152205254eSKarl Rupp 116807171c5SHong Zhang rart->RARt = RARt_dense; 117807171c5SHong Zhang 118807171c5SHong Zhang /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 119807171c5SHong Zhang ierr = PetscMalloc(A->rmap->n*4*sizeof(PetscScalar),&rart->work);CHKERRQ(ierr); 120807171c5SHong Zhang 1213b1b9624SHong Zhang #if defined(RART_PROFILE) 1228563dfccSBarry Smith ierr = PetscTime(&tf);CHKERRQ(ierr); 123807171c5SHong Zhang MDenCreate += tf - t0; 1243b1b9624SHong Zhang #endif 125807171c5SHong Zhang 126807171c5SHong Zhang rart->destroy = (*C)->ops->destroy; 127807171c5SHong Zhang (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt; 128807171c5SHong Zhang 129807171c5SHong Zhang /* clean up */ 130807171c5SHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); 131807171c5SHong Zhang ierr = MatDestroy(&P);CHKERRQ(ierr); 132807171c5SHong Zhang 133807171c5SHong Zhang #if defined(PETSC_USE_INFO) 1341ce71dffSSatish Balay { 135807171c5SHong Zhang PetscReal density= (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n); 136a2ea699eSBarry Smith ierr = PetscInfo6(*C,"RARt_den %D %D; Rt_den %D %D, (RARt->nz %D)/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,Rt_dense->rmap->n,Rt_dense->cmap->n,c->nz,density);CHKERRQ(ierr); 1373b1b9624SHong Zhang #if defined(RART_PROFILE) 138a2ea699eSBarry Smith ierr = PetscInfo5(*C,"Sym = GetColor %g + MColorCreate %g + MDenCreate %g + other %g = %g\n",GColor,MCCreate,MDenCreate,etime,GColor+MCCreate+MDenCreate+etime);CHKERRQ(ierr); 1393b1b9624SHong Zhang #endif 1401ce71dffSSatish Balay } 141807171c5SHong Zhang #endif 142807171c5SHong Zhang PetscFunctionReturn(0); 143807171c5SHong Zhang } 144807171c5SHong Zhang 145807171c5SHong Zhang /* 146807171c5SHong Zhang RAB = R * A * B, R and A in seqaij format, B in dense format; 147807171c5SHong Zhang */ 148807171c5SHong Zhang #undef __FUNCT__ 149807171c5SHong Zhang #define __FUNCT__ "MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense" 150807171c5SHong Zhang PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work) 151807171c5SHong Zhang { 152807171c5SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data; 153807171c5SHong Zhang PetscErrorCode ierr; 154807171c5SHong Zhang PetscScalar *b,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 155807171c5SHong Zhang MatScalar *aa,*ra; 156807171c5SHong Zhang PetscInt cn =B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n; 157807171c5SHong Zhang PetscInt am2=2*am,am3=3*am,bm4=4*bm; 158807171c5SHong Zhang PetscScalar *d,*c,*c2,*c3,*c4; 159807171c5SHong Zhang PetscInt *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n; 160807171c5SHong Zhang PetscInt rm2=2*rm,rm3=3*rm,colrm; 1613b1b9624SHong Zhang #if defined(RART_PROFILE) 162274010c0SHong Zhang PetscLogDouble t0,tf,Mult_sp_den1=0.0,Mult_sp_den2=0.0; 1633b1b9624SHong Zhang #endif 164807171c5SHong Zhang 165807171c5SHong Zhang PetscFunctionBegin; 166807171c5SHong Zhang if (!dm || !dn) PetscFunctionReturn(0); 167807171c5SHong Zhang if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 168807171c5SHong Zhang if (am != R->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %D not equal rows in A %D\n",R->cmap->n,am); 169807171c5SHong Zhang if (R->rmap->n != RAB->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %D not equal rows in R %D\n",RAB->rmap->n,R->rmap->n); 170807171c5SHong Zhang if (B->cmap->n != RAB->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %D not equal columns in B %D\n",RAB->cmap->n,B->cmap->n); 171807171c5SHong Zhang 172274010c0SHong Zhang { /* 173274010c0SHong Zhang This approach is not as good as original ones (will be removed later), but it reveals that 174274010c0SHong Zhang AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/examples/tutorials/ex56.c 175274010c0SHong Zhang */ 176274010c0SHong Zhang PetscBool via_matmatmult=PETSC_FALSE; 177274010c0SHong Zhang ierr = PetscOptionsGetBool(NULL,"-matrart_via_matmatmult",&via_matmatmult,NULL);CHKERRQ(ierr); 178274010c0SHong Zhang if (via_matmatmult) { 179274010c0SHong Zhang Mat AB_den; 180274010c0SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,0.0,&AB_den);CHKERRQ(ierr); 181274010c0SHong Zhang 1823b1b9624SHong Zhang #if defined(RART_PROFILE) 18326be7272SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 1843b1b9624SHong Zhang #endif 185274010c0SHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,AB_den);CHKERRQ(ierr); 1863b1b9624SHong Zhang #if defined(RART_PROFILE) 18726be7272SHong Zhang ierr = PetscTime(&tf);CHKERRQ(ierr); 188274010c0SHong Zhang Mult_sp_den1 += tf - t0; 18926be7272SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 1903b1b9624SHong Zhang #endif 191274010c0SHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqDense(R,AB_den,RAB);CHKERRQ(ierr); 1923b1b9624SHong Zhang #if defined(RART_PROFILE) 19326be7272SHong Zhang ierr = PetscTime(&tf);CHKERRQ(ierr); 194274010c0SHong Zhang Mult_sp_den2 += tf - t0; 195274010c0SHong Zhang printf(" MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense...time = %g + %g = %g; ncolor: %d\n",Mult_sp_den1, Mult_sp_den2, Mult_sp_den1+ Mult_sp_den2,B->cmap->n); 1963b1b9624SHong Zhang #endif 197274010c0SHong Zhang ierr = MatDestroy(&AB_den);CHKERRQ(ierr); 198274010c0SHong Zhang PetscFunctionReturn(0); 199274010c0SHong Zhang } 200274010c0SHong Zhang } 201274010c0SHong Zhang 2028c778c55SBarry Smith ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 2038c778c55SBarry Smith ierr = MatDenseGetArray(RAB,&d);CHKERRQ(ierr); 204807171c5SHong Zhang b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 205807171c5SHong Zhang c = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am; 206807171c5SHong Zhang for (col=0; col<cn-4; col += 4) { /* over columns of C */ 207807171c5SHong Zhang for (i=0; i<am; i++) { /* over rows of A in those columns */ 208807171c5SHong Zhang r1 = r2 = r3 = r4 = 0.0; 209807171c5SHong Zhang n = ai[i+1] - ai[i]; 210807171c5SHong Zhang aj = a->j + ai[i]; 211807171c5SHong Zhang aa = a->a + ai[i]; 212807171c5SHong Zhang for (j=0; j<n; j++) { 213807171c5SHong Zhang r1 += (*aa)*b1[*aj]; 214807171c5SHong Zhang r2 += (*aa)*b2[*aj]; 215807171c5SHong Zhang r3 += (*aa)*b3[*aj]; 216807171c5SHong Zhang r4 += (*aa++)*b4[*aj++]; 217807171c5SHong Zhang } 218807171c5SHong Zhang c[i] = r1; 219807171c5SHong Zhang c[am + i] = r2; 220807171c5SHong Zhang c[am2 + i] = r3; 221807171c5SHong Zhang c[am3 + i] = r4; 222807171c5SHong Zhang } 223807171c5SHong Zhang b1 += bm4; 224807171c5SHong Zhang b2 += bm4; 225807171c5SHong Zhang b3 += bm4; 226807171c5SHong Zhang b4 += bm4; 227807171c5SHong Zhang 228807171c5SHong Zhang /* RAB[:,col] = R*C[:,col] */ 229807171c5SHong Zhang colrm = col*rm; 230807171c5SHong Zhang for (i=0; i<rm; i++) { /* over rows of R in those columns */ 231807171c5SHong Zhang r1 = r2 = r3 = r4 = 0.0; 232807171c5SHong Zhang n = r->i[i+1] - r->i[i]; 233807171c5SHong Zhang rj = r->j + r->i[i]; 234807171c5SHong Zhang ra = r->a + r->i[i]; 235807171c5SHong Zhang for (j=0; j<n; j++) { 236807171c5SHong Zhang r1 += (*ra)*c[*rj]; 237807171c5SHong Zhang r2 += (*ra)*c2[*rj]; 238807171c5SHong Zhang r3 += (*ra)*c3[*rj]; 239807171c5SHong Zhang r4 += (*ra++)*c4[*rj++]; 240807171c5SHong Zhang } 241807171c5SHong Zhang d[colrm + i] = r1; 242807171c5SHong Zhang d[colrm + rm + i] = r2; 243807171c5SHong Zhang d[colrm + rm2 + i] = r3; 244807171c5SHong Zhang d[colrm + rm3 + i] = r4; 245807171c5SHong Zhang } 246807171c5SHong Zhang } 247807171c5SHong Zhang for (; col<cn; col++) { /* over extra columns of C */ 248807171c5SHong Zhang for (i=0; i<am; i++) { /* over rows of A in those columns */ 249807171c5SHong Zhang r1 = 0.0; 250807171c5SHong Zhang n = a->i[i+1] - a->i[i]; 251807171c5SHong Zhang aj = a->j + a->i[i]; 252807171c5SHong Zhang aa = a->a + a->i[i]; 253807171c5SHong Zhang for (j=0; j<n; j++) { 254807171c5SHong Zhang r1 += (*aa++)*b1[*aj++]; 255807171c5SHong Zhang } 256807171c5SHong Zhang c[i] = r1; 257807171c5SHong Zhang } 258807171c5SHong Zhang b1 += bm; 259807171c5SHong Zhang 260807171c5SHong Zhang for (i=0; i<rm; i++) { /* over rows of R in those columns */ 261807171c5SHong Zhang r1 = 0.0; 262807171c5SHong Zhang n = r->i[i+1] - r->i[i]; 263807171c5SHong Zhang rj = r->j + r->i[i]; 264807171c5SHong Zhang ra = r->a + r->i[i]; 265807171c5SHong Zhang for (j=0; j<n; j++) { 266807171c5SHong Zhang r1 += (*ra++)*c[*rj++]; 267807171c5SHong Zhang } 268807171c5SHong Zhang d[col*rm + i] = r1; 269807171c5SHong Zhang } 270807171c5SHong Zhang } 271807171c5SHong Zhang ierr = PetscLogFlops(cn*2.0*(a->nz + r->nz));CHKERRQ(ierr); 272807171c5SHong Zhang 2738c778c55SBarry Smith ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 2748c778c55SBarry Smith ierr = MatDenseRestoreArray(RAB,&d);CHKERRQ(ierr); 275807171c5SHong Zhang ierr = MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 276807171c5SHong Zhang ierr = MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 277807171c5SHong Zhang PetscFunctionReturn(0); 278807171c5SHong Zhang } 279807171c5SHong Zhang 280807171c5SHong Zhang #undef __FUNCT__ 28155bea0ebSHong Zhang #define __FUNCT__ "MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart" 28255bea0ebSHong Zhang PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C) 283807171c5SHong Zhang { 284807171c5SHong Zhang PetscErrorCode ierr; 285257c235dSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 286257c235dSHong Zhang Mat_RARt *rart=c->rart; 287807171c5SHong Zhang MatTransposeColoring matcoloring; 2888d0a38e4SHong Zhang Mat Rt,RARt; 2893b1b9624SHong Zhang #if defined(RART_PROFILE) 290807171c5SHong Zhang PetscLogDouble Mult_sp_den=0.0,app1=0.0,app2=0.0,t0,tf; 2913b1b9624SHong Zhang #endif 292807171c5SHong Zhang 293807171c5SHong Zhang PetscFunctionBegin; 294807171c5SHong Zhang /* Get dense Rt by Apply MatTransposeColoring to R */ 295807171c5SHong Zhang matcoloring = rart->matcoloring; 296807171c5SHong Zhang Rt = rart->Rt; 2972205254eSKarl Rupp 2983b1b9624SHong Zhang #if defined(RART_PROFILE) 2998563dfccSBarry Smith ierr = PetscTime(&t0);CHKERRQ(ierr); 3003b1b9624SHong Zhang #endif 301807171c5SHong Zhang ierr = MatTransColoringApplySpToDen(matcoloring,R,Rt);CHKERRQ(ierr); 3023b1b9624SHong Zhang #if defined(RART_PROFILE) 3038563dfccSBarry Smith ierr = PetscTime(&tf);CHKERRQ(ierr); 304807171c5SHong Zhang app1 += tf - t0; 3053b1b9624SHong Zhang #endif 306807171c5SHong Zhang 30750647e95SHong Zhang /* Get dense RARt = R*A*Rt -- dominates! */ 3083b1b9624SHong Zhang #if defined(RART_PROFILE) 3098563dfccSBarry Smith ierr = PetscTime(&t0);CHKERRQ(ierr); 3103b1b9624SHong Zhang #endif 311807171c5SHong Zhang RARt = rart->RARt; 312807171c5SHong Zhang ierr = MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work);CHKERRQ(ierr); 3133b1b9624SHong Zhang #if defined(RART_PROFILE) 3148563dfccSBarry Smith ierr = PetscTime(&tf);CHKERRQ(ierr); 315807171c5SHong Zhang Mult_sp_den += tf - t0; 3163b1b9624SHong Zhang #endif 317807171c5SHong Zhang 318807171c5SHong Zhang /* Recover C from C_dense */ 3193b1b9624SHong Zhang #if defined(RART_PROFILE) 3208563dfccSBarry Smith ierr = PetscTime(&t0);CHKERRQ(ierr); 3213b1b9624SHong Zhang #endif 322807171c5SHong Zhang ierr = MatTransColoringApplyDenToSp(matcoloring,RARt,C);CHKERRQ(ierr); 3233b1b9624SHong Zhang #if defined(RART_PROFILE) 3248563dfccSBarry Smith ierr = PetscTime(&tf);CHKERRQ(ierr); 325807171c5SHong Zhang app2 += tf - t0; 3263b1b9624SHong Zhang #endif 327807171c5SHong Zhang 3283b1b9624SHong Zhang #if defined(RART_PROFILE) 3293b1b9624SHong Zhang printf("MatRARtNumeric_SeqAIJ_SeqAIJ: ColorApp %g + %g + Mult_sp_den %g = %g\n",app1,app2,Mult_sp_den,app1+app2+Mult_sp_den);CHKERRQ(ierr); 330807171c5SHong Zhang #endif 331807171c5SHong Zhang PetscFunctionReturn(0); 332807171c5SHong Zhang } 333807171c5SHong Zhang 334807171c5SHong Zhang #undef __FUNCT__ 33555bea0ebSHong Zhang #define __FUNCT__ "MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult" 33655bea0ebSHong Zhang PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat *C) 337807171c5SHong Zhang { 338807171c5SHong Zhang PetscErrorCode ierr; 33955bea0ebSHong Zhang Mat ARt,RARt; 3404fa85fdeSHong Zhang Mat_SeqAIJ *c; 3414fa85fdeSHong Zhang Mat_RARt *rart; 3424fa85fdeSHong Zhang 34395a72cc5SHong Zhang PetscFunctionBegin; 344b3b4fc5aSHong Zhang /* must use '-mat_no_inode' with '-matmattransmult_color 1' - do not knwo why? */ 34536104f73SHong Zhang ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,R,fill,&ARt);CHKERRQ(ierr); 34636104f73SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(R,ARt,fill,&RARt);CHKERRQ(ierr); 3473b1b9624SHong Zhang *C = RARt; 34855bea0ebSHong Zhang RARt->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult; 34955bea0ebSHong Zhang 3503b1b9624SHong Zhang ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr); 3513b1b9624SHong Zhang c = (Mat_SeqAIJ*)(*C)->data; 3523b1b9624SHong Zhang c->rart = rart; 3533b1b9624SHong Zhang rart->ARt = ARt; 3543b1b9624SHong Zhang rart->destroy = RARt->ops->destroy; 3553b1b9624SHong Zhang RARt->ops->destroy = MatDestroy_SeqAIJ_RARt; 35655bea0ebSHong Zhang PetscFunctionReturn(0); 35736104f73SHong Zhang } 35855bea0ebSHong Zhang 35955bea0ebSHong Zhang #undef __FUNCT__ 36055bea0ebSHong Zhang #define __FUNCT__ "MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult" 36155bea0ebSHong Zhang PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C) 36255bea0ebSHong Zhang { 36355bea0ebSHong Zhang PetscErrorCode ierr; 36455bea0ebSHong Zhang Mat_SeqAIJ *c=(Mat_SeqAIJ*)C->data; 36555bea0ebSHong Zhang Mat_RARt *rart=c->rart; 36655bea0ebSHong Zhang Mat ARt=rart->ARt; 36755bea0ebSHong Zhang #if defined(RART_PROFILE) 36855bea0ebSHong Zhang PetscLogDouble t0,t1,t2; 36955bea0ebSHong Zhang #endif 37055bea0ebSHong Zhang 37155bea0ebSHong Zhang PetscFunctionBegin; 3723b1b9624SHong Zhang #if defined(RART_PROFILE) 37326be7272SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 3743b1b9624SHong Zhang #endif 375b3b4fc5aSHong Zhang ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,R,ARt);CHKERRQ(ierr); /* dominate! */ 3763b1b9624SHong Zhang #if defined(RART_PROFILE) 37726be7272SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 3783b1b9624SHong Zhang #endif 37955bea0ebSHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(R,ARt,C);CHKERRQ(ierr); 3803b1b9624SHong Zhang #if defined(RART_PROFILE) 38126be7272SHong Zhang ierr = PetscTime(&t2);CHKERRQ(ierr); 382b3b4fc5aSHong Zhang printf(" matrart_color_art_num = %g + %g = %g\n",t1-t0,t2-t1,t2-t0); 3833b1b9624SHong Zhang #endif 38455bea0ebSHong Zhang 38595a72cc5SHong Zhang #if defined(PETSC_USE_INFO) 38655bea0ebSHong Zhang ierr = PetscInfo(C,"Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n");CHKERRQ(ierr); 38795a72cc5SHong Zhang #endif 38855bea0ebSHong Zhang PetscFunctionReturn(0); 389807171c5SHong Zhang } 39055bea0ebSHong Zhang 39155bea0ebSHong Zhang #undef __FUNCT__ 39255bea0ebSHong Zhang #define __FUNCT__ "MatRARtSymbolic_SeqAIJ_SeqAIJ" 39355bea0ebSHong Zhang PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat *C) 39455bea0ebSHong Zhang { 39555bea0ebSHong Zhang PetscErrorCode ierr; 39695a72cc5SHong Zhang Mat Rt; 39755bea0ebSHong Zhang Mat_SeqAIJ *c; 39855bea0ebSHong Zhang Mat_RARt *rart; 39955bea0ebSHong Zhang 40055bea0ebSHong Zhang PetscFunctionBegin; 4015008f5a7SHong Zhang ierr = MatTranspose_SeqAIJ(R,MAT_INITIAL_MATRIX,&Rt);CHKERRQ(ierr); 4025008f5a7SHong Zhang ierr = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R,A,Rt,fill,C);CHKERRQ(ierr); 40395a72cc5SHong Zhang 40495a72cc5SHong Zhang ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr); 40595a72cc5SHong Zhang rart->Rt = Rt; 40695a72cc5SHong Zhang c = (Mat_SeqAIJ*)(*C)->data; 40795a72cc5SHong Zhang c->rart = rart; 40895a72cc5SHong Zhang rart->destroy = (*C)->ops->destroy; 40995a72cc5SHong Zhang (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt; 41055bea0ebSHong Zhang (*C)->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ; 41155bea0ebSHong Zhang PetscFunctionReturn(0); 41255bea0ebSHong Zhang } 41355bea0ebSHong Zhang 41455bea0ebSHong Zhang #undef __FUNCT__ 41555bea0ebSHong Zhang #define __FUNCT__ "MatRARtNumeric_SeqAIJ_SeqAIJ" 41655bea0ebSHong Zhang PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C) 41755bea0ebSHong Zhang { 41855bea0ebSHong Zhang PetscErrorCode ierr; 41955bea0ebSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 42055bea0ebSHong Zhang Mat_RARt *rart = c->rart; 42155bea0ebSHong Zhang Mat Rt = rart->Rt; 42255bea0ebSHong Zhang 42355bea0ebSHong Zhang PetscFunctionBegin; 42455bea0ebSHong Zhang ierr = MatTranspose_SeqAIJ(R,MAT_REUSE_MATRIX,&Rt);CHKERRQ(ierr); 42555bea0ebSHong Zhang ierr = MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R,A,Rt,C);CHKERRQ(ierr); 42655bea0ebSHong Zhang #if defined(PETSC_USE_INFO) 42755bea0ebSHong Zhang ierr = PetscInfo(C,"Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n");CHKERRQ(ierr); 42855bea0ebSHong Zhang #endif 42955bea0ebSHong Zhang PetscFunctionReturn(0); 43055bea0ebSHong Zhang } 43155bea0ebSHong Zhang 43255bea0ebSHong Zhang #undef __FUNCT__ 43355bea0ebSHong Zhang #define __FUNCT__ "MatRARt_SeqAIJ_SeqAIJ" 43455bea0ebSHong Zhang PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C) 43555bea0ebSHong Zhang { 43655bea0ebSHong Zhang PetscErrorCode ierr; 43755bea0ebSHong Zhang const char *algTypes[3] = {"matmatmatmult","matmattransposemult","coloring_rart"}; 43855bea0ebSHong Zhang PetscInt alg=0; /* set default algorithm */ 43955bea0ebSHong Zhang 44055bea0ebSHong Zhang PetscFunctionBegin; 44155bea0ebSHong Zhang if (scall == MAT_INITIAL_MATRIX) { 44255bea0ebSHong Zhang /* ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); -- prefix ? */ 44355bea0ebSHong Zhang ierr = PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,3,algTypes[0],&alg,NULL);CHKERRQ(ierr); 44455bea0ebSHong Zhang /* ierr = PetscOptionsEnd();CHKERRQ(ierr); */ 44555bea0ebSHong Zhang ierr = PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0);CHKERRQ(ierr); 44655bea0ebSHong Zhang switch (alg) { 44755bea0ebSHong Zhang case 1: 44855bea0ebSHong Zhang /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */ 44955bea0ebSHong Zhang ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,C);CHKERRQ(ierr); 45055bea0ebSHong Zhang break; 45155bea0ebSHong Zhang case 2: 45255bea0ebSHong Zhang /* via coloring_rart: apply coloring C = R*A*R^T */ 45355bea0ebSHong Zhang ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,C);CHKERRQ(ierr); 45455bea0ebSHong Zhang break; 45555bea0ebSHong Zhang default: 45655bea0ebSHong Zhang /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 45755bea0ebSHong Zhang ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C);CHKERRQ(ierr); 45855bea0ebSHong Zhang break; 45955bea0ebSHong Zhang } 4605008f5a7SHong Zhang ierr = PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0);CHKERRQ(ierr); 4615008f5a7SHong Zhang } 4625008f5a7SHong Zhang 4635008f5a7SHong Zhang ierr = PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0);CHKERRQ(ierr); 46455bea0ebSHong Zhang ierr = (*(*C)->ops->rartnumeric)(A,R,*C);CHKERRQ(ierr); 4655008f5a7SHong Zhang ierr = PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0);CHKERRQ(ierr); 466807171c5SHong Zhang PetscFunctionReturn(0); 467807171c5SHong Zhang } 468