1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/utils/freespace.h> 4 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 5 #include <petsctime.h> 6 7 /* 8 Defines projective product routines where A is a SeqAIJ matrix 9 C = R * A * R^T 10 */ 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatDestroy_SeqAIJ_RARt" 14 PetscErrorCode MatDestroy_SeqAIJ_RARt(Mat A) 15 { 16 PetscErrorCode ierr; 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 18 Mat_RARt *rart = a->rart; 19 20 PetscFunctionBegin; 21 ierr = MatTransposeColoringDestroy(&rart->matcoloring);CHKERRQ(ierr); 22 ierr = MatDestroy(&rart->Rt);CHKERRQ(ierr); 23 ierr = MatDestroy(&rart->RARt);CHKERRQ(ierr); 24 ierr = PetscFree(rart->work);CHKERRQ(ierr); 25 26 A->ops->destroy = rart->destroy; 27 if (A->ops->destroy) { 28 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 29 } 30 ierr = PetscFree(rart);CHKERRQ(ierr); 31 PetscFunctionReturn(0); 32 } 33 34 #undef __FUNCT__ 35 #define __FUNCT__ "MatRARtSymbolic_SeqAIJ_SeqAIJ" 36 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat *C) 37 { 38 PetscErrorCode ierr; 39 Mat P; 40 PetscInt *rti,*rtj; 41 Mat_RARt *rart; 42 MatTransposeColoring matcoloring; 43 ISColoring iscoloring; 44 Mat Rt_dense,RARt_dense; 45 PetscLogDouble GColor=0.0,MCCreate=0.0,MDenCreate=0.0,t0,tf,etime=0.0; 46 Mat_SeqAIJ *c; 47 48 PetscFunctionBegin; 49 ierr = PetscTime(&t0);CHKERRQ(ierr); 50 /* create symbolic P=Rt */ 51 ierr = MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); 52 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,NULL,&P);CHKERRQ(ierr); 53 54 /* get symbolic C=Pt*A*P */ 55 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 56 (*C)->rmap->bs = R->rmap->bs; 57 (*C)->cmap->bs = R->rmap->bs; 58 59 /* create a supporting struct */ 60 ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr); 61 c = (Mat_SeqAIJ*)(*C)->data; 62 c->rart = rart; 63 ierr = PetscTime(&tf);CHKERRQ(ierr); 64 etime += tf - t0; 65 66 /* ------ Use coloring ---------- */ 67 /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 68 ierr = PetscTime(&t0);CHKERRQ(ierr); 69 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 70 ierr = PetscTime(&tf);CHKERRQ(ierr); 71 GColor += tf - t0; 72 73 ierr = PetscTime(&t0);CHKERRQ(ierr); 74 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 75 76 rart->matcoloring = matcoloring; 77 78 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 79 ierr = PetscTime(&tf);CHKERRQ(ierr); 80 MCCreate += tf - t0; 81 82 ierr = PetscTime(&t0);CHKERRQ(ierr); 83 /* Create Rt_dense */ 84 ierr = MatCreate(PETSC_COMM_SELF,&Rt_dense);CHKERRQ(ierr); 85 ierr = MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 86 ierr = MatSetType(Rt_dense,MATSEQDENSE);CHKERRQ(ierr); 87 ierr = MatSeqDenseSetPreallocation(Rt_dense,NULL);CHKERRQ(ierr); 88 89 Rt_dense->assembled = PETSC_TRUE; 90 rart->Rt = Rt_dense; 91 92 /* Create RARt_dense = R*A*Rt_dense */ 93 ierr = MatCreate(PETSC_COMM_SELF,&RARt_dense);CHKERRQ(ierr); 94 ierr = MatSetSizes(RARt_dense,(*C)->rmap->n,matcoloring->ncolors,(*C)->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 95 ierr = MatSetType(RARt_dense,MATSEQDENSE);CHKERRQ(ierr); 96 ierr = MatSeqDenseSetPreallocation(RARt_dense,NULL);CHKERRQ(ierr); 97 98 rart->RARt = RARt_dense; 99 100 /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 101 ierr = PetscMalloc(A->rmap->n*4*sizeof(PetscScalar),&rart->work);CHKERRQ(ierr); 102 103 ierr = PetscTime(&tf);CHKERRQ(ierr); 104 MDenCreate += tf - t0; 105 106 rart->destroy = (*C)->ops->destroy; 107 (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt; 108 109 /* clean up */ 110 ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); 111 ierr = MatDestroy(&P);CHKERRQ(ierr); 112 113 #if defined(PETSC_USE_INFO) 114 { 115 PetscReal density= (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n); 116 ierr = PetscInfo6(*C,"RARt_den %D %D; Rt_den %D %D, (RARt->nz %D)/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,Rt_dense->rmap->n,Rt_dense->cmap->n,c->nz,density);CHKERRQ(ierr); 117 ierr = PetscInfo5(*C,"Sym = GetColor %g + MColorCreate %g + MDenCreate %g + other %g = %g\n",GColor,MCCreate,MDenCreate,etime,GColor+MCCreate+MDenCreate+etime);CHKERRQ(ierr); 118 } 119 #endif 120 PetscFunctionReturn(0); 121 } 122 123 /* 124 RAB = R * A * B, R and A in seqaij format, B in dense format; 125 */ 126 #undef __FUNCT__ 127 #define __FUNCT__ "MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense" 128 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work) 129 { 130 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data; 131 PetscErrorCode ierr; 132 PetscScalar *b,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 133 MatScalar *aa,*ra; 134 PetscInt cn =B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n; 135 PetscInt am2=2*am,am3=3*am,bm4=4*bm; 136 PetscScalar *d,*c,*c2,*c3,*c4; 137 PetscInt *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n; 138 PetscInt rm2=2*rm,rm3=3*rm,colrm; 139 PetscLogDouble t0,tf,Mult_sp_den1=0.0,Mult_sp_den2=0.0; 140 141 PetscFunctionBegin; 142 if (!dm || !dn) PetscFunctionReturn(0); 143 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 144 if (am != R->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %D not equal rows in A %D\n",R->cmap->n,am); 145 if (R->rmap->n != RAB->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %D not equal rows in R %D\n",RAB->rmap->n,R->rmap->n); 146 if (B->cmap->n != RAB->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %D not equal columns in B %D\n",RAB->cmap->n,B->cmap->n); 147 148 { /* 149 This approach is not as good as original ones (will be removed later), but it reveals that 150 AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/examples/tutorials/ex56.c 151 */ 152 PetscBool via_matmatmult=PETSC_FALSE; 153 ierr = PetscOptionsGetBool(NULL,"-matrart_via_matmatmult",&via_matmatmult,NULL);CHKERRQ(ierr); 154 if (via_matmatmult) { 155 Mat AB_den; 156 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,0.0,&AB_den);CHKERRQ(ierr); 157 158 ierr = PetscTime(&t0);CHKERRQ(ierr); 159 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,AB_den);CHKERRQ(ierr); 160 ierr = PetscTime(&tf);CHKERRQ(ierr); 161 Mult_sp_den1 += tf - t0; 162 ierr = PetscTime(&t0);CHKERRQ(ierr); 163 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(R,AB_den,RAB);CHKERRQ(ierr); 164 ierr = PetscTime(&tf);CHKERRQ(ierr); 165 Mult_sp_den2 += tf - t0; 166 printf(" MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense...time = %g + %g = %g; ncolor: %d\n",Mult_sp_den1, Mult_sp_den2, Mult_sp_den1+ Mult_sp_den2,B->cmap->n); 167 ierr = MatDestroy(&AB_den);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 } 171 172 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 173 ierr = MatDenseGetArray(RAB,&d);CHKERRQ(ierr); 174 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 175 c = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am; 176 for (col=0; col<cn-4; col += 4) { /* over columns of C */ 177 for (i=0; i<am; i++) { /* over rows of A in those columns */ 178 r1 = r2 = r3 = r4 = 0.0; 179 n = ai[i+1] - ai[i]; 180 aj = a->j + ai[i]; 181 aa = a->a + ai[i]; 182 for (j=0; j<n; j++) { 183 r1 += (*aa)*b1[*aj]; 184 r2 += (*aa)*b2[*aj]; 185 r3 += (*aa)*b3[*aj]; 186 r4 += (*aa++)*b4[*aj++]; 187 } 188 c[i] = r1; 189 c[am + i] = r2; 190 c[am2 + i] = r3; 191 c[am3 + i] = r4; 192 } 193 b1 += bm4; 194 b2 += bm4; 195 b3 += bm4; 196 b4 += bm4; 197 198 /* RAB[:,col] = R*C[:,col] */ 199 colrm = col*rm; 200 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 201 r1 = r2 = r3 = r4 = 0.0; 202 n = r->i[i+1] - r->i[i]; 203 rj = r->j + r->i[i]; 204 ra = r->a + r->i[i]; 205 for (j=0; j<n; j++) { 206 r1 += (*ra)*c[*rj]; 207 r2 += (*ra)*c2[*rj]; 208 r3 += (*ra)*c3[*rj]; 209 r4 += (*ra++)*c4[*rj++]; 210 } 211 d[colrm + i] = r1; 212 d[colrm + rm + i] = r2; 213 d[colrm + rm2 + i] = r3; 214 d[colrm + rm3 + i] = r4; 215 } 216 } 217 for (; col<cn; col++) { /* over extra columns of C */ 218 for (i=0; i<am; i++) { /* over rows of A in those columns */ 219 r1 = 0.0; 220 n = a->i[i+1] - a->i[i]; 221 aj = a->j + a->i[i]; 222 aa = a->a + a->i[i]; 223 for (j=0; j<n; j++) { 224 r1 += (*aa++)*b1[*aj++]; 225 } 226 c[i] = r1; 227 } 228 b1 += bm; 229 230 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 231 r1 = 0.0; 232 n = r->i[i+1] - r->i[i]; 233 rj = r->j + r->i[i]; 234 ra = r->a + r->i[i]; 235 for (j=0; j<n; j++) { 236 r1 += (*ra++)*c[*rj++]; 237 } 238 d[col*rm + i] = r1; 239 } 240 } 241 ierr = PetscLogFlops(cn*2.0*(a->nz + r->nz));CHKERRQ(ierr); 242 243 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 244 ierr = MatDenseRestoreArray(RAB,&d);CHKERRQ(ierr); 245 ierr = MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 246 ierr = MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 #undef __FUNCT__ 251 #define __FUNCT__ "MatRARtNumeric_SeqAIJ_SeqAIJ" 252 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C) 253 { 254 PetscErrorCode ierr; 255 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 256 Mat_RARt *rart=c->rart; 257 MatTransposeColoring matcoloring; 258 Mat Rt,RARt; 259 PetscLogDouble Mult_sp_den=0.0,app1=0.0,app2=0.0,t0,tf; 260 261 PetscFunctionBegin; 262 /* Get dense Rt by Apply MatTransposeColoring to R */ 263 matcoloring = rart->matcoloring; 264 Rt = rart->Rt; 265 266 ierr = PetscTime(&t0);CHKERRQ(ierr); 267 ierr = MatTransColoringApplySpToDen(matcoloring,R,Rt);CHKERRQ(ierr); 268 ierr = PetscTime(&tf);CHKERRQ(ierr); 269 app1 += tf - t0; 270 271 /* Get dense RARt = R*A*Rt -- dominates! */ 272 ierr = PetscTime(&t0);CHKERRQ(ierr); 273 RARt = rart->RARt; 274 ierr = MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work);CHKERRQ(ierr); 275 ierr = PetscTime(&tf);CHKERRQ(ierr); 276 277 Mult_sp_den += tf - t0; 278 279 /* Recover C from C_dense */ 280 ierr = PetscTime(&t0);CHKERRQ(ierr); 281 ierr = MatTransColoringApplyDenToSp(matcoloring,RARt,C);CHKERRQ(ierr); 282 ierr = PetscTime(&tf);CHKERRQ(ierr); 283 284 app2 += tf - t0; 285 286 #if defined(PETSC_USE_INFO) 287 ierr = PetscInfo4(C,"Num = ColorApp %g + %g + Mult_sp_den %g = %g\n",app1,app2,Mult_sp_den,app1+app2+Mult_sp_den);CHKERRQ(ierr); 288 #endif 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatRARt_SeqAIJ_SeqAIJ" 294 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C) 295 { 296 PetscErrorCode ierr; 297 PetscBool usecoloring = PETSC_FALSE,color_art; 298 299 PetscFunctionBegin; 300 ierr = PetscOptionsGetBool(NULL,"-matrart_color",&usecoloring,NULL);CHKERRQ(ierr); 301 302 if (!usecoloring) { /* Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 303 Mat Rt; 304 Mat_SeqAIJ *c; 305 Mat_RARt *rart; 306 307 if (scall == MAT_INITIAL_MATRIX) { 308 ierr = MatTranspose(R,MAT_INITIAL_MATRIX,&Rt);CHKERRQ(ierr); 309 ierr = MatMatMatMult(R,A,Rt,scall,fill,C);CHKERRQ(ierr); 310 311 ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr); 312 rart->Rt = Rt; 313 c = (Mat_SeqAIJ*)(*C)->data; 314 c->rart = rart; 315 rart->destroy = (*C)->ops->destroy; 316 (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt; 317 } else { 318 c = (Mat_SeqAIJ*)(*C)->data; 319 rart = c->rart; 320 Rt = rart->Rt; 321 ierr = MatTranspose(R,MAT_REUSE_MATRIX,&Rt);CHKERRQ(ierr); 322 ierr = MatMatMatMult(R,A,Rt,scall,fill,C);CHKERRQ(ierr); 323 } 324 PetscFunctionReturn(0); 325 } 326 327 /* use coloring */ 328 /*--------------*/ 329 ierr = PetscOptionsGetBool(NULL,"-matrart_color_art",&color_art,NULL);CHKERRQ(ierr); 330 if (color_art) { /* apply coloring to A*R^T, not C = R*A*R^T */ 331 Mat ARt,RARt; 332 PetscLogDouble t0,t1,t2; 333 printf(" MatRARt_color_art ....\n"); 334 if (scall == MAT_INITIAL_MATRIX) { 335 ierr = PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0);CHKERRQ(ierr); 336 /* must use '-mat_no_inode' with '-matmattransmult_color 1' - do not knwo why? */ 337 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,R,fill,&ARt);CHKERRQ(ierr); 338 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(R,ARt,fill,&RARt);CHKERRQ(ierr); 339 ierr = PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0);CHKERRQ(ierr); 340 } 341 ierr = PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0);CHKERRQ(ierr); 342 ierr = PetscTime(&t0);CHKERRQ(ierr); 343 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,R,ARt);CHKERRQ(ierr); /* dominate! */ 344 ierr = PetscTime(&t1);CHKERRQ(ierr); 345 ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(R,ARt,RARt);CHKERRQ(ierr); 346 ierr = PetscTime(&t2);CHKERRQ(ierr); 347 printf(" matrart_color_art_num = %g + %g = %g\n",t1-t0,t2-t1,t2-t0); 348 *C = RARt; 349 ierr = PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0);CHKERRQ(ierr); 350 351 ierr = MatDestroy(&ARt);CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 355 if (scall == MAT_INITIAL_MATRIX) { /* apply coloring to C=R*A*R^T */ 356 ierr = PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0);CHKERRQ(ierr); 357 ierr = MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C);CHKERRQ(ierr); 358 ierr = PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0);CHKERRQ(ierr); 359 } 360 ierr = PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0);CHKERRQ(ierr); 361 ierr = MatRARtNumeric_SeqAIJ_SeqAIJ(A,R,*C);CHKERRQ(ierr); 362 ierr = PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0);CHKERRQ(ierr); 363 PetscFunctionReturn(0); 364 } 365