1 2 /* 3 Defines projective product routines where A is a SeqAIJ matrix 4 C = R * A * R^T 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> 8 #include <../src/mat/utils/freespace.h> 9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 10 11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) 12 { 13 Mat_RARt *rart = (Mat_RARt*)data; 14 15 PetscFunctionBegin; 16 PetscCall(MatTransposeColoringDestroy(&rart->matcoloring)); 17 PetscCall(MatDestroy(&rart->Rt)); 18 PetscCall(MatDestroy(&rart->RARt)); 19 PetscCall(MatDestroy(&rart->ARt)); 20 PetscCall(PetscFree(rart->work)); 21 if (rart->destroy) PetscCall((*rart->destroy)(rart->data)); 22 PetscCall(PetscFree(rart)); 23 PetscFunctionReturn(0); 24 } 25 26 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C) 27 { 28 Mat P; 29 Mat_RARt *rart; 30 MatColoring coloring; 31 MatTransposeColoring matcoloring; 32 ISColoring iscoloring; 33 Mat Rt_dense,RARt_dense; 34 35 PetscFunctionBegin; 36 MatCheckProduct(C,4); 37 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 38 /* create symbolic P=Rt */ 39 PetscCall(MatTransposeSymbolic(R,&P)); 40 41 /* get symbolic C=Pt*A*P */ 42 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C)); 43 PetscCall(MatSetBlockSizes(C,PetscAbs(R->rmap->bs),PetscAbs(R->rmap->bs))); 44 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart; 45 46 /* create a supporting struct */ 47 PetscCall(PetscNew(&rart)); 48 C->product->data = rart; 49 C->product->destroy = MatDestroy_SeqAIJ_RARt; 50 51 /* ------ Use coloring ---------- */ 52 /* inode causes memory problem */ 53 PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE)); 54 55 /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 56 PetscCall(MatColoringCreate(C,&coloring)); 57 PetscCall(MatColoringSetDistance(coloring,2)); 58 PetscCall(MatColoringSetType(coloring,MATCOLORINGSL)); 59 PetscCall(MatColoringSetFromOptions(coloring)); 60 PetscCall(MatColoringApply(coloring,&iscoloring)); 61 PetscCall(MatColoringDestroy(&coloring)); 62 PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring)); 63 64 rart->matcoloring = matcoloring; 65 PetscCall(ISColoringDestroy(&iscoloring)); 66 67 /* Create Rt_dense */ 68 PetscCall(MatCreate(PETSC_COMM_SELF,&Rt_dense)); 69 PetscCall(MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors)); 70 PetscCall(MatSetType(Rt_dense,MATSEQDENSE)); 71 PetscCall(MatSeqDenseSetPreallocation(Rt_dense,NULL)); 72 73 Rt_dense->assembled = PETSC_TRUE; 74 rart->Rt = Rt_dense; 75 76 /* Create RARt_dense = R*A*Rt_dense */ 77 PetscCall(MatCreate(PETSC_COMM_SELF,&RARt_dense)); 78 PetscCall(MatSetSizes(RARt_dense,C->rmap->n,matcoloring->ncolors,C->rmap->n,matcoloring->ncolors)); 79 PetscCall(MatSetType(RARt_dense,MATSEQDENSE)); 80 PetscCall(MatSeqDenseSetPreallocation(RARt_dense,NULL)); 81 82 rart->RARt = RARt_dense; 83 84 /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 85 PetscCall(PetscMalloc1(A->rmap->n*4,&rart->work)); 86 87 /* clean up */ 88 PetscCall(MatDestroy(&P)); 89 90 #if defined(PETSC_USE_INFO) 91 { 92 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 93 PetscReal density = (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n); 94 PetscCall(PetscInfo(C,"C=R*(A*Rt) via coloring C - use sparse-dense inner products\n")); 95 PetscCall(PetscInfo(C,"RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,R->cmap->n,R->rmap->n,c->nz,(double)density)); 96 } 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 /* 102 RAB = R * A * B, R and A in seqaij format, B in dense format; 103 */ 104 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work) 105 { 106 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data; 107 PetscScalar r1,r2,r3,r4; 108 const PetscScalar *b,*b1,*b2,*b3,*b4; 109 MatScalar *aa,*ra; 110 PetscInt cn =B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n; 111 PetscInt am2=2*am,am3=3*am,bm4=4*bm; 112 PetscScalar *d,*c,*c2,*c3,*c4; 113 PetscInt *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n; 114 PetscInt rm2=2*rm,rm3=3*rm,colrm; 115 116 PetscFunctionBegin; 117 if (!dm || !dn) PetscFunctionReturn(0); 118 PetscCheck(bm == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,bm); 119 PetscCheck(am == R->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,R->cmap->n,am); 120 PetscCheck(R->rmap->n == RAB->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT,RAB->rmap->n,R->rmap->n); 121 PetscCheck(B->cmap->n == RAB->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT,RAB->cmap->n,B->cmap->n); 122 123 { /* 124 This approach is not as good as original ones (will be removed later), but it reveals that 125 AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c 126 */ 127 PetscBool via_matmatmult=PETSC_FALSE; 128 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matrart_via_matmatmult",&via_matmatmult,NULL)); 129 if (via_matmatmult) { 130 Mat AB_den = NULL; 131 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&AB_den)); 132 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,0.0,AB_den)); 133 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,B,AB_den)); 134 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R,AB_den,RAB)); 135 PetscCall(MatDestroy(&AB_den)); 136 PetscFunctionReturn(0); 137 } 138 } 139 140 PetscCall(MatDenseGetArrayRead(B,&b)); 141 PetscCall(MatDenseGetArray(RAB,&d)); 142 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 143 c = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am; 144 for (col=0; col<cn-4; col += 4) { /* over columns of C */ 145 for (i=0; i<am; i++) { /* over rows of A in those columns */ 146 r1 = r2 = r3 = r4 = 0.0; 147 n = ai[i+1] - ai[i]; 148 aj = a->j + ai[i]; 149 aa = a->a + ai[i]; 150 for (j=0; j<n; j++) { 151 r1 += (*aa)*b1[*aj]; 152 r2 += (*aa)*b2[*aj]; 153 r3 += (*aa)*b3[*aj]; 154 r4 += (*aa++)*b4[*aj++]; 155 } 156 c[i] = r1; 157 c[am + i] = r2; 158 c[am2 + i] = r3; 159 c[am3 + i] = r4; 160 } 161 b1 += bm4; 162 b2 += bm4; 163 b3 += bm4; 164 b4 += bm4; 165 166 /* RAB[:,col] = R*C[:,col] */ 167 colrm = col*rm; 168 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 169 r1 = r2 = r3 = r4 = 0.0; 170 n = r->i[i+1] - r->i[i]; 171 rj = r->j + r->i[i]; 172 ra = r->a + r->i[i]; 173 for (j=0; j<n; j++) { 174 r1 += (*ra)*c[*rj]; 175 r2 += (*ra)*c2[*rj]; 176 r3 += (*ra)*c3[*rj]; 177 r4 += (*ra++)*c4[*rj++]; 178 } 179 d[colrm + i] = r1; 180 d[colrm + rm + i] = r2; 181 d[colrm + rm2 + i] = r3; 182 d[colrm + rm3 + i] = r4; 183 } 184 } 185 for (; col<cn; col++) { /* over extra columns of C */ 186 for (i=0; i<am; i++) { /* over rows of A in those columns */ 187 r1 = 0.0; 188 n = a->i[i+1] - a->i[i]; 189 aj = a->j + a->i[i]; 190 aa = a->a + a->i[i]; 191 for (j=0; j<n; j++) { 192 r1 += (*aa++)*b1[*aj++]; 193 } 194 c[i] = r1; 195 } 196 b1 += bm; 197 198 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 199 r1 = 0.0; 200 n = r->i[i+1] - r->i[i]; 201 rj = r->j + r->i[i]; 202 ra = r->a + r->i[i]; 203 for (j=0; j<n; j++) { 204 r1 += (*ra++)*c[*rj++]; 205 } 206 d[col*rm + i] = r1; 207 } 208 } 209 PetscCall(PetscLogFlops(cn*2.0*(a->nz + r->nz))); 210 211 PetscCall(MatDenseRestoreArrayRead(B,&b)); 212 PetscCall(MatDenseRestoreArray(RAB,&d)); 213 PetscCall(MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY)); 214 PetscCall(MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY)); 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C) 219 { 220 Mat_RARt *rart; 221 MatTransposeColoring matcoloring; 222 Mat Rt,RARt; 223 224 PetscFunctionBegin; 225 MatCheckProduct(C,3); 226 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 227 rart = (Mat_RARt*)C->product->data; 228 229 /* Get dense Rt by Apply MatTransposeColoring to R */ 230 matcoloring = rart->matcoloring; 231 Rt = rart->Rt; 232 PetscCall(MatTransColoringApplySpToDen(matcoloring,R,Rt)); 233 234 /* Get dense RARt = R*A*Rt -- dominates! */ 235 RARt = rart->RARt; 236 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work)); 237 238 /* Recover C from C_dense */ 239 PetscCall(MatTransColoringApplyDenToSp(matcoloring,RARt,C)); 240 PetscFunctionReturn(0); 241 } 242 243 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C) 244 { 245 Mat ARt; 246 Mat_RARt *rart; 247 char *alg; 248 249 PetscFunctionBegin; 250 MatCheckProduct(C,4); 251 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 252 /* create symbolic ARt = A*R^T */ 253 PetscCall(MatProductCreate(A,R,NULL,&ARt)); 254 PetscCall(MatProductSetType(ARt,MATPRODUCT_ABt)); 255 PetscCall(MatProductSetAlgorithm(ARt,"sorted")); 256 PetscCall(MatProductSetFill(ARt,fill)); 257 PetscCall(MatProductSetFromOptions(ARt)); 258 PetscCall(MatProductSymbolic(ARt)); 259 260 /* compute symbolic C = R*ARt */ 261 /* set algorithm for C = R*ARt */ 262 PetscCall(PetscStrallocpy(C->product->alg,&alg)); 263 PetscCall(MatProductSetAlgorithm(C,"sorted")); 264 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R,ARt,fill,C)); 265 /* resume original algorithm for C */ 266 PetscCall(MatProductSetAlgorithm(C,alg)); 267 PetscCall(PetscFree(alg)); 268 269 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult; 270 271 PetscCall(PetscNew(&rart)); 272 rart->ARt = ARt; 273 C->product->data = rart; 274 C->product->destroy = MatDestroy_SeqAIJ_RARt; 275 PetscCall(PetscInfo(C,"Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n")); 276 PetscFunctionReturn(0); 277 } 278 279 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C) 280 { 281 Mat_RARt *rart; 282 283 PetscFunctionBegin; 284 MatCheckProduct(C,3); 285 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 286 rart = (Mat_RARt*)C->product->data; 287 PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,R,rart->ARt)); /* dominate! */ 288 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R,rart->ARt,C)); 289 PetscFunctionReturn(0); 290 } 291 292 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C) 293 { 294 Mat Rt; 295 Mat_RARt *rart; 296 297 PetscFunctionBegin; 298 MatCheckProduct(C,4); 299 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 300 PetscCall(MatTranspose(R,MAT_INITIAL_MATRIX,&Rt)); 301 PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R,A,Rt,fill,C)); 302 303 PetscCall(PetscNew(&rart)); 304 rart->data = C->product->data; 305 rart->destroy = C->product->destroy; 306 rart->Rt = Rt; 307 C->product->data = rart; 308 C->product->destroy = MatDestroy_SeqAIJ_RARt; 309 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ; 310 PetscCall(PetscInfo(C,"Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n")); 311 PetscFunctionReturn(0); 312 } 313 314 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C) 315 { 316 Mat_RARt *rart; 317 318 PetscFunctionBegin; 319 MatCheckProduct(C,3); 320 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 321 rart = (Mat_RARt*)C->product->data; 322 PetscCall(MatTranspose(R,MAT_REUSE_MATRIX,&rart->Rt)); 323 /* MatMatMatMultSymbolic used a different data */ 324 C->product->data = rart->data; 325 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R,A,rart->Rt,C)); 326 C->product->data = rart; 327 PetscFunctionReturn(0); 328 } 329 330 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C) 331 { 332 const char *algTypes[3] = {"matmatmatmult","matmattransposemult","coloring_rart"}; 333 PetscInt alg=0; /* set default algorithm */ 334 335 PetscFunctionBegin; 336 if (scall == MAT_INITIAL_MATRIX) { 337 PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatRARt","Mat"); 338 PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,3,algTypes[0],&alg,NULL)); 339 PetscOptionsEnd(); 340 341 PetscCall(PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0)); 342 PetscCall(MatCreate(PETSC_COMM_SELF,C)); 343 switch (alg) { 344 case 1: 345 /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */ 346 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,*C)); 347 break; 348 case 2: 349 /* via coloring_rart: apply coloring C = R*A*R^T */ 350 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,*C)); 351 break; 352 default: 353 /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 354 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,*C)); 355 break; 356 } 357 PetscCall(PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0)); 358 } 359 360 PetscCall(PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0)); 361 PetscCall(((*C)->ops->rartnumeric)(A,R,*C)); 362 PetscCall(PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0)); 363 PetscFunctionReturn(0); 364 } 365 366 /* ------------------------------------------------------------- */ 367 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) 368 { 369 Mat_Product *product = C->product; 370 Mat A=product->A,R=product->B; 371 MatProductAlgorithm alg=product->alg; 372 PetscReal fill=product->fill; 373 PetscBool flg; 374 375 PetscFunctionBegin; 376 PetscCall(PetscStrcmp(alg,"r*a*rt",&flg)); 377 if (flg) { 378 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C)); 379 goto next; 380 } 381 382 PetscCall(PetscStrcmp(alg,"r*art",&flg)); 383 if (flg) { 384 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,C)); 385 goto next; 386 } 387 388 PetscCall(PetscStrcmp(alg,"coloring_rart",&flg)); 389 if (flg) { 390 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,C)); 391 goto next; 392 } 393 394 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductAlgorithm is not supported"); 395 396 next: 397 C->ops->productnumeric = MatProductNumeric_RARt; 398 PetscFunctionReturn(0); 399 } 400