1 2 /* 3 Defines projective product routines where A is a SeqAIJ matrix 4 C = R * A * R^T 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> 8 #include <../src/mat/utils/freespace.h> 9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 10 11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) 12 { 13 Mat_RARt *rart = (Mat_RARt*)data; 14 15 PetscFunctionBegin; 16 PetscCall(MatTransposeColoringDestroy(&rart->matcoloring)); 17 PetscCall(MatDestroy(&rart->Rt)); 18 PetscCall(MatDestroy(&rart->RARt)); 19 PetscCall(MatDestroy(&rart->ARt)); 20 PetscCall(PetscFree(rart->work)); 21 if (rart->destroy) { 22 PetscCall((*rart->destroy)(rart->data)); 23 } 24 PetscCall(PetscFree(rart)); 25 PetscFunctionReturn(0); 26 } 27 28 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C) 29 { 30 Mat P; 31 PetscInt *rti,*rtj; 32 Mat_RARt *rart; 33 MatColoring coloring; 34 MatTransposeColoring matcoloring; 35 ISColoring iscoloring; 36 Mat Rt_dense,RARt_dense; 37 38 PetscFunctionBegin; 39 MatCheckProduct(C,4); 40 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 41 /* create symbolic P=Rt */ 42 PetscCall(MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj)); 43 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,NULL,&P)); 44 45 /* get symbolic C=Pt*A*P */ 46 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C)); 47 PetscCall(MatSetBlockSizes(C,PetscAbs(R->rmap->bs),PetscAbs(R->rmap->bs))); 48 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart; 49 50 /* create a supporting struct */ 51 PetscCall(PetscNew(&rart)); 52 C->product->data = rart; 53 C->product->destroy = MatDestroy_SeqAIJ_RARt; 54 55 /* ------ Use coloring ---------- */ 56 /* inode causes memory problem */ 57 PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE)); 58 59 /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 60 PetscCall(MatColoringCreate(C,&coloring)); 61 PetscCall(MatColoringSetDistance(coloring,2)); 62 PetscCall(MatColoringSetType(coloring,MATCOLORINGSL)); 63 PetscCall(MatColoringSetFromOptions(coloring)); 64 PetscCall(MatColoringApply(coloring,&iscoloring)); 65 PetscCall(MatColoringDestroy(&coloring)); 66 PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring)); 67 68 rart->matcoloring = matcoloring; 69 PetscCall(ISColoringDestroy(&iscoloring)); 70 71 /* Create Rt_dense */ 72 PetscCall(MatCreate(PETSC_COMM_SELF,&Rt_dense)); 73 PetscCall(MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors)); 74 PetscCall(MatSetType(Rt_dense,MATSEQDENSE)); 75 PetscCall(MatSeqDenseSetPreallocation(Rt_dense,NULL)); 76 77 Rt_dense->assembled = PETSC_TRUE; 78 rart->Rt = Rt_dense; 79 80 /* Create RARt_dense = R*A*Rt_dense */ 81 PetscCall(MatCreate(PETSC_COMM_SELF,&RARt_dense)); 82 PetscCall(MatSetSizes(RARt_dense,C->rmap->n,matcoloring->ncolors,C->rmap->n,matcoloring->ncolors)); 83 PetscCall(MatSetType(RARt_dense,MATSEQDENSE)); 84 PetscCall(MatSeqDenseSetPreallocation(RARt_dense,NULL)); 85 86 rart->RARt = RARt_dense; 87 88 /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 89 PetscCall(PetscMalloc1(A->rmap->n*4,&rart->work)); 90 91 /* clean up */ 92 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj)); 93 PetscCall(MatDestroy(&P)); 94 95 #if defined(PETSC_USE_INFO) 96 { 97 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 98 PetscReal density = (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n); 99 PetscCall(PetscInfo(C,"C=R*(A*Rt) via coloring C - use sparse-dense inner products\n")); 100 PetscCall(PetscInfo(C,"RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,R->cmap->n,R->rmap->n,c->nz,(double)density)); 101 } 102 #endif 103 PetscFunctionReturn(0); 104 } 105 106 /* 107 RAB = R * A * B, R and A in seqaij format, B in dense format; 108 */ 109 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work) 110 { 111 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data; 112 PetscScalar r1,r2,r3,r4; 113 const PetscScalar *b,*b1,*b2,*b3,*b4; 114 MatScalar *aa,*ra; 115 PetscInt cn =B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n; 116 PetscInt am2=2*am,am3=3*am,bm4=4*bm; 117 PetscScalar *d,*c,*c2,*c3,*c4; 118 PetscInt *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n; 119 PetscInt rm2=2*rm,rm3=3*rm,colrm; 120 121 PetscFunctionBegin; 122 if (!dm || !dn) PetscFunctionReturn(0); 123 PetscCheck(bm == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,bm); 124 PetscCheck(am == R->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,R->cmap->n,am); 125 PetscCheck(R->rmap->n == RAB->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT,RAB->rmap->n,R->rmap->n); 126 PetscCheck(B->cmap->n == RAB->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT,RAB->cmap->n,B->cmap->n); 127 128 { /* 129 This approach is not as good as original ones (will be removed later), but it reveals that 130 AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c 131 */ 132 PetscBool via_matmatmult=PETSC_FALSE; 133 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matrart_via_matmatmult",&via_matmatmult,NULL)); 134 if (via_matmatmult) { 135 Mat AB_den = NULL; 136 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&AB_den)); 137 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,0.0,AB_den)); 138 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,B,AB_den)); 139 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R,AB_den,RAB)); 140 PetscCall(MatDestroy(&AB_den)); 141 PetscFunctionReturn(0); 142 } 143 } 144 145 PetscCall(MatDenseGetArrayRead(B,&b)); 146 PetscCall(MatDenseGetArray(RAB,&d)); 147 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 148 c = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am; 149 for (col=0; col<cn-4; col += 4) { /* over columns of C */ 150 for (i=0; i<am; i++) { /* over rows of A in those columns */ 151 r1 = r2 = r3 = r4 = 0.0; 152 n = ai[i+1] - ai[i]; 153 aj = a->j + ai[i]; 154 aa = a->a + ai[i]; 155 for (j=0; j<n; j++) { 156 r1 += (*aa)*b1[*aj]; 157 r2 += (*aa)*b2[*aj]; 158 r3 += (*aa)*b3[*aj]; 159 r4 += (*aa++)*b4[*aj++]; 160 } 161 c[i] = r1; 162 c[am + i] = r2; 163 c[am2 + i] = r3; 164 c[am3 + i] = r4; 165 } 166 b1 += bm4; 167 b2 += bm4; 168 b3 += bm4; 169 b4 += bm4; 170 171 /* RAB[:,col] = R*C[:,col] */ 172 colrm = col*rm; 173 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 174 r1 = r2 = r3 = r4 = 0.0; 175 n = r->i[i+1] - r->i[i]; 176 rj = r->j + r->i[i]; 177 ra = r->a + r->i[i]; 178 for (j=0; j<n; j++) { 179 r1 += (*ra)*c[*rj]; 180 r2 += (*ra)*c2[*rj]; 181 r3 += (*ra)*c3[*rj]; 182 r4 += (*ra++)*c4[*rj++]; 183 } 184 d[colrm + i] = r1; 185 d[colrm + rm + i] = r2; 186 d[colrm + rm2 + i] = r3; 187 d[colrm + rm3 + i] = r4; 188 } 189 } 190 for (; col<cn; col++) { /* over extra columns of C */ 191 for (i=0; i<am; i++) { /* over rows of A in those columns */ 192 r1 = 0.0; 193 n = a->i[i+1] - a->i[i]; 194 aj = a->j + a->i[i]; 195 aa = a->a + a->i[i]; 196 for (j=0; j<n; j++) { 197 r1 += (*aa++)*b1[*aj++]; 198 } 199 c[i] = r1; 200 } 201 b1 += bm; 202 203 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 204 r1 = 0.0; 205 n = r->i[i+1] - r->i[i]; 206 rj = r->j + r->i[i]; 207 ra = r->a + r->i[i]; 208 for (j=0; j<n; j++) { 209 r1 += (*ra++)*c[*rj++]; 210 } 211 d[col*rm + i] = r1; 212 } 213 } 214 PetscCall(PetscLogFlops(cn*2.0*(a->nz + r->nz))); 215 216 PetscCall(MatDenseRestoreArrayRead(B,&b)); 217 PetscCall(MatDenseRestoreArray(RAB,&d)); 218 PetscCall(MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY)); 219 PetscCall(MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY)); 220 PetscFunctionReturn(0); 221 } 222 223 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C) 224 { 225 Mat_RARt *rart; 226 MatTransposeColoring matcoloring; 227 Mat Rt,RARt; 228 229 PetscFunctionBegin; 230 MatCheckProduct(C,3); 231 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 232 rart = (Mat_RARt*)C->product->data; 233 234 /* Get dense Rt by Apply MatTransposeColoring to R */ 235 matcoloring = rart->matcoloring; 236 Rt = rart->Rt; 237 PetscCall(MatTransColoringApplySpToDen(matcoloring,R,Rt)); 238 239 /* Get dense RARt = R*A*Rt -- dominates! */ 240 RARt = rart->RARt; 241 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work)); 242 243 /* Recover C from C_dense */ 244 PetscCall(MatTransColoringApplyDenToSp(matcoloring,RARt,C)); 245 PetscFunctionReturn(0); 246 } 247 248 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C) 249 { 250 Mat ARt; 251 Mat_RARt *rart; 252 char *alg; 253 254 PetscFunctionBegin; 255 MatCheckProduct(C,4); 256 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 257 /* create symbolic ARt = A*R^T */ 258 PetscCall(MatProductCreate(A,R,NULL,&ARt)); 259 PetscCall(MatProductSetType(ARt,MATPRODUCT_ABt)); 260 PetscCall(MatProductSetAlgorithm(ARt,"sorted")); 261 PetscCall(MatProductSetFill(ARt,fill)); 262 PetscCall(MatProductSetFromOptions(ARt)); 263 PetscCall(MatProductSymbolic(ARt)); 264 265 /* compute symbolic C = R*ARt */ 266 /* set algorithm for C = R*ARt */ 267 PetscCall(PetscStrallocpy(C->product->alg,&alg)); 268 PetscCall(MatProductSetAlgorithm(C,"sorted")); 269 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R,ARt,fill,C)); 270 /* resume original algorithm for C */ 271 PetscCall(MatProductSetAlgorithm(C,alg)); 272 PetscCall(PetscFree(alg)); 273 274 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult; 275 276 PetscCall(PetscNew(&rart)); 277 rart->ARt = ARt; 278 C->product->data = rart; 279 C->product->destroy = MatDestroy_SeqAIJ_RARt; 280 PetscCall(PetscInfo(C,"Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n")); 281 PetscFunctionReturn(0); 282 } 283 284 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C) 285 { 286 Mat_RARt *rart; 287 288 PetscFunctionBegin; 289 MatCheckProduct(C,3); 290 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 291 rart = (Mat_RARt*)C->product->data; 292 PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,R,rart->ARt)); /* dominate! */ 293 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R,rart->ARt,C)); 294 PetscFunctionReturn(0); 295 } 296 297 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C) 298 { 299 Mat Rt; 300 Mat_RARt *rart; 301 302 PetscFunctionBegin; 303 MatCheckProduct(C,4); 304 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 305 PetscCall(MatTranspose_SeqAIJ(R,MAT_INITIAL_MATRIX,&Rt)); 306 PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R,A,Rt,fill,C)); 307 308 PetscCall(PetscNew(&rart)); 309 rart->data = C->product->data; 310 rart->destroy = C->product->destroy; 311 rart->Rt = Rt; 312 C->product->data = rart; 313 C->product->destroy = MatDestroy_SeqAIJ_RARt; 314 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ; 315 PetscCall(PetscInfo(C,"Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n")); 316 PetscFunctionReturn(0); 317 } 318 319 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C) 320 { 321 Mat_RARt *rart; 322 323 PetscFunctionBegin; 324 MatCheckProduct(C,3); 325 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 326 rart = (Mat_RARt*)C->product->data; 327 PetscCall(MatTranspose_SeqAIJ(R,MAT_REUSE_MATRIX,&rart->Rt)); 328 /* MatMatMatMultSymbolic used a different data */ 329 C->product->data = rart->data; 330 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R,A,rart->Rt,C)); 331 C->product->data = rart; 332 PetscFunctionReturn(0); 333 } 334 335 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C) 336 { 337 const char *algTypes[3] = {"matmatmatmult","matmattransposemult","coloring_rart"}; 338 PetscInt alg=0; /* set default algorithm */ 339 340 PetscFunctionBegin; 341 if (scall == MAT_INITIAL_MATRIX) { 342 PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatRARt","Mat"); 343 PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,3,algTypes[0],&alg,NULL)); 344 PetscOptionsEnd(); 345 346 PetscCall(PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0)); 347 PetscCall(MatCreate(PETSC_COMM_SELF,C)); 348 switch (alg) { 349 case 1: 350 /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */ 351 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,*C)); 352 break; 353 case 2: 354 /* via coloring_rart: apply coloring C = R*A*R^T */ 355 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,*C)); 356 break; 357 default: 358 /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 359 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,*C)); 360 break; 361 } 362 PetscCall(PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0)); 363 } 364 365 PetscCall(PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0)); 366 PetscCall(((*C)->ops->rartnumeric)(A,R,*C)); 367 PetscCall(PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0)); 368 PetscFunctionReturn(0); 369 } 370 371 /* ------------------------------------------------------------- */ 372 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) 373 { 374 Mat_Product *product = C->product; 375 Mat A=product->A,R=product->B; 376 MatProductAlgorithm alg=product->alg; 377 PetscReal fill=product->fill; 378 PetscBool flg; 379 380 PetscFunctionBegin; 381 PetscCall(PetscStrcmp(alg,"r*a*rt",&flg)); 382 if (flg) { 383 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C)); 384 goto next; 385 } 386 387 PetscCall(PetscStrcmp(alg,"r*art",&flg)); 388 if (flg) { 389 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,C)); 390 goto next; 391 } 392 393 PetscCall(PetscStrcmp(alg,"coloring_rart",&flg)); 394 if (flg) { 395 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,C)); 396 goto next; 397 } 398 399 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductAlgorithm is not supported"); 400 401 next: 402 C->ops->productnumeric = MatProductNumeric_RARt; 403 PetscFunctionReturn(0); 404 } 405