1 2 /* 3 Defines projective product routines where A is a SeqAIJ matrix 4 C = R * A * R^T 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> 8 #include <../src/mat/utils/freespace.h> 9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 10 11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) 12 { 13 Mat_RARt *rart = (Mat_RARt*)data; 14 15 PetscFunctionBegin; 16 PetscCall(MatTransposeColoringDestroy(&rart->matcoloring)); 17 PetscCall(MatDestroy(&rart->Rt)); 18 PetscCall(MatDestroy(&rart->RARt)); 19 PetscCall(MatDestroy(&rart->ARt)); 20 PetscCall(PetscFree(rart->work)); 21 if (rart->destroy) PetscCall((*rart->destroy)(rart->data)); 22 PetscCall(PetscFree(rart)); 23 PetscFunctionReturn(0); 24 } 25 26 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat C) 27 { 28 Mat P; 29 PetscInt *rti,*rtj; 30 Mat_RARt *rart; 31 MatColoring coloring; 32 MatTransposeColoring matcoloring; 33 ISColoring iscoloring; 34 Mat Rt_dense,RARt_dense; 35 36 PetscFunctionBegin; 37 MatCheckProduct(C,4); 38 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 39 /* create symbolic P=Rt */ 40 PetscCall(MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj)); 41 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,NULL,&P)); 42 43 /* get symbolic C=Pt*A*P */ 44 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C)); 45 PetscCall(MatSetBlockSizes(C,PetscAbs(R->rmap->bs),PetscAbs(R->rmap->bs))); 46 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart; 47 48 /* create a supporting struct */ 49 PetscCall(PetscNew(&rart)); 50 C->product->data = rart; 51 C->product->destroy = MatDestroy_SeqAIJ_RARt; 52 53 /* ------ Use coloring ---------- */ 54 /* inode causes memory problem */ 55 PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE)); 56 57 /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 58 PetscCall(MatColoringCreate(C,&coloring)); 59 PetscCall(MatColoringSetDistance(coloring,2)); 60 PetscCall(MatColoringSetType(coloring,MATCOLORINGSL)); 61 PetscCall(MatColoringSetFromOptions(coloring)); 62 PetscCall(MatColoringApply(coloring,&iscoloring)); 63 PetscCall(MatColoringDestroy(&coloring)); 64 PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring)); 65 66 rart->matcoloring = matcoloring; 67 PetscCall(ISColoringDestroy(&iscoloring)); 68 69 /* Create Rt_dense */ 70 PetscCall(MatCreate(PETSC_COMM_SELF,&Rt_dense)); 71 PetscCall(MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors)); 72 PetscCall(MatSetType(Rt_dense,MATSEQDENSE)); 73 PetscCall(MatSeqDenseSetPreallocation(Rt_dense,NULL)); 74 75 Rt_dense->assembled = PETSC_TRUE; 76 rart->Rt = Rt_dense; 77 78 /* Create RARt_dense = R*A*Rt_dense */ 79 PetscCall(MatCreate(PETSC_COMM_SELF,&RARt_dense)); 80 PetscCall(MatSetSizes(RARt_dense,C->rmap->n,matcoloring->ncolors,C->rmap->n,matcoloring->ncolors)); 81 PetscCall(MatSetType(RARt_dense,MATSEQDENSE)); 82 PetscCall(MatSeqDenseSetPreallocation(RARt_dense,NULL)); 83 84 rart->RARt = RARt_dense; 85 86 /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 87 PetscCall(PetscMalloc1(A->rmap->n*4,&rart->work)); 88 89 /* clean up */ 90 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj)); 91 PetscCall(MatDestroy(&P)); 92 93 #if defined(PETSC_USE_INFO) 94 { 95 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 96 PetscReal density = (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n); 97 PetscCall(PetscInfo(C,"C=R*(A*Rt) via coloring C - use sparse-dense inner products\n")); 98 PetscCall(PetscInfo(C,"RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,R->cmap->n,R->rmap->n,c->nz,(double)density)); 99 } 100 #endif 101 PetscFunctionReturn(0); 102 } 103 104 /* 105 RAB = R * A * B, R and A in seqaij format, B in dense format; 106 */ 107 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work) 108 { 109 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data; 110 PetscScalar r1,r2,r3,r4; 111 const PetscScalar *b,*b1,*b2,*b3,*b4; 112 MatScalar *aa,*ra; 113 PetscInt cn =B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n; 114 PetscInt am2=2*am,am3=3*am,bm4=4*bm; 115 PetscScalar *d,*c,*c2,*c3,*c4; 116 PetscInt *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n; 117 PetscInt rm2=2*rm,rm3=3*rm,colrm; 118 119 PetscFunctionBegin; 120 if (!dm || !dn) PetscFunctionReturn(0); 121 PetscCheck(bm == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,bm); 122 PetscCheck(am == R->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,R->cmap->n,am); 123 PetscCheck(R->rmap->n == RAB->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT,RAB->rmap->n,R->rmap->n); 124 PetscCheck(B->cmap->n == RAB->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT,RAB->cmap->n,B->cmap->n); 125 126 { /* 127 This approach is not as good as original ones (will be removed later), but it reveals that 128 AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c 129 */ 130 PetscBool via_matmatmult=PETSC_FALSE; 131 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matrart_via_matmatmult",&via_matmatmult,NULL)); 132 if (via_matmatmult) { 133 Mat AB_den = NULL; 134 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&AB_den)); 135 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,0.0,AB_den)); 136 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,B,AB_den)); 137 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R,AB_den,RAB)); 138 PetscCall(MatDestroy(&AB_den)); 139 PetscFunctionReturn(0); 140 } 141 } 142 143 PetscCall(MatDenseGetArrayRead(B,&b)); 144 PetscCall(MatDenseGetArray(RAB,&d)); 145 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 146 c = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am; 147 for (col=0; col<cn-4; col += 4) { /* over columns of C */ 148 for (i=0; i<am; i++) { /* over rows of A in those columns */ 149 r1 = r2 = r3 = r4 = 0.0; 150 n = ai[i+1] - ai[i]; 151 aj = a->j + ai[i]; 152 aa = a->a + ai[i]; 153 for (j=0; j<n; j++) { 154 r1 += (*aa)*b1[*aj]; 155 r2 += (*aa)*b2[*aj]; 156 r3 += (*aa)*b3[*aj]; 157 r4 += (*aa++)*b4[*aj++]; 158 } 159 c[i] = r1; 160 c[am + i] = r2; 161 c[am2 + i] = r3; 162 c[am3 + i] = r4; 163 } 164 b1 += bm4; 165 b2 += bm4; 166 b3 += bm4; 167 b4 += bm4; 168 169 /* RAB[:,col] = R*C[:,col] */ 170 colrm = col*rm; 171 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 172 r1 = r2 = r3 = r4 = 0.0; 173 n = r->i[i+1] - r->i[i]; 174 rj = r->j + r->i[i]; 175 ra = r->a + r->i[i]; 176 for (j=0; j<n; j++) { 177 r1 += (*ra)*c[*rj]; 178 r2 += (*ra)*c2[*rj]; 179 r3 += (*ra)*c3[*rj]; 180 r4 += (*ra++)*c4[*rj++]; 181 } 182 d[colrm + i] = r1; 183 d[colrm + rm + i] = r2; 184 d[colrm + rm2 + i] = r3; 185 d[colrm + rm3 + i] = r4; 186 } 187 } 188 for (; col<cn; col++) { /* over extra columns of C */ 189 for (i=0; i<am; i++) { /* over rows of A in those columns */ 190 r1 = 0.0; 191 n = a->i[i+1] - a->i[i]; 192 aj = a->j + a->i[i]; 193 aa = a->a + a->i[i]; 194 for (j=0; j<n; j++) { 195 r1 += (*aa++)*b1[*aj++]; 196 } 197 c[i] = r1; 198 } 199 b1 += bm; 200 201 for (i=0; i<rm; i++) { /* over rows of R in those columns */ 202 r1 = 0.0; 203 n = r->i[i+1] - r->i[i]; 204 rj = r->j + r->i[i]; 205 ra = r->a + r->i[i]; 206 for (j=0; j<n; j++) { 207 r1 += (*ra++)*c[*rj++]; 208 } 209 d[col*rm + i] = r1; 210 } 211 } 212 PetscCall(PetscLogFlops(cn*2.0*(a->nz + r->nz))); 213 214 PetscCall(MatDenseRestoreArrayRead(B,&b)); 215 PetscCall(MatDenseRestoreArray(RAB,&d)); 216 PetscCall(MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY)); 217 PetscCall(MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY)); 218 PetscFunctionReturn(0); 219 } 220 221 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,Mat C) 222 { 223 Mat_RARt *rart; 224 MatTransposeColoring matcoloring; 225 Mat Rt,RARt; 226 227 PetscFunctionBegin; 228 MatCheckProduct(C,3); 229 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 230 rart = (Mat_RARt*)C->product->data; 231 232 /* Get dense Rt by Apply MatTransposeColoring to R */ 233 matcoloring = rart->matcoloring; 234 Rt = rart->Rt; 235 PetscCall(MatTransColoringApplySpToDen(matcoloring,R,Rt)); 236 237 /* Get dense RARt = R*A*Rt -- dominates! */ 238 RARt = rart->RARt; 239 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R,A,Rt,RARt,rart->work)); 240 241 /* Recover C from C_dense */ 242 PetscCall(MatTransColoringApplyDenToSp(matcoloring,RARt,C)); 243 PetscFunctionReturn(0); 244 } 245 246 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,PetscReal fill,Mat C) 247 { 248 Mat ARt; 249 Mat_RARt *rart; 250 char *alg; 251 252 PetscFunctionBegin; 253 MatCheckProduct(C,4); 254 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 255 /* create symbolic ARt = A*R^T */ 256 PetscCall(MatProductCreate(A,R,NULL,&ARt)); 257 PetscCall(MatProductSetType(ARt,MATPRODUCT_ABt)); 258 PetscCall(MatProductSetAlgorithm(ARt,"sorted")); 259 PetscCall(MatProductSetFill(ARt,fill)); 260 PetscCall(MatProductSetFromOptions(ARt)); 261 PetscCall(MatProductSymbolic(ARt)); 262 263 /* compute symbolic C = R*ARt */ 264 /* set algorithm for C = R*ARt */ 265 PetscCall(PetscStrallocpy(C->product->alg,&alg)); 266 PetscCall(MatProductSetAlgorithm(C,"sorted")); 267 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R,ARt,fill,C)); 268 /* resume original algorithm for C */ 269 PetscCall(MatProductSetAlgorithm(C,alg)); 270 PetscCall(PetscFree(alg)); 271 272 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult; 273 274 PetscCall(PetscNew(&rart)); 275 rart->ARt = ARt; 276 C->product->data = rart; 277 C->product->destroy = MatDestroy_SeqAIJ_RARt; 278 PetscCall(PetscInfo(C,"Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n")); 279 PetscFunctionReturn(0); 280 } 281 282 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A,Mat R,Mat C) 283 { 284 Mat_RARt *rart; 285 286 PetscFunctionBegin; 287 MatCheckProduct(C,3); 288 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 289 rart = (Mat_RARt*)C->product->data; 290 PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,R,rart->ARt)); /* dominate! */ 291 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R,rart->ARt,C)); 292 PetscFunctionReturn(0); 293 } 294 295 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat C) 296 { 297 Mat Rt; 298 Mat_RARt *rart; 299 300 PetscFunctionBegin; 301 MatCheckProduct(C,4); 302 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 303 PetscCall(MatTranspose_SeqAIJ(R,MAT_INITIAL_MATRIX,&Rt)); 304 PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R,A,Rt,fill,C)); 305 306 PetscCall(PetscNew(&rart)); 307 rart->data = C->product->data; 308 rart->destroy = C->product->destroy; 309 rart->Rt = Rt; 310 C->product->data = rart; 311 C->product->destroy = MatDestroy_SeqAIJ_RARt; 312 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ; 313 PetscCall(PetscInfo(C,"Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n")); 314 PetscFunctionReturn(0); 315 } 316 317 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A,Mat R,Mat C) 318 { 319 Mat_RARt *rart; 320 321 PetscFunctionBegin; 322 MatCheckProduct(C,3); 323 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 324 rart = (Mat_RARt*)C->product->data; 325 PetscCall(MatTranspose_SeqAIJ(R,MAT_REUSE_MATRIX,&rart->Rt)); 326 /* MatMatMatMultSymbolic used a different data */ 327 C->product->data = rart->data; 328 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R,A,rart->Rt,C)); 329 C->product->data = rart; 330 PetscFunctionReturn(0); 331 } 332 333 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A,Mat R,MatReuse scall,PetscReal fill,Mat *C) 334 { 335 const char *algTypes[3] = {"matmatmatmult","matmattransposemult","coloring_rart"}; 336 PetscInt alg=0; /* set default algorithm */ 337 338 PetscFunctionBegin; 339 if (scall == MAT_INITIAL_MATRIX) { 340 PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatRARt","Mat"); 341 PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,3,algTypes[0],&alg,NULL)); 342 PetscOptionsEnd(); 343 344 PetscCall(PetscLogEventBegin(MAT_RARtSymbolic,A,R,0,0)); 345 PetscCall(MatCreate(PETSC_COMM_SELF,C)); 346 switch (alg) { 347 case 1: 348 /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */ 349 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,*C)); 350 break; 351 case 2: 352 /* via coloring_rart: apply coloring C = R*A*R^T */ 353 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,*C)); 354 break; 355 default: 356 /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 357 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,*C)); 358 break; 359 } 360 PetscCall(PetscLogEventEnd(MAT_RARtSymbolic,A,R,0,0)); 361 } 362 363 PetscCall(PetscLogEventBegin(MAT_RARtNumeric,A,R,0,0)); 364 PetscCall(((*C)->ops->rartnumeric)(A,R,*C)); 365 PetscCall(PetscLogEventEnd(MAT_RARtNumeric,A,R,0,0)); 366 PetscFunctionReturn(0); 367 } 368 369 /* ------------------------------------------------------------- */ 370 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) 371 { 372 Mat_Product *product = C->product; 373 Mat A=product->A,R=product->B; 374 MatProductAlgorithm alg=product->alg; 375 PetscReal fill=product->fill; 376 PetscBool flg; 377 378 PetscFunctionBegin; 379 PetscCall(PetscStrcmp(alg,"r*a*rt",&flg)); 380 if (flg) { 381 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A,R,fill,C)); 382 goto next; 383 } 384 385 PetscCall(PetscStrcmp(alg,"r*art",&flg)); 386 if (flg) { 387 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A,R,fill,C)); 388 goto next; 389 } 390 391 PetscCall(PetscStrcmp(alg,"coloring_rart",&flg)); 392 if (flg) { 393 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A,R,fill,C)); 394 goto next; 395 } 396 397 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductAlgorithm is not supported"); 398 399 next: 400 C->ops->productnumeric = MatProductNumeric_RARt; 401 PetscFunctionReturn(0); 402 } 403