1 2 /* 3 Defines projective product routines where A is a SeqAIJ matrix 4 C = R * A * R^T 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> 8 #include <../src/mat/utils/freespace.h> 9 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 10 11 PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data) 12 { 13 Mat_RARt *rart = (Mat_RARt *)data; 14 15 PetscFunctionBegin; 16 PetscCall(MatTransposeColoringDestroy(&rart->matcoloring)); 17 PetscCall(MatDestroy(&rart->Rt)); 18 PetscCall(MatDestroy(&rart->RARt)); 19 PetscCall(MatDestroy(&rart->ARt)); 20 PetscCall(PetscFree(rart->work)); 21 if (rart->destroy) PetscCall((*rart->destroy)(rart->data)); 22 PetscCall(PetscFree(rart)); 23 PetscFunctionReturn(0); 24 } 25 26 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C) 27 { 28 Mat P; 29 Mat_RARt *rart; 30 MatColoring coloring; 31 MatTransposeColoring matcoloring; 32 ISColoring iscoloring; 33 Mat Rt_dense, RARt_dense; 34 35 PetscFunctionBegin; 36 MatCheckProduct(C, 4); 37 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 38 /* create symbolic P=Rt */ 39 PetscCall(MatTransposeSymbolic(R, &P)); 40 41 /* get symbolic C=Pt*A*P */ 42 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C)); 43 PetscCall(MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs))); 44 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart; 45 46 /* create a supporting struct */ 47 PetscCall(PetscNew(&rart)); 48 C->product->data = rart; 49 C->product->destroy = MatDestroy_SeqAIJ_RARt; 50 51 /* ------ Use coloring ---------- */ 52 /* inode causes memory problem */ 53 PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE)); 54 55 /* Create MatTransposeColoring from symbolic C=R*A*R^T */ 56 PetscCall(MatColoringCreate(C, &coloring)); 57 PetscCall(MatColoringSetDistance(coloring, 2)); 58 PetscCall(MatColoringSetType(coloring, MATCOLORINGSL)); 59 PetscCall(MatColoringSetFromOptions(coloring)); 60 PetscCall(MatColoringApply(coloring, &iscoloring)); 61 PetscCall(MatColoringDestroy(&coloring)); 62 PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring)); 63 64 rart->matcoloring = matcoloring; 65 PetscCall(ISColoringDestroy(&iscoloring)); 66 67 /* Create Rt_dense */ 68 PetscCall(MatCreate(PETSC_COMM_SELF, &Rt_dense)); 69 PetscCall(MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors)); 70 PetscCall(MatSetType(Rt_dense, MATSEQDENSE)); 71 PetscCall(MatSeqDenseSetPreallocation(Rt_dense, NULL)); 72 73 Rt_dense->assembled = PETSC_TRUE; 74 rart->Rt = Rt_dense; 75 76 /* Create RARt_dense = R*A*Rt_dense */ 77 PetscCall(MatCreate(PETSC_COMM_SELF, &RARt_dense)); 78 PetscCall(MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors)); 79 PetscCall(MatSetType(RARt_dense, MATSEQDENSE)); 80 PetscCall(MatSeqDenseSetPreallocation(RARt_dense, NULL)); 81 82 rart->RARt = RARt_dense; 83 84 /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ 85 PetscCall(PetscMalloc1(A->rmap->n * 4, &rart->work)); 86 87 /* clean up */ 88 PetscCall(MatDestroy(&P)); 89 90 #if defined(PETSC_USE_INFO) 91 { 92 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 93 PetscReal density = (PetscReal)(c->nz) / (RARt_dense->rmap->n * RARt_dense->cmap->n); 94 PetscCall(PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n")); 95 PetscCall(PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density)); 96 } 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 /* 102 RAB = R * A * B, R and A in seqaij format, B in dense format; 103 */ 104 PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work) 105 { 106 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data; 107 PetscScalar r1, r2, r3, r4; 108 const PetscScalar *b, *b1, *b2, *b3, *b4; 109 MatScalar *aa, *ra; 110 PetscInt cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n; 111 PetscInt am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm; 112 PetscScalar *d, *c, *c2, *c3, *c4; 113 PetscInt *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n; 114 PetscInt rm2 = 2 * rm, rm3 = 3 * rm, colrm; 115 116 PetscFunctionBegin; 117 if (!dm || !dn) PetscFunctionReturn(0); 118 PetscCheck(bm == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, bm); 119 PetscCheck(am == R->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in R %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, R->cmap->n, am); 120 PetscCheck(R->rmap->n == RAB->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in RAB %" PetscInt_FMT " not equal rows in R %" PetscInt_FMT, RAB->rmap->n, R->rmap->n); 121 PetscCheck(B->cmap->n == RAB->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in RAB %" PetscInt_FMT " not equal columns in B %" PetscInt_FMT, RAB->cmap->n, B->cmap->n); 122 123 { /* 124 This approach is not as good as original ones (will be removed later), but it reveals that 125 AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c 126 */ 127 PetscBool via_matmatmult = PETSC_FALSE; 128 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL)); 129 if (via_matmatmult) { 130 Mat AB_den = NULL; 131 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &AB_den)); 132 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den)); 133 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den)); 134 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB)); 135 PetscCall(MatDestroy(&AB_den)); 136 PetscFunctionReturn(0); 137 } 138 } 139 140 PetscCall(MatDenseGetArrayRead(B, &b)); 141 PetscCall(MatDenseGetArray(RAB, &d)); 142 b1 = b; 143 b2 = b1 + bm; 144 b3 = b2 + bm; 145 b4 = b3 + bm; 146 c = work; 147 c2 = c + am; 148 c3 = c2 + am; 149 c4 = c3 + am; 150 for (col = 0; col < cn - 4; col += 4) { /* over columns of C */ 151 for (i = 0; i < am; i++) { /* over rows of A in those columns */ 152 r1 = r2 = r3 = r4 = 0.0; 153 n = ai[i + 1] - ai[i]; 154 aj = a->j + ai[i]; 155 aa = a->a + ai[i]; 156 for (j = 0; j < n; j++) { 157 r1 += (*aa) * b1[*aj]; 158 r2 += (*aa) * b2[*aj]; 159 r3 += (*aa) * b3[*aj]; 160 r4 += (*aa++) * b4[*aj++]; 161 } 162 c[i] = r1; 163 c[am + i] = r2; 164 c[am2 + i] = r3; 165 c[am3 + i] = r4; 166 } 167 b1 += bm4; 168 b2 += bm4; 169 b3 += bm4; 170 b4 += bm4; 171 172 /* RAB[:,col] = R*C[:,col] */ 173 colrm = col * rm; 174 for (i = 0; i < rm; i++) { /* over rows of R in those columns */ 175 r1 = r2 = r3 = r4 = 0.0; 176 n = r->i[i + 1] - r->i[i]; 177 rj = r->j + r->i[i]; 178 ra = r->a + r->i[i]; 179 for (j = 0; j < n; j++) { 180 r1 += (*ra) * c[*rj]; 181 r2 += (*ra) * c2[*rj]; 182 r3 += (*ra) * c3[*rj]; 183 r4 += (*ra++) * c4[*rj++]; 184 } 185 d[colrm + i] = r1; 186 d[colrm + rm + i] = r2; 187 d[colrm + rm2 + i] = r3; 188 d[colrm + rm3 + i] = r4; 189 } 190 } 191 for (; col < cn; col++) { /* over extra columns of C */ 192 for (i = 0; i < am; i++) { /* over rows of A in those columns */ 193 r1 = 0.0; 194 n = a->i[i + 1] - a->i[i]; 195 aj = a->j + a->i[i]; 196 aa = a->a + a->i[i]; 197 for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++]; 198 c[i] = r1; 199 } 200 b1 += bm; 201 202 for (i = 0; i < rm; i++) { /* over rows of R in those columns */ 203 r1 = 0.0; 204 n = r->i[i + 1] - r->i[i]; 205 rj = r->j + r->i[i]; 206 ra = r->a + r->i[i]; 207 for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++]; 208 d[col * rm + i] = r1; 209 } 210 } 211 PetscCall(PetscLogFlops(cn * 2.0 * (a->nz + r->nz))); 212 213 PetscCall(MatDenseRestoreArrayRead(B, &b)); 214 PetscCall(MatDenseRestoreArray(RAB, &d)); 215 PetscCall(MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY)); 216 PetscCall(MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY)); 217 PetscFunctionReturn(0); 218 } 219 220 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C) 221 { 222 Mat_RARt *rart; 223 MatTransposeColoring matcoloring; 224 Mat Rt, RARt; 225 226 PetscFunctionBegin; 227 MatCheckProduct(C, 3); 228 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 229 rart = (Mat_RARt *)C->product->data; 230 231 /* Get dense Rt by Apply MatTransposeColoring to R */ 232 matcoloring = rart->matcoloring; 233 Rt = rart->Rt; 234 PetscCall(MatTransColoringApplySpToDen(matcoloring, R, Rt)); 235 236 /* Get dense RARt = R*A*Rt -- dominates! */ 237 RARt = rart->RARt; 238 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work)); 239 240 /* Recover C from C_dense */ 241 PetscCall(MatTransColoringApplyDenToSp(matcoloring, RARt, C)); 242 PetscFunctionReturn(0); 243 } 244 245 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C) 246 { 247 Mat ARt; 248 Mat_RARt *rart; 249 char *alg; 250 251 PetscFunctionBegin; 252 MatCheckProduct(C, 4); 253 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 254 /* create symbolic ARt = A*R^T */ 255 PetscCall(MatProductCreate(A, R, NULL, &ARt)); 256 PetscCall(MatProductSetType(ARt, MATPRODUCT_ABt)); 257 PetscCall(MatProductSetAlgorithm(ARt, "sorted")); 258 PetscCall(MatProductSetFill(ARt, fill)); 259 PetscCall(MatProductSetFromOptions(ARt)); 260 PetscCall(MatProductSymbolic(ARt)); 261 262 /* compute symbolic C = R*ARt */ 263 /* set algorithm for C = R*ARt */ 264 PetscCall(PetscStrallocpy(C->product->alg, &alg)); 265 PetscCall(MatProductSetAlgorithm(C, "sorted")); 266 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C)); 267 /* resume original algorithm for C */ 268 PetscCall(MatProductSetAlgorithm(C, alg)); 269 PetscCall(PetscFree(alg)); 270 271 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult; 272 273 PetscCall(PetscNew(&rart)); 274 rart->ARt = ARt; 275 C->product->data = rart; 276 C->product->destroy = MatDestroy_SeqAIJ_RARt; 277 PetscCall(PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n")); 278 PetscFunctionReturn(0); 279 } 280 281 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C) 282 { 283 Mat_RARt *rart; 284 285 PetscFunctionBegin; 286 MatCheckProduct(C, 3); 287 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 288 rart = (Mat_RARt *)C->product->data; 289 PetscCall(MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt)); /* dominate! */ 290 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C)); 291 PetscFunctionReturn(0); 292 } 293 294 PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C) 295 { 296 Mat Rt; 297 Mat_RARt *rart; 298 299 PetscFunctionBegin; 300 MatCheckProduct(C, 4); 301 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 302 PetscCall(MatTranspose(R, MAT_INITIAL_MATRIX, &Rt)); 303 PetscCall(MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C)); 304 305 PetscCall(PetscNew(&rart)); 306 rart->data = C->product->data; 307 rart->destroy = C->product->destroy; 308 rart->Rt = Rt; 309 C->product->data = rart; 310 C->product->destroy = MatDestroy_SeqAIJ_RARt; 311 C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ; 312 PetscCall(PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n")); 313 PetscFunctionReturn(0); 314 } 315 316 PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C) 317 { 318 Mat_RARt *rart; 319 320 PetscFunctionBegin; 321 MatCheckProduct(C, 3); 322 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 323 rart = (Mat_RARt *)C->product->data; 324 PetscCall(MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt)); 325 /* MatMatMatMultSymbolic used a different data */ 326 C->product->data = rart->data; 327 PetscCall(MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C)); 328 C->product->data = rart; 329 PetscFunctionReturn(0); 330 } 331 332 PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C) 333 { 334 const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"}; 335 PetscInt alg = 0; /* set default algorithm */ 336 337 PetscFunctionBegin; 338 if (scall == MAT_INITIAL_MATRIX) { 339 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat"); 340 PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL)); 341 PetscOptionsEnd(); 342 343 PetscCall(PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0)); 344 PetscCall(MatCreate(PETSC_COMM_SELF, C)); 345 switch (alg) { 346 case 1: 347 /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */ 348 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C)); 349 break; 350 case 2: 351 /* via coloring_rart: apply coloring C = R*A*R^T */ 352 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C)); 353 break; 354 default: 355 /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */ 356 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C)); 357 break; 358 } 359 PetscCall(PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0)); 360 } 361 362 PetscCall(PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0)); 363 PetscCall(((*C)->ops->rartnumeric)(A, R, *C)); 364 PetscCall(PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0)); 365 PetscFunctionReturn(0); 366 } 367 368 /* ------------------------------------------------------------- */ 369 PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C) 370 { 371 Mat_Product *product = C->product; 372 Mat A = product->A, R = product->B; 373 MatProductAlgorithm alg = product->alg; 374 PetscReal fill = product->fill; 375 PetscBool flg; 376 377 PetscFunctionBegin; 378 PetscCall(PetscStrcmp(alg, "r*a*rt", &flg)); 379 if (flg) { 380 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C)); 381 goto next; 382 } 383 384 PetscCall(PetscStrcmp(alg, "r*art", &flg)); 385 if (flg) { 386 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C)); 387 goto next; 388 } 389 390 PetscCall(PetscStrcmp(alg, "coloring_rart", &flg)); 391 if (flg) { 392 PetscCall(MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C)); 393 goto next; 394 } 395 396 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported"); 397 398 next: 399 C->ops->productnumeric = MatProductNumeric_RARt; 400 PetscFunctionReturn(0); 401 } 402