1 2 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/utils/freespace.h> 4 5 /*@ 6 MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7 8 Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9 10 Input Parameter: 11 . A - the `MATMAIJ` matrix 12 13 Output Parameter: 14 . B - the `MATAIJ` matrix 15 16 Level: advanced 17 18 Note: 19 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20 21 .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22 @*/ 23 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 24 { 25 PetscBool ismpimaij, isseqmaij; 26 27 PetscFunctionBegin; 28 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 29 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 30 if (ismpimaij) { 31 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 32 33 *B = b->A; 34 } else if (isseqmaij) { 35 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 36 37 *B = b->AIJ; 38 } else { 39 *B = A; 40 } 41 PetscFunctionReturn(0); 42 } 43 44 /*@ 45 MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 46 47 Logically Collective 48 49 Input Parameters: 50 + A - the `MATMAIJ` matrix 51 - dof - the block size for the new matrix 52 53 Output Parameter: 54 . B - the new `MATMAIJ` matrix 55 56 Level: advanced 57 58 .seealso: `MATMAIJ`, `MatCreateMAIJ()` 59 @*/ 60 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 61 { 62 Mat Aij = NULL; 63 64 PetscFunctionBegin; 65 PetscValidLogicalCollectiveInt(A, dof, 2); 66 PetscCall(MatMAIJGetAIJ(A, &Aij)); 67 PetscCall(MatCreateMAIJ(Aij, dof, B)); 68 PetscFunctionReturn(0); 69 } 70 71 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 72 { 73 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 74 75 PetscFunctionBegin; 76 PetscCall(MatDestroy(&b->AIJ)); 77 PetscCall(PetscFree(A->data)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 79 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 80 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 81 PetscFunctionReturn(0); 82 } 83 84 PetscErrorCode MatSetUp_MAIJ(Mat A) 85 { 86 PetscFunctionBegin; 87 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 88 } 89 90 PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 91 { 92 Mat B; 93 94 PetscFunctionBegin; 95 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 96 PetscCall(MatView(B, viewer)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(0); 99 } 100 101 PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 107 PetscCall(MatView(B, viewer)); 108 PetscCall(MatDestroy(&B)); 109 PetscFunctionReturn(0); 110 } 111 112 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 113 { 114 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 115 116 PetscFunctionBegin; 117 PetscCall(MatDestroy(&b->AIJ)); 118 PetscCall(MatDestroy(&b->OAIJ)); 119 PetscCall(MatDestroy(&b->A)); 120 PetscCall(VecScatterDestroy(&b->ctx)); 121 PetscCall(VecDestroy(&b->w)); 122 PetscCall(PetscFree(A->data)); 123 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 124 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 125 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 126 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 127 PetscFunctionReturn(0); 128 } 129 130 /*MC 131 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 132 multicomponent problems, interpolating or restricting each component the same way independently. 133 The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 134 135 Operations provided: 136 .vb 137 MatMult() 138 MatMultTranspose() 139 MatMultAdd() 140 MatMultTransposeAdd() 141 .ve 142 143 Level: advanced 144 145 .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 146 M*/ 147 148 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 149 { 150 Mat_MPIMAIJ *b; 151 PetscMPIInt size; 152 153 PetscFunctionBegin; 154 PetscCall(PetscNew(&b)); 155 A->data = (void *)b; 156 157 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 158 159 A->ops->setup = MatSetUp_MAIJ; 160 161 b->AIJ = NULL; 162 b->dof = 0; 163 b->OAIJ = NULL; 164 b->ctx = NULL; 165 b->w = NULL; 166 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 167 if (size == 1) { 168 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 169 } else { 170 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 171 } 172 A->preallocated = PETSC_TRUE; 173 A->assembled = PETSC_TRUE; 174 PetscFunctionReturn(0); 175 } 176 177 /* --------------------------------------------------------------------------------------*/ 178 PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 179 { 180 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 181 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 182 const PetscScalar *x, *v; 183 PetscScalar *y, sum1, sum2; 184 PetscInt nonzerorow = 0, n, i, jrow, j; 185 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 186 187 PetscFunctionBegin; 188 PetscCall(VecGetArrayRead(xx, &x)); 189 PetscCall(VecGetArray(yy, &y)); 190 idx = a->j; 191 v = a->a; 192 ii = a->i; 193 194 for (i = 0; i < m; i++) { 195 jrow = ii[i]; 196 n = ii[i + 1] - jrow; 197 sum1 = 0.0; 198 sum2 = 0.0; 199 200 nonzerorow += (n > 0); 201 for (j = 0; j < n; j++) { 202 sum1 += v[jrow] * x[2 * idx[jrow]]; 203 sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 204 jrow++; 205 } 206 y[2 * i] = sum1; 207 y[2 * i + 1] = sum2; 208 } 209 210 PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow)); 211 PetscCall(VecRestoreArrayRead(xx, &x)); 212 PetscCall(VecRestoreArray(yy, &y)); 213 PetscFunctionReturn(0); 214 } 215 216 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) 217 { 218 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 219 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 220 const PetscScalar *x, *v; 221 PetscScalar *y, alpha1, alpha2; 222 PetscInt n, i; 223 const PetscInt m = b->AIJ->rmap->n, *idx; 224 225 PetscFunctionBegin; 226 PetscCall(VecSet(yy, 0.0)); 227 PetscCall(VecGetArrayRead(xx, &x)); 228 PetscCall(VecGetArray(yy, &y)); 229 230 for (i = 0; i < m; i++) { 231 idx = a->j + a->i[i]; 232 v = a->a + a->i[i]; 233 n = a->i[i + 1] - a->i[i]; 234 alpha1 = x[2 * i]; 235 alpha2 = x[2 * i + 1]; 236 while (n-- > 0) { 237 y[2 * (*idx)] += alpha1 * (*v); 238 y[2 * (*idx) + 1] += alpha2 * (*v); 239 idx++; 240 v++; 241 } 242 } 243 PetscCall(PetscLogFlops(4.0 * a->nz)); 244 PetscCall(VecRestoreArrayRead(xx, &x)); 245 PetscCall(VecRestoreArray(yy, &y)); 246 PetscFunctionReturn(0); 247 } 248 249 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 250 { 251 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 252 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 253 const PetscScalar *x, *v; 254 PetscScalar *y, sum1, sum2; 255 PetscInt n, i, jrow, j; 256 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 257 258 PetscFunctionBegin; 259 if (yy != zz) PetscCall(VecCopy(yy, zz)); 260 PetscCall(VecGetArrayRead(xx, &x)); 261 PetscCall(VecGetArray(zz, &y)); 262 idx = a->j; 263 v = a->a; 264 ii = a->i; 265 266 for (i = 0; i < m; i++) { 267 jrow = ii[i]; 268 n = ii[i + 1] - jrow; 269 sum1 = 0.0; 270 sum2 = 0.0; 271 for (j = 0; j < n; j++) { 272 sum1 += v[jrow] * x[2 * idx[jrow]]; 273 sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 274 jrow++; 275 } 276 y[2 * i] += sum1; 277 y[2 * i + 1] += sum2; 278 } 279 280 PetscCall(PetscLogFlops(4.0 * a->nz)); 281 PetscCall(VecRestoreArrayRead(xx, &x)); 282 PetscCall(VecRestoreArray(zz, &y)); 283 PetscFunctionReturn(0); 284 } 285 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) 286 { 287 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 288 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 289 const PetscScalar *x, *v; 290 PetscScalar *y, alpha1, alpha2; 291 PetscInt n, i; 292 const PetscInt m = b->AIJ->rmap->n, *idx; 293 294 PetscFunctionBegin; 295 if (yy != zz) PetscCall(VecCopy(yy, zz)); 296 PetscCall(VecGetArrayRead(xx, &x)); 297 PetscCall(VecGetArray(zz, &y)); 298 299 for (i = 0; i < m; i++) { 300 idx = a->j + a->i[i]; 301 v = a->a + a->i[i]; 302 n = a->i[i + 1] - a->i[i]; 303 alpha1 = x[2 * i]; 304 alpha2 = x[2 * i + 1]; 305 while (n-- > 0) { 306 y[2 * (*idx)] += alpha1 * (*v); 307 y[2 * (*idx) + 1] += alpha2 * (*v); 308 idx++; 309 v++; 310 } 311 } 312 PetscCall(PetscLogFlops(4.0 * a->nz)); 313 PetscCall(VecRestoreArrayRead(xx, &x)); 314 PetscCall(VecRestoreArray(zz, &y)); 315 PetscFunctionReturn(0); 316 } 317 /* --------------------------------------------------------------------------------------*/ 318 PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 319 { 320 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 321 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 322 const PetscScalar *x, *v; 323 PetscScalar *y, sum1, sum2, sum3; 324 PetscInt nonzerorow = 0, n, i, jrow, j; 325 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 326 327 PetscFunctionBegin; 328 PetscCall(VecGetArrayRead(xx, &x)); 329 PetscCall(VecGetArray(yy, &y)); 330 idx = a->j; 331 v = a->a; 332 ii = a->i; 333 334 for (i = 0; i < m; i++) { 335 jrow = ii[i]; 336 n = ii[i + 1] - jrow; 337 sum1 = 0.0; 338 sum2 = 0.0; 339 sum3 = 0.0; 340 341 nonzerorow += (n > 0); 342 for (j = 0; j < n; j++) { 343 sum1 += v[jrow] * x[3 * idx[jrow]]; 344 sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 345 sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 346 jrow++; 347 } 348 y[3 * i] = sum1; 349 y[3 * i + 1] = sum2; 350 y[3 * i + 2] = sum3; 351 } 352 353 PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow)); 354 PetscCall(VecRestoreArrayRead(xx, &x)); 355 PetscCall(VecRestoreArray(yy, &y)); 356 PetscFunctionReturn(0); 357 } 358 359 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) 360 { 361 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 362 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 363 const PetscScalar *x, *v; 364 PetscScalar *y, alpha1, alpha2, alpha3; 365 PetscInt n, i; 366 const PetscInt m = b->AIJ->rmap->n, *idx; 367 368 PetscFunctionBegin; 369 PetscCall(VecSet(yy, 0.0)); 370 PetscCall(VecGetArrayRead(xx, &x)); 371 PetscCall(VecGetArray(yy, &y)); 372 373 for (i = 0; i < m; i++) { 374 idx = a->j + a->i[i]; 375 v = a->a + a->i[i]; 376 n = a->i[i + 1] - a->i[i]; 377 alpha1 = x[3 * i]; 378 alpha2 = x[3 * i + 1]; 379 alpha3 = x[3 * i + 2]; 380 while (n-- > 0) { 381 y[3 * (*idx)] += alpha1 * (*v); 382 y[3 * (*idx) + 1] += alpha2 * (*v); 383 y[3 * (*idx) + 2] += alpha3 * (*v); 384 idx++; 385 v++; 386 } 387 } 388 PetscCall(PetscLogFlops(6.0 * a->nz)); 389 PetscCall(VecRestoreArrayRead(xx, &x)); 390 PetscCall(VecRestoreArray(yy, &y)); 391 PetscFunctionReturn(0); 392 } 393 394 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 395 { 396 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 397 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 398 const PetscScalar *x, *v; 399 PetscScalar *y, sum1, sum2, sum3; 400 PetscInt n, i, jrow, j; 401 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 402 403 PetscFunctionBegin; 404 if (yy != zz) PetscCall(VecCopy(yy, zz)); 405 PetscCall(VecGetArrayRead(xx, &x)); 406 PetscCall(VecGetArray(zz, &y)); 407 idx = a->j; 408 v = a->a; 409 ii = a->i; 410 411 for (i = 0; i < m; i++) { 412 jrow = ii[i]; 413 n = ii[i + 1] - jrow; 414 sum1 = 0.0; 415 sum2 = 0.0; 416 sum3 = 0.0; 417 for (j = 0; j < n; j++) { 418 sum1 += v[jrow] * x[3 * idx[jrow]]; 419 sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 420 sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 421 jrow++; 422 } 423 y[3 * i] += sum1; 424 y[3 * i + 1] += sum2; 425 y[3 * i + 2] += sum3; 426 } 427 428 PetscCall(PetscLogFlops(6.0 * a->nz)); 429 PetscCall(VecRestoreArrayRead(xx, &x)); 430 PetscCall(VecRestoreArray(zz, &y)); 431 PetscFunctionReturn(0); 432 } 433 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) 434 { 435 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 436 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 437 const PetscScalar *x, *v; 438 PetscScalar *y, alpha1, alpha2, alpha3; 439 PetscInt n, i; 440 const PetscInt m = b->AIJ->rmap->n, *idx; 441 442 PetscFunctionBegin; 443 if (yy != zz) PetscCall(VecCopy(yy, zz)); 444 PetscCall(VecGetArrayRead(xx, &x)); 445 PetscCall(VecGetArray(zz, &y)); 446 for (i = 0; i < m; i++) { 447 idx = a->j + a->i[i]; 448 v = a->a + a->i[i]; 449 n = a->i[i + 1] - a->i[i]; 450 alpha1 = x[3 * i]; 451 alpha2 = x[3 * i + 1]; 452 alpha3 = x[3 * i + 2]; 453 while (n-- > 0) { 454 y[3 * (*idx)] += alpha1 * (*v); 455 y[3 * (*idx) + 1] += alpha2 * (*v); 456 y[3 * (*idx) + 2] += alpha3 * (*v); 457 idx++; 458 v++; 459 } 460 } 461 PetscCall(PetscLogFlops(6.0 * a->nz)); 462 PetscCall(VecRestoreArrayRead(xx, &x)); 463 PetscCall(VecRestoreArray(zz, &y)); 464 PetscFunctionReturn(0); 465 } 466 467 /* ------------------------------------------------------------------------------*/ 468 PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 469 { 470 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 471 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 472 const PetscScalar *x, *v; 473 PetscScalar *y, sum1, sum2, sum3, sum4; 474 PetscInt nonzerorow = 0, n, i, jrow, j; 475 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 476 477 PetscFunctionBegin; 478 PetscCall(VecGetArrayRead(xx, &x)); 479 PetscCall(VecGetArray(yy, &y)); 480 idx = a->j; 481 v = a->a; 482 ii = a->i; 483 484 for (i = 0; i < m; i++) { 485 jrow = ii[i]; 486 n = ii[i + 1] - jrow; 487 sum1 = 0.0; 488 sum2 = 0.0; 489 sum3 = 0.0; 490 sum4 = 0.0; 491 nonzerorow += (n > 0); 492 for (j = 0; j < n; j++) { 493 sum1 += v[jrow] * x[4 * idx[jrow]]; 494 sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 495 sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 496 sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 497 jrow++; 498 } 499 y[4 * i] = sum1; 500 y[4 * i + 1] = sum2; 501 y[4 * i + 2] = sum3; 502 y[4 * i + 3] = sum4; 503 } 504 505 PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow)); 506 PetscCall(VecRestoreArrayRead(xx, &x)); 507 PetscCall(VecRestoreArray(yy, &y)); 508 PetscFunctionReturn(0); 509 } 510 511 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) 512 { 513 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 514 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 515 const PetscScalar *x, *v; 516 PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 517 PetscInt n, i; 518 const PetscInt m = b->AIJ->rmap->n, *idx; 519 520 PetscFunctionBegin; 521 PetscCall(VecSet(yy, 0.0)); 522 PetscCall(VecGetArrayRead(xx, &x)); 523 PetscCall(VecGetArray(yy, &y)); 524 for (i = 0; i < m; i++) { 525 idx = a->j + a->i[i]; 526 v = a->a + a->i[i]; 527 n = a->i[i + 1] - a->i[i]; 528 alpha1 = x[4 * i]; 529 alpha2 = x[4 * i + 1]; 530 alpha3 = x[4 * i + 2]; 531 alpha4 = x[4 * i + 3]; 532 while (n-- > 0) { 533 y[4 * (*idx)] += alpha1 * (*v); 534 y[4 * (*idx) + 1] += alpha2 * (*v); 535 y[4 * (*idx) + 2] += alpha3 * (*v); 536 y[4 * (*idx) + 3] += alpha4 * (*v); 537 idx++; 538 v++; 539 } 540 } 541 PetscCall(PetscLogFlops(8.0 * a->nz)); 542 PetscCall(VecRestoreArrayRead(xx, &x)); 543 PetscCall(VecRestoreArray(yy, &y)); 544 PetscFunctionReturn(0); 545 } 546 547 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 548 { 549 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 550 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 551 const PetscScalar *x, *v; 552 PetscScalar *y, sum1, sum2, sum3, sum4; 553 PetscInt n, i, jrow, j; 554 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 555 556 PetscFunctionBegin; 557 if (yy != zz) PetscCall(VecCopy(yy, zz)); 558 PetscCall(VecGetArrayRead(xx, &x)); 559 PetscCall(VecGetArray(zz, &y)); 560 idx = a->j; 561 v = a->a; 562 ii = a->i; 563 564 for (i = 0; i < m; i++) { 565 jrow = ii[i]; 566 n = ii[i + 1] - jrow; 567 sum1 = 0.0; 568 sum2 = 0.0; 569 sum3 = 0.0; 570 sum4 = 0.0; 571 for (j = 0; j < n; j++) { 572 sum1 += v[jrow] * x[4 * idx[jrow]]; 573 sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 574 sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 575 sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 576 jrow++; 577 } 578 y[4 * i] += sum1; 579 y[4 * i + 1] += sum2; 580 y[4 * i + 2] += sum3; 581 y[4 * i + 3] += sum4; 582 } 583 584 PetscCall(PetscLogFlops(8.0 * a->nz)); 585 PetscCall(VecRestoreArrayRead(xx, &x)); 586 PetscCall(VecRestoreArray(zz, &y)); 587 PetscFunctionReturn(0); 588 } 589 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) 590 { 591 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 592 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 593 const PetscScalar *x, *v; 594 PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 595 PetscInt n, i; 596 const PetscInt m = b->AIJ->rmap->n, *idx; 597 598 PetscFunctionBegin; 599 if (yy != zz) PetscCall(VecCopy(yy, zz)); 600 PetscCall(VecGetArrayRead(xx, &x)); 601 PetscCall(VecGetArray(zz, &y)); 602 603 for (i = 0; i < m; i++) { 604 idx = a->j + a->i[i]; 605 v = a->a + a->i[i]; 606 n = a->i[i + 1] - a->i[i]; 607 alpha1 = x[4 * i]; 608 alpha2 = x[4 * i + 1]; 609 alpha3 = x[4 * i + 2]; 610 alpha4 = x[4 * i + 3]; 611 while (n-- > 0) { 612 y[4 * (*idx)] += alpha1 * (*v); 613 y[4 * (*idx) + 1] += alpha2 * (*v); 614 y[4 * (*idx) + 2] += alpha3 * (*v); 615 y[4 * (*idx) + 3] += alpha4 * (*v); 616 idx++; 617 v++; 618 } 619 } 620 PetscCall(PetscLogFlops(8.0 * a->nz)); 621 PetscCall(VecRestoreArrayRead(xx, &x)); 622 PetscCall(VecRestoreArray(zz, &y)); 623 PetscFunctionReturn(0); 624 } 625 /* ------------------------------------------------------------------------------*/ 626 627 PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 628 { 629 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 630 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 631 const PetscScalar *x, *v; 632 PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 633 PetscInt nonzerorow = 0, n, i, jrow, j; 634 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 635 636 PetscFunctionBegin; 637 PetscCall(VecGetArrayRead(xx, &x)); 638 PetscCall(VecGetArray(yy, &y)); 639 idx = a->j; 640 v = a->a; 641 ii = a->i; 642 643 for (i = 0; i < m; i++) { 644 jrow = ii[i]; 645 n = ii[i + 1] - jrow; 646 sum1 = 0.0; 647 sum2 = 0.0; 648 sum3 = 0.0; 649 sum4 = 0.0; 650 sum5 = 0.0; 651 652 nonzerorow += (n > 0); 653 for (j = 0; j < n; j++) { 654 sum1 += v[jrow] * x[5 * idx[jrow]]; 655 sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 656 sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 657 sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 658 sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 659 jrow++; 660 } 661 y[5 * i] = sum1; 662 y[5 * i + 1] = sum2; 663 y[5 * i + 2] = sum3; 664 y[5 * i + 3] = sum4; 665 y[5 * i + 4] = sum5; 666 } 667 668 PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow)); 669 PetscCall(VecRestoreArrayRead(xx, &x)); 670 PetscCall(VecRestoreArray(yy, &y)); 671 PetscFunctionReturn(0); 672 } 673 674 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) 675 { 676 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 677 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 678 const PetscScalar *x, *v; 679 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 680 PetscInt n, i; 681 const PetscInt m = b->AIJ->rmap->n, *idx; 682 683 PetscFunctionBegin; 684 PetscCall(VecSet(yy, 0.0)); 685 PetscCall(VecGetArrayRead(xx, &x)); 686 PetscCall(VecGetArray(yy, &y)); 687 688 for (i = 0; i < m; i++) { 689 idx = a->j + a->i[i]; 690 v = a->a + a->i[i]; 691 n = a->i[i + 1] - a->i[i]; 692 alpha1 = x[5 * i]; 693 alpha2 = x[5 * i + 1]; 694 alpha3 = x[5 * i + 2]; 695 alpha4 = x[5 * i + 3]; 696 alpha5 = x[5 * i + 4]; 697 while (n-- > 0) { 698 y[5 * (*idx)] += alpha1 * (*v); 699 y[5 * (*idx) + 1] += alpha2 * (*v); 700 y[5 * (*idx) + 2] += alpha3 * (*v); 701 y[5 * (*idx) + 3] += alpha4 * (*v); 702 y[5 * (*idx) + 4] += alpha5 * (*v); 703 idx++; 704 v++; 705 } 706 } 707 PetscCall(PetscLogFlops(10.0 * a->nz)); 708 PetscCall(VecRestoreArrayRead(xx, &x)); 709 PetscCall(VecRestoreArray(yy, &y)); 710 PetscFunctionReturn(0); 711 } 712 713 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 714 { 715 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 716 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 717 const PetscScalar *x, *v; 718 PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 719 PetscInt n, i, jrow, j; 720 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 721 722 PetscFunctionBegin; 723 if (yy != zz) PetscCall(VecCopy(yy, zz)); 724 PetscCall(VecGetArrayRead(xx, &x)); 725 PetscCall(VecGetArray(zz, &y)); 726 idx = a->j; 727 v = a->a; 728 ii = a->i; 729 730 for (i = 0; i < m; i++) { 731 jrow = ii[i]; 732 n = ii[i + 1] - jrow; 733 sum1 = 0.0; 734 sum2 = 0.0; 735 sum3 = 0.0; 736 sum4 = 0.0; 737 sum5 = 0.0; 738 for (j = 0; j < n; j++) { 739 sum1 += v[jrow] * x[5 * idx[jrow]]; 740 sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 741 sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 742 sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 743 sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 744 jrow++; 745 } 746 y[5 * i] += sum1; 747 y[5 * i + 1] += sum2; 748 y[5 * i + 2] += sum3; 749 y[5 * i + 3] += sum4; 750 y[5 * i + 4] += sum5; 751 } 752 753 PetscCall(PetscLogFlops(10.0 * a->nz)); 754 PetscCall(VecRestoreArrayRead(xx, &x)); 755 PetscCall(VecRestoreArray(zz, &y)); 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) 760 { 761 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 762 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 763 const PetscScalar *x, *v; 764 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 765 PetscInt n, i; 766 const PetscInt m = b->AIJ->rmap->n, *idx; 767 768 PetscFunctionBegin; 769 if (yy != zz) PetscCall(VecCopy(yy, zz)); 770 PetscCall(VecGetArrayRead(xx, &x)); 771 PetscCall(VecGetArray(zz, &y)); 772 773 for (i = 0; i < m; i++) { 774 idx = a->j + a->i[i]; 775 v = a->a + a->i[i]; 776 n = a->i[i + 1] - a->i[i]; 777 alpha1 = x[5 * i]; 778 alpha2 = x[5 * i + 1]; 779 alpha3 = x[5 * i + 2]; 780 alpha4 = x[5 * i + 3]; 781 alpha5 = x[5 * i + 4]; 782 while (n-- > 0) { 783 y[5 * (*idx)] += alpha1 * (*v); 784 y[5 * (*idx) + 1] += alpha2 * (*v); 785 y[5 * (*idx) + 2] += alpha3 * (*v); 786 y[5 * (*idx) + 3] += alpha4 * (*v); 787 y[5 * (*idx) + 4] += alpha5 * (*v); 788 idx++; 789 v++; 790 } 791 } 792 PetscCall(PetscLogFlops(10.0 * a->nz)); 793 PetscCall(VecRestoreArrayRead(xx, &x)); 794 PetscCall(VecRestoreArray(zz, &y)); 795 PetscFunctionReturn(0); 796 } 797 798 /* ------------------------------------------------------------------------------*/ 799 PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 800 { 801 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 802 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 803 const PetscScalar *x, *v; 804 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 805 PetscInt nonzerorow = 0, n, i, jrow, j; 806 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 807 808 PetscFunctionBegin; 809 PetscCall(VecGetArrayRead(xx, &x)); 810 PetscCall(VecGetArray(yy, &y)); 811 idx = a->j; 812 v = a->a; 813 ii = a->i; 814 815 for (i = 0; i < m; i++) { 816 jrow = ii[i]; 817 n = ii[i + 1] - jrow; 818 sum1 = 0.0; 819 sum2 = 0.0; 820 sum3 = 0.0; 821 sum4 = 0.0; 822 sum5 = 0.0; 823 sum6 = 0.0; 824 825 nonzerorow += (n > 0); 826 for (j = 0; j < n; j++) { 827 sum1 += v[jrow] * x[6 * idx[jrow]]; 828 sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 829 sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 830 sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 831 sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 832 sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 833 jrow++; 834 } 835 y[6 * i] = sum1; 836 y[6 * i + 1] = sum2; 837 y[6 * i + 2] = sum3; 838 y[6 * i + 3] = sum4; 839 y[6 * i + 4] = sum5; 840 y[6 * i + 5] = sum6; 841 } 842 843 PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow)); 844 PetscCall(VecRestoreArrayRead(xx, &x)); 845 PetscCall(VecRestoreArray(yy, &y)); 846 PetscFunctionReturn(0); 847 } 848 849 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) 850 { 851 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 852 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 853 const PetscScalar *x, *v; 854 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 855 PetscInt n, i; 856 const PetscInt m = b->AIJ->rmap->n, *idx; 857 858 PetscFunctionBegin; 859 PetscCall(VecSet(yy, 0.0)); 860 PetscCall(VecGetArrayRead(xx, &x)); 861 PetscCall(VecGetArray(yy, &y)); 862 863 for (i = 0; i < m; i++) { 864 idx = a->j + a->i[i]; 865 v = a->a + a->i[i]; 866 n = a->i[i + 1] - a->i[i]; 867 alpha1 = x[6 * i]; 868 alpha2 = x[6 * i + 1]; 869 alpha3 = x[6 * i + 2]; 870 alpha4 = x[6 * i + 3]; 871 alpha5 = x[6 * i + 4]; 872 alpha6 = x[6 * i + 5]; 873 while (n-- > 0) { 874 y[6 * (*idx)] += alpha1 * (*v); 875 y[6 * (*idx) + 1] += alpha2 * (*v); 876 y[6 * (*idx) + 2] += alpha3 * (*v); 877 y[6 * (*idx) + 3] += alpha4 * (*v); 878 y[6 * (*idx) + 4] += alpha5 * (*v); 879 y[6 * (*idx) + 5] += alpha6 * (*v); 880 idx++; 881 v++; 882 } 883 } 884 PetscCall(PetscLogFlops(12.0 * a->nz)); 885 PetscCall(VecRestoreArrayRead(xx, &x)); 886 PetscCall(VecRestoreArray(yy, &y)); 887 PetscFunctionReturn(0); 888 } 889 890 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 891 { 892 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 893 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 894 const PetscScalar *x, *v; 895 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 896 PetscInt n, i, jrow, j; 897 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 898 899 PetscFunctionBegin; 900 if (yy != zz) PetscCall(VecCopy(yy, zz)); 901 PetscCall(VecGetArrayRead(xx, &x)); 902 PetscCall(VecGetArray(zz, &y)); 903 idx = a->j; 904 v = a->a; 905 ii = a->i; 906 907 for (i = 0; i < m; i++) { 908 jrow = ii[i]; 909 n = ii[i + 1] - jrow; 910 sum1 = 0.0; 911 sum2 = 0.0; 912 sum3 = 0.0; 913 sum4 = 0.0; 914 sum5 = 0.0; 915 sum6 = 0.0; 916 for (j = 0; j < n; j++) { 917 sum1 += v[jrow] * x[6 * idx[jrow]]; 918 sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 919 sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 920 sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 921 sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 922 sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 923 jrow++; 924 } 925 y[6 * i] += sum1; 926 y[6 * i + 1] += sum2; 927 y[6 * i + 2] += sum3; 928 y[6 * i + 3] += sum4; 929 y[6 * i + 4] += sum5; 930 y[6 * i + 5] += sum6; 931 } 932 933 PetscCall(PetscLogFlops(12.0 * a->nz)); 934 PetscCall(VecRestoreArrayRead(xx, &x)); 935 PetscCall(VecRestoreArray(zz, &y)); 936 PetscFunctionReturn(0); 937 } 938 939 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) 940 { 941 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 942 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 943 const PetscScalar *x, *v; 944 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 945 PetscInt n, i; 946 const PetscInt m = b->AIJ->rmap->n, *idx; 947 948 PetscFunctionBegin; 949 if (yy != zz) PetscCall(VecCopy(yy, zz)); 950 PetscCall(VecGetArrayRead(xx, &x)); 951 PetscCall(VecGetArray(zz, &y)); 952 953 for (i = 0; i < m; i++) { 954 idx = a->j + a->i[i]; 955 v = a->a + a->i[i]; 956 n = a->i[i + 1] - a->i[i]; 957 alpha1 = x[6 * i]; 958 alpha2 = x[6 * i + 1]; 959 alpha3 = x[6 * i + 2]; 960 alpha4 = x[6 * i + 3]; 961 alpha5 = x[6 * i + 4]; 962 alpha6 = x[6 * i + 5]; 963 while (n-- > 0) { 964 y[6 * (*idx)] += alpha1 * (*v); 965 y[6 * (*idx) + 1] += alpha2 * (*v); 966 y[6 * (*idx) + 2] += alpha3 * (*v); 967 y[6 * (*idx) + 3] += alpha4 * (*v); 968 y[6 * (*idx) + 4] += alpha5 * (*v); 969 y[6 * (*idx) + 5] += alpha6 * (*v); 970 idx++; 971 v++; 972 } 973 } 974 PetscCall(PetscLogFlops(12.0 * a->nz)); 975 PetscCall(VecRestoreArrayRead(xx, &x)); 976 PetscCall(VecRestoreArray(zz, &y)); 977 PetscFunctionReturn(0); 978 } 979 980 /* ------------------------------------------------------------------------------*/ 981 PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 982 { 983 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 984 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 985 const PetscScalar *x, *v; 986 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 987 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 988 PetscInt nonzerorow = 0, n, i, jrow, j; 989 990 PetscFunctionBegin; 991 PetscCall(VecGetArrayRead(xx, &x)); 992 PetscCall(VecGetArray(yy, &y)); 993 idx = a->j; 994 v = a->a; 995 ii = a->i; 996 997 for (i = 0; i < m; i++) { 998 jrow = ii[i]; 999 n = ii[i + 1] - jrow; 1000 sum1 = 0.0; 1001 sum2 = 0.0; 1002 sum3 = 0.0; 1003 sum4 = 0.0; 1004 sum5 = 0.0; 1005 sum6 = 0.0; 1006 sum7 = 0.0; 1007 1008 nonzerorow += (n > 0); 1009 for (j = 0; j < n; j++) { 1010 sum1 += v[jrow] * x[7 * idx[jrow]]; 1011 sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1012 sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1013 sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1014 sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1015 sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1016 sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1017 jrow++; 1018 } 1019 y[7 * i] = sum1; 1020 y[7 * i + 1] = sum2; 1021 y[7 * i + 2] = sum3; 1022 y[7 * i + 3] = sum4; 1023 y[7 * i + 4] = sum5; 1024 y[7 * i + 5] = sum6; 1025 y[7 * i + 6] = sum7; 1026 } 1027 1028 PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow)); 1029 PetscCall(VecRestoreArrayRead(xx, &x)); 1030 PetscCall(VecRestoreArray(yy, &y)); 1031 PetscFunctionReturn(0); 1032 } 1033 1034 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) 1035 { 1036 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1037 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1038 const PetscScalar *x, *v; 1039 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1040 const PetscInt m = b->AIJ->rmap->n, *idx; 1041 PetscInt n, i; 1042 1043 PetscFunctionBegin; 1044 PetscCall(VecSet(yy, 0.0)); 1045 PetscCall(VecGetArrayRead(xx, &x)); 1046 PetscCall(VecGetArray(yy, &y)); 1047 1048 for (i = 0; i < m; i++) { 1049 idx = a->j + a->i[i]; 1050 v = a->a + a->i[i]; 1051 n = a->i[i + 1] - a->i[i]; 1052 alpha1 = x[7 * i]; 1053 alpha2 = x[7 * i + 1]; 1054 alpha3 = x[7 * i + 2]; 1055 alpha4 = x[7 * i + 3]; 1056 alpha5 = x[7 * i + 4]; 1057 alpha6 = x[7 * i + 5]; 1058 alpha7 = x[7 * i + 6]; 1059 while (n-- > 0) { 1060 y[7 * (*idx)] += alpha1 * (*v); 1061 y[7 * (*idx) + 1] += alpha2 * (*v); 1062 y[7 * (*idx) + 2] += alpha3 * (*v); 1063 y[7 * (*idx) + 3] += alpha4 * (*v); 1064 y[7 * (*idx) + 4] += alpha5 * (*v); 1065 y[7 * (*idx) + 5] += alpha6 * (*v); 1066 y[7 * (*idx) + 6] += alpha7 * (*v); 1067 idx++; 1068 v++; 1069 } 1070 } 1071 PetscCall(PetscLogFlops(14.0 * a->nz)); 1072 PetscCall(VecRestoreArrayRead(xx, &x)); 1073 PetscCall(VecRestoreArray(yy, &y)); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 1078 { 1079 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1080 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1081 const PetscScalar *x, *v; 1082 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1083 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1084 PetscInt n, i, jrow, j; 1085 1086 PetscFunctionBegin; 1087 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1088 PetscCall(VecGetArrayRead(xx, &x)); 1089 PetscCall(VecGetArray(zz, &y)); 1090 idx = a->j; 1091 v = a->a; 1092 ii = a->i; 1093 1094 for (i = 0; i < m; i++) { 1095 jrow = ii[i]; 1096 n = ii[i + 1] - jrow; 1097 sum1 = 0.0; 1098 sum2 = 0.0; 1099 sum3 = 0.0; 1100 sum4 = 0.0; 1101 sum5 = 0.0; 1102 sum6 = 0.0; 1103 sum7 = 0.0; 1104 for (j = 0; j < n; j++) { 1105 sum1 += v[jrow] * x[7 * idx[jrow]]; 1106 sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1107 sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1108 sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1109 sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1110 sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1111 sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1112 jrow++; 1113 } 1114 y[7 * i] += sum1; 1115 y[7 * i + 1] += sum2; 1116 y[7 * i + 2] += sum3; 1117 y[7 * i + 3] += sum4; 1118 y[7 * i + 4] += sum5; 1119 y[7 * i + 5] += sum6; 1120 y[7 * i + 6] += sum7; 1121 } 1122 1123 PetscCall(PetscLogFlops(14.0 * a->nz)); 1124 PetscCall(VecRestoreArrayRead(xx, &x)); 1125 PetscCall(VecRestoreArray(zz, &y)); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) 1130 { 1131 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1132 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1133 const PetscScalar *x, *v; 1134 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1135 const PetscInt m = b->AIJ->rmap->n, *idx; 1136 PetscInt n, i; 1137 1138 PetscFunctionBegin; 1139 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1140 PetscCall(VecGetArrayRead(xx, &x)); 1141 PetscCall(VecGetArray(zz, &y)); 1142 for (i = 0; i < m; i++) { 1143 idx = a->j + a->i[i]; 1144 v = a->a + a->i[i]; 1145 n = a->i[i + 1] - a->i[i]; 1146 alpha1 = x[7 * i]; 1147 alpha2 = x[7 * i + 1]; 1148 alpha3 = x[7 * i + 2]; 1149 alpha4 = x[7 * i + 3]; 1150 alpha5 = x[7 * i + 4]; 1151 alpha6 = x[7 * i + 5]; 1152 alpha7 = x[7 * i + 6]; 1153 while (n-- > 0) { 1154 y[7 * (*idx)] += alpha1 * (*v); 1155 y[7 * (*idx) + 1] += alpha2 * (*v); 1156 y[7 * (*idx) + 2] += alpha3 * (*v); 1157 y[7 * (*idx) + 3] += alpha4 * (*v); 1158 y[7 * (*idx) + 4] += alpha5 * (*v); 1159 y[7 * (*idx) + 5] += alpha6 * (*v); 1160 y[7 * (*idx) + 6] += alpha7 * (*v); 1161 idx++; 1162 v++; 1163 } 1164 } 1165 PetscCall(PetscLogFlops(14.0 * a->nz)); 1166 PetscCall(VecRestoreArrayRead(xx, &x)); 1167 PetscCall(VecRestoreArray(zz, &y)); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 1172 { 1173 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1174 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1175 const PetscScalar *x, *v; 1176 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1177 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1178 PetscInt nonzerorow = 0, n, i, jrow, j; 1179 1180 PetscFunctionBegin; 1181 PetscCall(VecGetArrayRead(xx, &x)); 1182 PetscCall(VecGetArray(yy, &y)); 1183 idx = a->j; 1184 v = a->a; 1185 ii = a->i; 1186 1187 for (i = 0; i < m; i++) { 1188 jrow = ii[i]; 1189 n = ii[i + 1] - jrow; 1190 sum1 = 0.0; 1191 sum2 = 0.0; 1192 sum3 = 0.0; 1193 sum4 = 0.0; 1194 sum5 = 0.0; 1195 sum6 = 0.0; 1196 sum7 = 0.0; 1197 sum8 = 0.0; 1198 1199 nonzerorow += (n > 0); 1200 for (j = 0; j < n; j++) { 1201 sum1 += v[jrow] * x[8 * idx[jrow]]; 1202 sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 1203 sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 1204 sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 1205 sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 1206 sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 1207 sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 1208 sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 1209 jrow++; 1210 } 1211 y[8 * i] = sum1; 1212 y[8 * i + 1] = sum2; 1213 y[8 * i + 2] = sum3; 1214 y[8 * i + 3] = sum4; 1215 y[8 * i + 4] = sum5; 1216 y[8 * i + 5] = sum6; 1217 y[8 * i + 6] = sum7; 1218 y[8 * i + 7] = sum8; 1219 } 1220 1221 PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow)); 1222 PetscCall(VecRestoreArrayRead(xx, &x)); 1223 PetscCall(VecRestoreArray(yy, &y)); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) 1228 { 1229 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1230 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1231 const PetscScalar *x, *v; 1232 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1233 const PetscInt m = b->AIJ->rmap->n, *idx; 1234 PetscInt n, i; 1235 1236 PetscFunctionBegin; 1237 PetscCall(VecSet(yy, 0.0)); 1238 PetscCall(VecGetArrayRead(xx, &x)); 1239 PetscCall(VecGetArray(yy, &y)); 1240 1241 for (i = 0; i < m; i++) { 1242 idx = a->j + a->i[i]; 1243 v = a->a + a->i[i]; 1244 n = a->i[i + 1] - a->i[i]; 1245 alpha1 = x[8 * i]; 1246 alpha2 = x[8 * i + 1]; 1247 alpha3 = x[8 * i + 2]; 1248 alpha4 = x[8 * i + 3]; 1249 alpha5 = x[8 * i + 4]; 1250 alpha6 = x[8 * i + 5]; 1251 alpha7 = x[8 * i + 6]; 1252 alpha8 = x[8 * i + 7]; 1253 while (n-- > 0) { 1254 y[8 * (*idx)] += alpha1 * (*v); 1255 y[8 * (*idx) + 1] += alpha2 * (*v); 1256 y[8 * (*idx) + 2] += alpha3 * (*v); 1257 y[8 * (*idx) + 3] += alpha4 * (*v); 1258 y[8 * (*idx) + 4] += alpha5 * (*v); 1259 y[8 * (*idx) + 5] += alpha6 * (*v); 1260 y[8 * (*idx) + 6] += alpha7 * (*v); 1261 y[8 * (*idx) + 7] += alpha8 * (*v); 1262 idx++; 1263 v++; 1264 } 1265 } 1266 PetscCall(PetscLogFlops(16.0 * a->nz)); 1267 PetscCall(VecRestoreArrayRead(xx, &x)); 1268 PetscCall(VecRestoreArray(yy, &y)); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 1273 { 1274 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1275 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1276 const PetscScalar *x, *v; 1277 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1278 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1279 PetscInt n, i, jrow, j; 1280 1281 PetscFunctionBegin; 1282 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1283 PetscCall(VecGetArrayRead(xx, &x)); 1284 PetscCall(VecGetArray(zz, &y)); 1285 idx = a->j; 1286 v = a->a; 1287 ii = a->i; 1288 1289 for (i = 0; i < m; i++) { 1290 jrow = ii[i]; 1291 n = ii[i + 1] - jrow; 1292 sum1 = 0.0; 1293 sum2 = 0.0; 1294 sum3 = 0.0; 1295 sum4 = 0.0; 1296 sum5 = 0.0; 1297 sum6 = 0.0; 1298 sum7 = 0.0; 1299 sum8 = 0.0; 1300 for (j = 0; j < n; j++) { 1301 sum1 += v[jrow] * x[8 * idx[jrow]]; 1302 sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 1303 sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 1304 sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 1305 sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 1306 sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 1307 sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 1308 sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 1309 jrow++; 1310 } 1311 y[8 * i] += sum1; 1312 y[8 * i + 1] += sum2; 1313 y[8 * i + 2] += sum3; 1314 y[8 * i + 3] += sum4; 1315 y[8 * i + 4] += sum5; 1316 y[8 * i + 5] += sum6; 1317 y[8 * i + 6] += sum7; 1318 y[8 * i + 7] += sum8; 1319 } 1320 1321 PetscCall(PetscLogFlops(16.0 * a->nz)); 1322 PetscCall(VecRestoreArrayRead(xx, &x)); 1323 PetscCall(VecRestoreArray(zz, &y)); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) 1328 { 1329 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1330 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1331 const PetscScalar *x, *v; 1332 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1333 const PetscInt m = b->AIJ->rmap->n, *idx; 1334 PetscInt n, i; 1335 1336 PetscFunctionBegin; 1337 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1338 PetscCall(VecGetArrayRead(xx, &x)); 1339 PetscCall(VecGetArray(zz, &y)); 1340 for (i = 0; i < m; i++) { 1341 idx = a->j + a->i[i]; 1342 v = a->a + a->i[i]; 1343 n = a->i[i + 1] - a->i[i]; 1344 alpha1 = x[8 * i]; 1345 alpha2 = x[8 * i + 1]; 1346 alpha3 = x[8 * i + 2]; 1347 alpha4 = x[8 * i + 3]; 1348 alpha5 = x[8 * i + 4]; 1349 alpha6 = x[8 * i + 5]; 1350 alpha7 = x[8 * i + 6]; 1351 alpha8 = x[8 * i + 7]; 1352 while (n-- > 0) { 1353 y[8 * (*idx)] += alpha1 * (*v); 1354 y[8 * (*idx) + 1] += alpha2 * (*v); 1355 y[8 * (*idx) + 2] += alpha3 * (*v); 1356 y[8 * (*idx) + 3] += alpha4 * (*v); 1357 y[8 * (*idx) + 4] += alpha5 * (*v); 1358 y[8 * (*idx) + 5] += alpha6 * (*v); 1359 y[8 * (*idx) + 6] += alpha7 * (*v); 1360 y[8 * (*idx) + 7] += alpha8 * (*v); 1361 idx++; 1362 v++; 1363 } 1364 } 1365 PetscCall(PetscLogFlops(16.0 * a->nz)); 1366 PetscCall(VecRestoreArrayRead(xx, &x)); 1367 PetscCall(VecRestoreArray(zz, &y)); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 /* ------------------------------------------------------------------------------*/ 1372 PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 1373 { 1374 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1375 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1376 const PetscScalar *x, *v; 1377 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1378 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1379 PetscInt nonzerorow = 0, n, i, jrow, j; 1380 1381 PetscFunctionBegin; 1382 PetscCall(VecGetArrayRead(xx, &x)); 1383 PetscCall(VecGetArray(yy, &y)); 1384 idx = a->j; 1385 v = a->a; 1386 ii = a->i; 1387 1388 for (i = 0; i < m; i++) { 1389 jrow = ii[i]; 1390 n = ii[i + 1] - jrow; 1391 sum1 = 0.0; 1392 sum2 = 0.0; 1393 sum3 = 0.0; 1394 sum4 = 0.0; 1395 sum5 = 0.0; 1396 sum6 = 0.0; 1397 sum7 = 0.0; 1398 sum8 = 0.0; 1399 sum9 = 0.0; 1400 1401 nonzerorow += (n > 0); 1402 for (j = 0; j < n; j++) { 1403 sum1 += v[jrow] * x[9 * idx[jrow]]; 1404 sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 1405 sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 1406 sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 1407 sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 1408 sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 1409 sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 1410 sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 1411 sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 1412 jrow++; 1413 } 1414 y[9 * i] = sum1; 1415 y[9 * i + 1] = sum2; 1416 y[9 * i + 2] = sum3; 1417 y[9 * i + 3] = sum4; 1418 y[9 * i + 4] = sum5; 1419 y[9 * i + 5] = sum6; 1420 y[9 * i + 6] = sum7; 1421 y[9 * i + 7] = sum8; 1422 y[9 * i + 8] = sum9; 1423 } 1424 1425 PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow)); 1426 PetscCall(VecRestoreArrayRead(xx, &x)); 1427 PetscCall(VecRestoreArray(yy, &y)); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 /* ------------------------------------------------------------------------------*/ 1432 1433 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) 1434 { 1435 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1436 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1437 const PetscScalar *x, *v; 1438 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1439 const PetscInt m = b->AIJ->rmap->n, *idx; 1440 PetscInt n, i; 1441 1442 PetscFunctionBegin; 1443 PetscCall(VecSet(yy, 0.0)); 1444 PetscCall(VecGetArrayRead(xx, &x)); 1445 PetscCall(VecGetArray(yy, &y)); 1446 1447 for (i = 0; i < m; i++) { 1448 idx = a->j + a->i[i]; 1449 v = a->a + a->i[i]; 1450 n = a->i[i + 1] - a->i[i]; 1451 alpha1 = x[9 * i]; 1452 alpha2 = x[9 * i + 1]; 1453 alpha3 = x[9 * i + 2]; 1454 alpha4 = x[9 * i + 3]; 1455 alpha5 = x[9 * i + 4]; 1456 alpha6 = x[9 * i + 5]; 1457 alpha7 = x[9 * i + 6]; 1458 alpha8 = x[9 * i + 7]; 1459 alpha9 = x[9 * i + 8]; 1460 while (n-- > 0) { 1461 y[9 * (*idx)] += alpha1 * (*v); 1462 y[9 * (*idx) + 1] += alpha2 * (*v); 1463 y[9 * (*idx) + 2] += alpha3 * (*v); 1464 y[9 * (*idx) + 3] += alpha4 * (*v); 1465 y[9 * (*idx) + 4] += alpha5 * (*v); 1466 y[9 * (*idx) + 5] += alpha6 * (*v); 1467 y[9 * (*idx) + 6] += alpha7 * (*v); 1468 y[9 * (*idx) + 7] += alpha8 * (*v); 1469 y[9 * (*idx) + 8] += alpha9 * (*v); 1470 idx++; 1471 v++; 1472 } 1473 } 1474 PetscCall(PetscLogFlops(18.0 * a->nz)); 1475 PetscCall(VecRestoreArrayRead(xx, &x)); 1476 PetscCall(VecRestoreArray(yy, &y)); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 1481 { 1482 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1483 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1484 const PetscScalar *x, *v; 1485 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1486 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1487 PetscInt n, i, jrow, j; 1488 1489 PetscFunctionBegin; 1490 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1491 PetscCall(VecGetArrayRead(xx, &x)); 1492 PetscCall(VecGetArray(zz, &y)); 1493 idx = a->j; 1494 v = a->a; 1495 ii = a->i; 1496 1497 for (i = 0; i < m; i++) { 1498 jrow = ii[i]; 1499 n = ii[i + 1] - jrow; 1500 sum1 = 0.0; 1501 sum2 = 0.0; 1502 sum3 = 0.0; 1503 sum4 = 0.0; 1504 sum5 = 0.0; 1505 sum6 = 0.0; 1506 sum7 = 0.0; 1507 sum8 = 0.0; 1508 sum9 = 0.0; 1509 for (j = 0; j < n; j++) { 1510 sum1 += v[jrow] * x[9 * idx[jrow]]; 1511 sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 1512 sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 1513 sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 1514 sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 1515 sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 1516 sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 1517 sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 1518 sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 1519 jrow++; 1520 } 1521 y[9 * i] += sum1; 1522 y[9 * i + 1] += sum2; 1523 y[9 * i + 2] += sum3; 1524 y[9 * i + 3] += sum4; 1525 y[9 * i + 4] += sum5; 1526 y[9 * i + 5] += sum6; 1527 y[9 * i + 6] += sum7; 1528 y[9 * i + 7] += sum8; 1529 y[9 * i + 8] += sum9; 1530 } 1531 1532 PetscCall(PetscLogFlops(18.0 * a->nz)); 1533 PetscCall(VecRestoreArrayRead(xx, &x)); 1534 PetscCall(VecRestoreArray(zz, &y)); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) 1539 { 1540 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1541 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1542 const PetscScalar *x, *v; 1543 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1544 const PetscInt m = b->AIJ->rmap->n, *idx; 1545 PetscInt n, i; 1546 1547 PetscFunctionBegin; 1548 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1549 PetscCall(VecGetArrayRead(xx, &x)); 1550 PetscCall(VecGetArray(zz, &y)); 1551 for (i = 0; i < m; i++) { 1552 idx = a->j + a->i[i]; 1553 v = a->a + a->i[i]; 1554 n = a->i[i + 1] - a->i[i]; 1555 alpha1 = x[9 * i]; 1556 alpha2 = x[9 * i + 1]; 1557 alpha3 = x[9 * i + 2]; 1558 alpha4 = x[9 * i + 3]; 1559 alpha5 = x[9 * i + 4]; 1560 alpha6 = x[9 * i + 5]; 1561 alpha7 = x[9 * i + 6]; 1562 alpha8 = x[9 * i + 7]; 1563 alpha9 = x[9 * i + 8]; 1564 while (n-- > 0) { 1565 y[9 * (*idx)] += alpha1 * (*v); 1566 y[9 * (*idx) + 1] += alpha2 * (*v); 1567 y[9 * (*idx) + 2] += alpha3 * (*v); 1568 y[9 * (*idx) + 3] += alpha4 * (*v); 1569 y[9 * (*idx) + 4] += alpha5 * (*v); 1570 y[9 * (*idx) + 5] += alpha6 * (*v); 1571 y[9 * (*idx) + 6] += alpha7 * (*v); 1572 y[9 * (*idx) + 7] += alpha8 * (*v); 1573 y[9 * (*idx) + 8] += alpha9 * (*v); 1574 idx++; 1575 v++; 1576 } 1577 } 1578 PetscCall(PetscLogFlops(18.0 * a->nz)); 1579 PetscCall(VecRestoreArrayRead(xx, &x)); 1580 PetscCall(VecRestoreArray(zz, &y)); 1581 PetscFunctionReturn(0); 1582 } 1583 PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 1584 { 1585 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1586 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1587 const PetscScalar *x, *v; 1588 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1589 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1590 PetscInt nonzerorow = 0, n, i, jrow, j; 1591 1592 PetscFunctionBegin; 1593 PetscCall(VecGetArrayRead(xx, &x)); 1594 PetscCall(VecGetArray(yy, &y)); 1595 idx = a->j; 1596 v = a->a; 1597 ii = a->i; 1598 1599 for (i = 0; i < m; i++) { 1600 jrow = ii[i]; 1601 n = ii[i + 1] - jrow; 1602 sum1 = 0.0; 1603 sum2 = 0.0; 1604 sum3 = 0.0; 1605 sum4 = 0.0; 1606 sum5 = 0.0; 1607 sum6 = 0.0; 1608 sum7 = 0.0; 1609 sum8 = 0.0; 1610 sum9 = 0.0; 1611 sum10 = 0.0; 1612 1613 nonzerorow += (n > 0); 1614 for (j = 0; j < n; j++) { 1615 sum1 += v[jrow] * x[10 * idx[jrow]]; 1616 sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1617 sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1618 sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1619 sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1620 sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1621 sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1622 sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1623 sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1624 sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1625 jrow++; 1626 } 1627 y[10 * i] = sum1; 1628 y[10 * i + 1] = sum2; 1629 y[10 * i + 2] = sum3; 1630 y[10 * i + 3] = sum4; 1631 y[10 * i + 4] = sum5; 1632 y[10 * i + 5] = sum6; 1633 y[10 * i + 6] = sum7; 1634 y[10 * i + 7] = sum8; 1635 y[10 * i + 8] = sum9; 1636 y[10 * i + 9] = sum10; 1637 } 1638 1639 PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow)); 1640 PetscCall(VecRestoreArrayRead(xx, &x)); 1641 PetscCall(VecRestoreArray(yy, &y)); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 1646 { 1647 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1648 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1649 const PetscScalar *x, *v; 1650 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1651 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1652 PetscInt n, i, jrow, j; 1653 1654 PetscFunctionBegin; 1655 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1656 PetscCall(VecGetArrayRead(xx, &x)); 1657 PetscCall(VecGetArray(zz, &y)); 1658 idx = a->j; 1659 v = a->a; 1660 ii = a->i; 1661 1662 for (i = 0; i < m; i++) { 1663 jrow = ii[i]; 1664 n = ii[i + 1] - jrow; 1665 sum1 = 0.0; 1666 sum2 = 0.0; 1667 sum3 = 0.0; 1668 sum4 = 0.0; 1669 sum5 = 0.0; 1670 sum6 = 0.0; 1671 sum7 = 0.0; 1672 sum8 = 0.0; 1673 sum9 = 0.0; 1674 sum10 = 0.0; 1675 for (j = 0; j < n; j++) { 1676 sum1 += v[jrow] * x[10 * idx[jrow]]; 1677 sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1678 sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1679 sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1680 sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1681 sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1682 sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1683 sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1684 sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1685 sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1686 jrow++; 1687 } 1688 y[10 * i] += sum1; 1689 y[10 * i + 1] += sum2; 1690 y[10 * i + 2] += sum3; 1691 y[10 * i + 3] += sum4; 1692 y[10 * i + 4] += sum5; 1693 y[10 * i + 5] += sum6; 1694 y[10 * i + 6] += sum7; 1695 y[10 * i + 7] += sum8; 1696 y[10 * i + 8] += sum9; 1697 y[10 * i + 9] += sum10; 1698 } 1699 1700 PetscCall(PetscLogFlops(20.0 * a->nz)); 1701 PetscCall(VecRestoreArrayRead(xx, &x)); 1702 PetscCall(VecRestoreArray(yy, &y)); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) 1707 { 1708 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1709 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1710 const PetscScalar *x, *v; 1711 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1712 const PetscInt m = b->AIJ->rmap->n, *idx; 1713 PetscInt n, i; 1714 1715 PetscFunctionBegin; 1716 PetscCall(VecSet(yy, 0.0)); 1717 PetscCall(VecGetArrayRead(xx, &x)); 1718 PetscCall(VecGetArray(yy, &y)); 1719 1720 for (i = 0; i < m; i++) { 1721 idx = a->j + a->i[i]; 1722 v = a->a + a->i[i]; 1723 n = a->i[i + 1] - a->i[i]; 1724 alpha1 = x[10 * i]; 1725 alpha2 = x[10 * i + 1]; 1726 alpha3 = x[10 * i + 2]; 1727 alpha4 = x[10 * i + 3]; 1728 alpha5 = x[10 * i + 4]; 1729 alpha6 = x[10 * i + 5]; 1730 alpha7 = x[10 * i + 6]; 1731 alpha8 = x[10 * i + 7]; 1732 alpha9 = x[10 * i + 8]; 1733 alpha10 = x[10 * i + 9]; 1734 while (n-- > 0) { 1735 y[10 * (*idx)] += alpha1 * (*v); 1736 y[10 * (*idx) + 1] += alpha2 * (*v); 1737 y[10 * (*idx) + 2] += alpha3 * (*v); 1738 y[10 * (*idx) + 3] += alpha4 * (*v); 1739 y[10 * (*idx) + 4] += alpha5 * (*v); 1740 y[10 * (*idx) + 5] += alpha6 * (*v); 1741 y[10 * (*idx) + 6] += alpha7 * (*v); 1742 y[10 * (*idx) + 7] += alpha8 * (*v); 1743 y[10 * (*idx) + 8] += alpha9 * (*v); 1744 y[10 * (*idx) + 9] += alpha10 * (*v); 1745 idx++; 1746 v++; 1747 } 1748 } 1749 PetscCall(PetscLogFlops(20.0 * a->nz)); 1750 PetscCall(VecRestoreArrayRead(xx, &x)); 1751 PetscCall(VecRestoreArray(yy, &y)); 1752 PetscFunctionReturn(0); 1753 } 1754 1755 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) 1756 { 1757 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1758 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1759 const PetscScalar *x, *v; 1760 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1761 const PetscInt m = b->AIJ->rmap->n, *idx; 1762 PetscInt n, i; 1763 1764 PetscFunctionBegin; 1765 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1766 PetscCall(VecGetArrayRead(xx, &x)); 1767 PetscCall(VecGetArray(zz, &y)); 1768 for (i = 0; i < m; i++) { 1769 idx = a->j + a->i[i]; 1770 v = a->a + a->i[i]; 1771 n = a->i[i + 1] - a->i[i]; 1772 alpha1 = x[10 * i]; 1773 alpha2 = x[10 * i + 1]; 1774 alpha3 = x[10 * i + 2]; 1775 alpha4 = x[10 * i + 3]; 1776 alpha5 = x[10 * i + 4]; 1777 alpha6 = x[10 * i + 5]; 1778 alpha7 = x[10 * i + 6]; 1779 alpha8 = x[10 * i + 7]; 1780 alpha9 = x[10 * i + 8]; 1781 alpha10 = x[10 * i + 9]; 1782 while (n-- > 0) { 1783 y[10 * (*idx)] += alpha1 * (*v); 1784 y[10 * (*idx) + 1] += alpha2 * (*v); 1785 y[10 * (*idx) + 2] += alpha3 * (*v); 1786 y[10 * (*idx) + 3] += alpha4 * (*v); 1787 y[10 * (*idx) + 4] += alpha5 * (*v); 1788 y[10 * (*idx) + 5] += alpha6 * (*v); 1789 y[10 * (*idx) + 6] += alpha7 * (*v); 1790 y[10 * (*idx) + 7] += alpha8 * (*v); 1791 y[10 * (*idx) + 8] += alpha9 * (*v); 1792 y[10 * (*idx) + 9] += alpha10 * (*v); 1793 idx++; 1794 v++; 1795 } 1796 } 1797 PetscCall(PetscLogFlops(20.0 * a->nz)); 1798 PetscCall(VecRestoreArrayRead(xx, &x)); 1799 PetscCall(VecRestoreArray(zz, &y)); 1800 PetscFunctionReturn(0); 1801 } 1802 1803 /*--------------------------------------------------------------------------------------------*/ 1804 PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 1805 { 1806 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1807 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1808 const PetscScalar *x, *v; 1809 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1810 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1811 PetscInt nonzerorow = 0, n, i, jrow, j; 1812 1813 PetscFunctionBegin; 1814 PetscCall(VecGetArrayRead(xx, &x)); 1815 PetscCall(VecGetArray(yy, &y)); 1816 idx = a->j; 1817 v = a->a; 1818 ii = a->i; 1819 1820 for (i = 0; i < m; i++) { 1821 jrow = ii[i]; 1822 n = ii[i + 1] - jrow; 1823 sum1 = 0.0; 1824 sum2 = 0.0; 1825 sum3 = 0.0; 1826 sum4 = 0.0; 1827 sum5 = 0.0; 1828 sum6 = 0.0; 1829 sum7 = 0.0; 1830 sum8 = 0.0; 1831 sum9 = 0.0; 1832 sum10 = 0.0; 1833 sum11 = 0.0; 1834 1835 nonzerorow += (n > 0); 1836 for (j = 0; j < n; j++) { 1837 sum1 += v[jrow] * x[11 * idx[jrow]]; 1838 sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1839 sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1840 sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1841 sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1842 sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1843 sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1844 sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1845 sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1846 sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1847 sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1848 jrow++; 1849 } 1850 y[11 * i] = sum1; 1851 y[11 * i + 1] = sum2; 1852 y[11 * i + 2] = sum3; 1853 y[11 * i + 3] = sum4; 1854 y[11 * i + 4] = sum5; 1855 y[11 * i + 5] = sum6; 1856 y[11 * i + 6] = sum7; 1857 y[11 * i + 7] = sum8; 1858 y[11 * i + 8] = sum9; 1859 y[11 * i + 9] = sum10; 1860 y[11 * i + 10] = sum11; 1861 } 1862 1863 PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow)); 1864 PetscCall(VecRestoreArrayRead(xx, &x)); 1865 PetscCall(VecRestoreArray(yy, &y)); 1866 PetscFunctionReturn(0); 1867 } 1868 1869 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 1870 { 1871 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1872 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1873 const PetscScalar *x, *v; 1874 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1875 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1876 PetscInt n, i, jrow, j; 1877 1878 PetscFunctionBegin; 1879 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1880 PetscCall(VecGetArrayRead(xx, &x)); 1881 PetscCall(VecGetArray(zz, &y)); 1882 idx = a->j; 1883 v = a->a; 1884 ii = a->i; 1885 1886 for (i = 0; i < m; i++) { 1887 jrow = ii[i]; 1888 n = ii[i + 1] - jrow; 1889 sum1 = 0.0; 1890 sum2 = 0.0; 1891 sum3 = 0.0; 1892 sum4 = 0.0; 1893 sum5 = 0.0; 1894 sum6 = 0.0; 1895 sum7 = 0.0; 1896 sum8 = 0.0; 1897 sum9 = 0.0; 1898 sum10 = 0.0; 1899 sum11 = 0.0; 1900 for (j = 0; j < n; j++) { 1901 sum1 += v[jrow] * x[11 * idx[jrow]]; 1902 sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1903 sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1904 sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1905 sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1906 sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1907 sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1908 sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1909 sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1910 sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1911 sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1912 jrow++; 1913 } 1914 y[11 * i] += sum1; 1915 y[11 * i + 1] += sum2; 1916 y[11 * i + 2] += sum3; 1917 y[11 * i + 3] += sum4; 1918 y[11 * i + 4] += sum5; 1919 y[11 * i + 5] += sum6; 1920 y[11 * i + 6] += sum7; 1921 y[11 * i + 7] += sum8; 1922 y[11 * i + 8] += sum9; 1923 y[11 * i + 9] += sum10; 1924 y[11 * i + 10] += sum11; 1925 } 1926 1927 PetscCall(PetscLogFlops(22.0 * a->nz)); 1928 PetscCall(VecRestoreArrayRead(xx, &x)); 1929 PetscCall(VecRestoreArray(yy, &y)); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) 1934 { 1935 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1936 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1937 const PetscScalar *x, *v; 1938 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1939 const PetscInt m = b->AIJ->rmap->n, *idx; 1940 PetscInt n, i; 1941 1942 PetscFunctionBegin; 1943 PetscCall(VecSet(yy, 0.0)); 1944 PetscCall(VecGetArrayRead(xx, &x)); 1945 PetscCall(VecGetArray(yy, &y)); 1946 1947 for (i = 0; i < m; i++) { 1948 idx = a->j + a->i[i]; 1949 v = a->a + a->i[i]; 1950 n = a->i[i + 1] - a->i[i]; 1951 alpha1 = x[11 * i]; 1952 alpha2 = x[11 * i + 1]; 1953 alpha3 = x[11 * i + 2]; 1954 alpha4 = x[11 * i + 3]; 1955 alpha5 = x[11 * i + 4]; 1956 alpha6 = x[11 * i + 5]; 1957 alpha7 = x[11 * i + 6]; 1958 alpha8 = x[11 * i + 7]; 1959 alpha9 = x[11 * i + 8]; 1960 alpha10 = x[11 * i + 9]; 1961 alpha11 = x[11 * i + 10]; 1962 while (n-- > 0) { 1963 y[11 * (*idx)] += alpha1 * (*v); 1964 y[11 * (*idx) + 1] += alpha2 * (*v); 1965 y[11 * (*idx) + 2] += alpha3 * (*v); 1966 y[11 * (*idx) + 3] += alpha4 * (*v); 1967 y[11 * (*idx) + 4] += alpha5 * (*v); 1968 y[11 * (*idx) + 5] += alpha6 * (*v); 1969 y[11 * (*idx) + 6] += alpha7 * (*v); 1970 y[11 * (*idx) + 7] += alpha8 * (*v); 1971 y[11 * (*idx) + 8] += alpha9 * (*v); 1972 y[11 * (*idx) + 9] += alpha10 * (*v); 1973 y[11 * (*idx) + 10] += alpha11 * (*v); 1974 idx++; 1975 v++; 1976 } 1977 } 1978 PetscCall(PetscLogFlops(22.0 * a->nz)); 1979 PetscCall(VecRestoreArrayRead(xx, &x)); 1980 PetscCall(VecRestoreArray(yy, &y)); 1981 PetscFunctionReturn(0); 1982 } 1983 1984 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) 1985 { 1986 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1987 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1988 const PetscScalar *x, *v; 1989 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1990 const PetscInt m = b->AIJ->rmap->n, *idx; 1991 PetscInt n, i; 1992 1993 PetscFunctionBegin; 1994 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1995 PetscCall(VecGetArrayRead(xx, &x)); 1996 PetscCall(VecGetArray(zz, &y)); 1997 for (i = 0; i < m; i++) { 1998 idx = a->j + a->i[i]; 1999 v = a->a + a->i[i]; 2000 n = a->i[i + 1] - a->i[i]; 2001 alpha1 = x[11 * i]; 2002 alpha2 = x[11 * i + 1]; 2003 alpha3 = x[11 * i + 2]; 2004 alpha4 = x[11 * i + 3]; 2005 alpha5 = x[11 * i + 4]; 2006 alpha6 = x[11 * i + 5]; 2007 alpha7 = x[11 * i + 6]; 2008 alpha8 = x[11 * i + 7]; 2009 alpha9 = x[11 * i + 8]; 2010 alpha10 = x[11 * i + 9]; 2011 alpha11 = x[11 * i + 10]; 2012 while (n-- > 0) { 2013 y[11 * (*idx)] += alpha1 * (*v); 2014 y[11 * (*idx) + 1] += alpha2 * (*v); 2015 y[11 * (*idx) + 2] += alpha3 * (*v); 2016 y[11 * (*idx) + 3] += alpha4 * (*v); 2017 y[11 * (*idx) + 4] += alpha5 * (*v); 2018 y[11 * (*idx) + 5] += alpha6 * (*v); 2019 y[11 * (*idx) + 6] += alpha7 * (*v); 2020 y[11 * (*idx) + 7] += alpha8 * (*v); 2021 y[11 * (*idx) + 8] += alpha9 * (*v); 2022 y[11 * (*idx) + 9] += alpha10 * (*v); 2023 y[11 * (*idx) + 10] += alpha11 * (*v); 2024 idx++; 2025 v++; 2026 } 2027 } 2028 PetscCall(PetscLogFlops(22.0 * a->nz)); 2029 PetscCall(VecRestoreArrayRead(xx, &x)); 2030 PetscCall(VecRestoreArray(zz, &y)); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 /*--------------------------------------------------------------------------------------------*/ 2035 PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 2036 { 2037 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2038 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2039 const PetscScalar *x, *v; 2040 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2041 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2042 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2043 PetscInt nonzerorow = 0, n, i, jrow, j; 2044 2045 PetscFunctionBegin; 2046 PetscCall(VecGetArrayRead(xx, &x)); 2047 PetscCall(VecGetArray(yy, &y)); 2048 idx = a->j; 2049 v = a->a; 2050 ii = a->i; 2051 2052 for (i = 0; i < m; i++) { 2053 jrow = ii[i]; 2054 n = ii[i + 1] - jrow; 2055 sum1 = 0.0; 2056 sum2 = 0.0; 2057 sum3 = 0.0; 2058 sum4 = 0.0; 2059 sum5 = 0.0; 2060 sum6 = 0.0; 2061 sum7 = 0.0; 2062 sum8 = 0.0; 2063 sum9 = 0.0; 2064 sum10 = 0.0; 2065 sum11 = 0.0; 2066 sum12 = 0.0; 2067 sum13 = 0.0; 2068 sum14 = 0.0; 2069 sum15 = 0.0; 2070 sum16 = 0.0; 2071 2072 nonzerorow += (n > 0); 2073 for (j = 0; j < n; j++) { 2074 sum1 += v[jrow] * x[16 * idx[jrow]]; 2075 sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 2076 sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 2077 sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 2078 sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 2079 sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 2080 sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 2081 sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 2082 sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 2083 sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 2084 sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 2085 sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 2086 sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 2087 sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 2088 sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 2089 sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 2090 jrow++; 2091 } 2092 y[16 * i] = sum1; 2093 y[16 * i + 1] = sum2; 2094 y[16 * i + 2] = sum3; 2095 y[16 * i + 3] = sum4; 2096 y[16 * i + 4] = sum5; 2097 y[16 * i + 5] = sum6; 2098 y[16 * i + 6] = sum7; 2099 y[16 * i + 7] = sum8; 2100 y[16 * i + 8] = sum9; 2101 y[16 * i + 9] = sum10; 2102 y[16 * i + 10] = sum11; 2103 y[16 * i + 11] = sum12; 2104 y[16 * i + 12] = sum13; 2105 y[16 * i + 13] = sum14; 2106 y[16 * i + 14] = sum15; 2107 y[16 * i + 15] = sum16; 2108 } 2109 2110 PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow)); 2111 PetscCall(VecRestoreArrayRead(xx, &x)); 2112 PetscCall(VecRestoreArray(yy, &y)); 2113 PetscFunctionReturn(0); 2114 } 2115 2116 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) 2117 { 2118 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2119 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2120 const PetscScalar *x, *v; 2121 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2122 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2123 const PetscInt m = b->AIJ->rmap->n, *idx; 2124 PetscInt n, i; 2125 2126 PetscFunctionBegin; 2127 PetscCall(VecSet(yy, 0.0)); 2128 PetscCall(VecGetArrayRead(xx, &x)); 2129 PetscCall(VecGetArray(yy, &y)); 2130 2131 for (i = 0; i < m; i++) { 2132 idx = a->j + a->i[i]; 2133 v = a->a + a->i[i]; 2134 n = a->i[i + 1] - a->i[i]; 2135 alpha1 = x[16 * i]; 2136 alpha2 = x[16 * i + 1]; 2137 alpha3 = x[16 * i + 2]; 2138 alpha4 = x[16 * i + 3]; 2139 alpha5 = x[16 * i + 4]; 2140 alpha6 = x[16 * i + 5]; 2141 alpha7 = x[16 * i + 6]; 2142 alpha8 = x[16 * i + 7]; 2143 alpha9 = x[16 * i + 8]; 2144 alpha10 = x[16 * i + 9]; 2145 alpha11 = x[16 * i + 10]; 2146 alpha12 = x[16 * i + 11]; 2147 alpha13 = x[16 * i + 12]; 2148 alpha14 = x[16 * i + 13]; 2149 alpha15 = x[16 * i + 14]; 2150 alpha16 = x[16 * i + 15]; 2151 while (n-- > 0) { 2152 y[16 * (*idx)] += alpha1 * (*v); 2153 y[16 * (*idx) + 1] += alpha2 * (*v); 2154 y[16 * (*idx) + 2] += alpha3 * (*v); 2155 y[16 * (*idx) + 3] += alpha4 * (*v); 2156 y[16 * (*idx) + 4] += alpha5 * (*v); 2157 y[16 * (*idx) + 5] += alpha6 * (*v); 2158 y[16 * (*idx) + 6] += alpha7 * (*v); 2159 y[16 * (*idx) + 7] += alpha8 * (*v); 2160 y[16 * (*idx) + 8] += alpha9 * (*v); 2161 y[16 * (*idx) + 9] += alpha10 * (*v); 2162 y[16 * (*idx) + 10] += alpha11 * (*v); 2163 y[16 * (*idx) + 11] += alpha12 * (*v); 2164 y[16 * (*idx) + 12] += alpha13 * (*v); 2165 y[16 * (*idx) + 13] += alpha14 * (*v); 2166 y[16 * (*idx) + 14] += alpha15 * (*v); 2167 y[16 * (*idx) + 15] += alpha16 * (*v); 2168 idx++; 2169 v++; 2170 } 2171 } 2172 PetscCall(PetscLogFlops(32.0 * a->nz)); 2173 PetscCall(VecRestoreArrayRead(xx, &x)); 2174 PetscCall(VecRestoreArray(yy, &y)); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 2179 { 2180 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2181 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2182 const PetscScalar *x, *v; 2183 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2184 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2185 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2186 PetscInt n, i, jrow, j; 2187 2188 PetscFunctionBegin; 2189 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2190 PetscCall(VecGetArrayRead(xx, &x)); 2191 PetscCall(VecGetArray(zz, &y)); 2192 idx = a->j; 2193 v = a->a; 2194 ii = a->i; 2195 2196 for (i = 0; i < m; i++) { 2197 jrow = ii[i]; 2198 n = ii[i + 1] - jrow; 2199 sum1 = 0.0; 2200 sum2 = 0.0; 2201 sum3 = 0.0; 2202 sum4 = 0.0; 2203 sum5 = 0.0; 2204 sum6 = 0.0; 2205 sum7 = 0.0; 2206 sum8 = 0.0; 2207 sum9 = 0.0; 2208 sum10 = 0.0; 2209 sum11 = 0.0; 2210 sum12 = 0.0; 2211 sum13 = 0.0; 2212 sum14 = 0.0; 2213 sum15 = 0.0; 2214 sum16 = 0.0; 2215 for (j = 0; j < n; j++) { 2216 sum1 += v[jrow] * x[16 * idx[jrow]]; 2217 sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 2218 sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 2219 sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 2220 sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 2221 sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 2222 sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 2223 sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 2224 sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 2225 sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 2226 sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 2227 sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 2228 sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 2229 sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 2230 sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 2231 sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 2232 jrow++; 2233 } 2234 y[16 * i] += sum1; 2235 y[16 * i + 1] += sum2; 2236 y[16 * i + 2] += sum3; 2237 y[16 * i + 3] += sum4; 2238 y[16 * i + 4] += sum5; 2239 y[16 * i + 5] += sum6; 2240 y[16 * i + 6] += sum7; 2241 y[16 * i + 7] += sum8; 2242 y[16 * i + 8] += sum9; 2243 y[16 * i + 9] += sum10; 2244 y[16 * i + 10] += sum11; 2245 y[16 * i + 11] += sum12; 2246 y[16 * i + 12] += sum13; 2247 y[16 * i + 13] += sum14; 2248 y[16 * i + 14] += sum15; 2249 y[16 * i + 15] += sum16; 2250 } 2251 2252 PetscCall(PetscLogFlops(32.0 * a->nz)); 2253 PetscCall(VecRestoreArrayRead(xx, &x)); 2254 PetscCall(VecRestoreArray(zz, &y)); 2255 PetscFunctionReturn(0); 2256 } 2257 2258 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) 2259 { 2260 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2261 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2262 const PetscScalar *x, *v; 2263 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2264 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2265 const PetscInt m = b->AIJ->rmap->n, *idx; 2266 PetscInt n, i; 2267 2268 PetscFunctionBegin; 2269 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2270 PetscCall(VecGetArrayRead(xx, &x)); 2271 PetscCall(VecGetArray(zz, &y)); 2272 for (i = 0; i < m; i++) { 2273 idx = a->j + a->i[i]; 2274 v = a->a + a->i[i]; 2275 n = a->i[i + 1] - a->i[i]; 2276 alpha1 = x[16 * i]; 2277 alpha2 = x[16 * i + 1]; 2278 alpha3 = x[16 * i + 2]; 2279 alpha4 = x[16 * i + 3]; 2280 alpha5 = x[16 * i + 4]; 2281 alpha6 = x[16 * i + 5]; 2282 alpha7 = x[16 * i + 6]; 2283 alpha8 = x[16 * i + 7]; 2284 alpha9 = x[16 * i + 8]; 2285 alpha10 = x[16 * i + 9]; 2286 alpha11 = x[16 * i + 10]; 2287 alpha12 = x[16 * i + 11]; 2288 alpha13 = x[16 * i + 12]; 2289 alpha14 = x[16 * i + 13]; 2290 alpha15 = x[16 * i + 14]; 2291 alpha16 = x[16 * i + 15]; 2292 while (n-- > 0) { 2293 y[16 * (*idx)] += alpha1 * (*v); 2294 y[16 * (*idx) + 1] += alpha2 * (*v); 2295 y[16 * (*idx) + 2] += alpha3 * (*v); 2296 y[16 * (*idx) + 3] += alpha4 * (*v); 2297 y[16 * (*idx) + 4] += alpha5 * (*v); 2298 y[16 * (*idx) + 5] += alpha6 * (*v); 2299 y[16 * (*idx) + 6] += alpha7 * (*v); 2300 y[16 * (*idx) + 7] += alpha8 * (*v); 2301 y[16 * (*idx) + 8] += alpha9 * (*v); 2302 y[16 * (*idx) + 9] += alpha10 * (*v); 2303 y[16 * (*idx) + 10] += alpha11 * (*v); 2304 y[16 * (*idx) + 11] += alpha12 * (*v); 2305 y[16 * (*idx) + 12] += alpha13 * (*v); 2306 y[16 * (*idx) + 13] += alpha14 * (*v); 2307 y[16 * (*idx) + 14] += alpha15 * (*v); 2308 y[16 * (*idx) + 15] += alpha16 * (*v); 2309 idx++; 2310 v++; 2311 } 2312 } 2313 PetscCall(PetscLogFlops(32.0 * a->nz)); 2314 PetscCall(VecRestoreArrayRead(xx, &x)); 2315 PetscCall(VecRestoreArray(zz, &y)); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 /*--------------------------------------------------------------------------------------------*/ 2320 PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 2321 { 2322 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2323 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2324 const PetscScalar *x, *v; 2325 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2326 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2327 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2328 PetscInt nonzerorow = 0, n, i, jrow, j; 2329 2330 PetscFunctionBegin; 2331 PetscCall(VecGetArrayRead(xx, &x)); 2332 PetscCall(VecGetArray(yy, &y)); 2333 idx = a->j; 2334 v = a->a; 2335 ii = a->i; 2336 2337 for (i = 0; i < m; i++) { 2338 jrow = ii[i]; 2339 n = ii[i + 1] - jrow; 2340 sum1 = 0.0; 2341 sum2 = 0.0; 2342 sum3 = 0.0; 2343 sum4 = 0.0; 2344 sum5 = 0.0; 2345 sum6 = 0.0; 2346 sum7 = 0.0; 2347 sum8 = 0.0; 2348 sum9 = 0.0; 2349 sum10 = 0.0; 2350 sum11 = 0.0; 2351 sum12 = 0.0; 2352 sum13 = 0.0; 2353 sum14 = 0.0; 2354 sum15 = 0.0; 2355 sum16 = 0.0; 2356 sum17 = 0.0; 2357 sum18 = 0.0; 2358 2359 nonzerorow += (n > 0); 2360 for (j = 0; j < n; j++) { 2361 sum1 += v[jrow] * x[18 * idx[jrow]]; 2362 sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2363 sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2364 sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2365 sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2366 sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2367 sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2368 sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2369 sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2370 sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2371 sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2372 sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2373 sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2374 sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2375 sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2376 sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2377 sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2378 sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2379 jrow++; 2380 } 2381 y[18 * i] = sum1; 2382 y[18 * i + 1] = sum2; 2383 y[18 * i + 2] = sum3; 2384 y[18 * i + 3] = sum4; 2385 y[18 * i + 4] = sum5; 2386 y[18 * i + 5] = sum6; 2387 y[18 * i + 6] = sum7; 2388 y[18 * i + 7] = sum8; 2389 y[18 * i + 8] = sum9; 2390 y[18 * i + 9] = sum10; 2391 y[18 * i + 10] = sum11; 2392 y[18 * i + 11] = sum12; 2393 y[18 * i + 12] = sum13; 2394 y[18 * i + 13] = sum14; 2395 y[18 * i + 14] = sum15; 2396 y[18 * i + 15] = sum16; 2397 y[18 * i + 16] = sum17; 2398 y[18 * i + 17] = sum18; 2399 } 2400 2401 PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow)); 2402 PetscCall(VecRestoreArrayRead(xx, &x)); 2403 PetscCall(VecRestoreArray(yy, &y)); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) 2408 { 2409 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2410 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2411 const PetscScalar *x, *v; 2412 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2413 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2414 const PetscInt m = b->AIJ->rmap->n, *idx; 2415 PetscInt n, i; 2416 2417 PetscFunctionBegin; 2418 PetscCall(VecSet(yy, 0.0)); 2419 PetscCall(VecGetArrayRead(xx, &x)); 2420 PetscCall(VecGetArray(yy, &y)); 2421 2422 for (i = 0; i < m; i++) { 2423 idx = a->j + a->i[i]; 2424 v = a->a + a->i[i]; 2425 n = a->i[i + 1] - a->i[i]; 2426 alpha1 = x[18 * i]; 2427 alpha2 = x[18 * i + 1]; 2428 alpha3 = x[18 * i + 2]; 2429 alpha4 = x[18 * i + 3]; 2430 alpha5 = x[18 * i + 4]; 2431 alpha6 = x[18 * i + 5]; 2432 alpha7 = x[18 * i + 6]; 2433 alpha8 = x[18 * i + 7]; 2434 alpha9 = x[18 * i + 8]; 2435 alpha10 = x[18 * i + 9]; 2436 alpha11 = x[18 * i + 10]; 2437 alpha12 = x[18 * i + 11]; 2438 alpha13 = x[18 * i + 12]; 2439 alpha14 = x[18 * i + 13]; 2440 alpha15 = x[18 * i + 14]; 2441 alpha16 = x[18 * i + 15]; 2442 alpha17 = x[18 * i + 16]; 2443 alpha18 = x[18 * i + 17]; 2444 while (n-- > 0) { 2445 y[18 * (*idx)] += alpha1 * (*v); 2446 y[18 * (*idx) + 1] += alpha2 * (*v); 2447 y[18 * (*idx) + 2] += alpha3 * (*v); 2448 y[18 * (*idx) + 3] += alpha4 * (*v); 2449 y[18 * (*idx) + 4] += alpha5 * (*v); 2450 y[18 * (*idx) + 5] += alpha6 * (*v); 2451 y[18 * (*idx) + 6] += alpha7 * (*v); 2452 y[18 * (*idx) + 7] += alpha8 * (*v); 2453 y[18 * (*idx) + 8] += alpha9 * (*v); 2454 y[18 * (*idx) + 9] += alpha10 * (*v); 2455 y[18 * (*idx) + 10] += alpha11 * (*v); 2456 y[18 * (*idx) + 11] += alpha12 * (*v); 2457 y[18 * (*idx) + 12] += alpha13 * (*v); 2458 y[18 * (*idx) + 13] += alpha14 * (*v); 2459 y[18 * (*idx) + 14] += alpha15 * (*v); 2460 y[18 * (*idx) + 15] += alpha16 * (*v); 2461 y[18 * (*idx) + 16] += alpha17 * (*v); 2462 y[18 * (*idx) + 17] += alpha18 * (*v); 2463 idx++; 2464 v++; 2465 } 2466 } 2467 PetscCall(PetscLogFlops(36.0 * a->nz)); 2468 PetscCall(VecRestoreArrayRead(xx, &x)); 2469 PetscCall(VecRestoreArray(yy, &y)); 2470 PetscFunctionReturn(0); 2471 } 2472 2473 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 2474 { 2475 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2476 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2477 const PetscScalar *x, *v; 2478 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2479 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2480 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2481 PetscInt n, i, jrow, j; 2482 2483 PetscFunctionBegin; 2484 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2485 PetscCall(VecGetArrayRead(xx, &x)); 2486 PetscCall(VecGetArray(zz, &y)); 2487 idx = a->j; 2488 v = a->a; 2489 ii = a->i; 2490 2491 for (i = 0; i < m; i++) { 2492 jrow = ii[i]; 2493 n = ii[i + 1] - jrow; 2494 sum1 = 0.0; 2495 sum2 = 0.0; 2496 sum3 = 0.0; 2497 sum4 = 0.0; 2498 sum5 = 0.0; 2499 sum6 = 0.0; 2500 sum7 = 0.0; 2501 sum8 = 0.0; 2502 sum9 = 0.0; 2503 sum10 = 0.0; 2504 sum11 = 0.0; 2505 sum12 = 0.0; 2506 sum13 = 0.0; 2507 sum14 = 0.0; 2508 sum15 = 0.0; 2509 sum16 = 0.0; 2510 sum17 = 0.0; 2511 sum18 = 0.0; 2512 for (j = 0; j < n; j++) { 2513 sum1 += v[jrow] * x[18 * idx[jrow]]; 2514 sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2515 sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2516 sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2517 sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2518 sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2519 sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2520 sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2521 sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2522 sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2523 sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2524 sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2525 sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2526 sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2527 sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2528 sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2529 sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2530 sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2531 jrow++; 2532 } 2533 y[18 * i] += sum1; 2534 y[18 * i + 1] += sum2; 2535 y[18 * i + 2] += sum3; 2536 y[18 * i + 3] += sum4; 2537 y[18 * i + 4] += sum5; 2538 y[18 * i + 5] += sum6; 2539 y[18 * i + 6] += sum7; 2540 y[18 * i + 7] += sum8; 2541 y[18 * i + 8] += sum9; 2542 y[18 * i + 9] += sum10; 2543 y[18 * i + 10] += sum11; 2544 y[18 * i + 11] += sum12; 2545 y[18 * i + 12] += sum13; 2546 y[18 * i + 13] += sum14; 2547 y[18 * i + 14] += sum15; 2548 y[18 * i + 15] += sum16; 2549 y[18 * i + 16] += sum17; 2550 y[18 * i + 17] += sum18; 2551 } 2552 2553 PetscCall(PetscLogFlops(36.0 * a->nz)); 2554 PetscCall(VecRestoreArrayRead(xx, &x)); 2555 PetscCall(VecRestoreArray(zz, &y)); 2556 PetscFunctionReturn(0); 2557 } 2558 2559 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) 2560 { 2561 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2562 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2563 const PetscScalar *x, *v; 2564 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2565 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2566 const PetscInt m = b->AIJ->rmap->n, *idx; 2567 PetscInt n, i; 2568 2569 PetscFunctionBegin; 2570 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2571 PetscCall(VecGetArrayRead(xx, &x)); 2572 PetscCall(VecGetArray(zz, &y)); 2573 for (i = 0; i < m; i++) { 2574 idx = a->j + a->i[i]; 2575 v = a->a + a->i[i]; 2576 n = a->i[i + 1] - a->i[i]; 2577 alpha1 = x[18 * i]; 2578 alpha2 = x[18 * i + 1]; 2579 alpha3 = x[18 * i + 2]; 2580 alpha4 = x[18 * i + 3]; 2581 alpha5 = x[18 * i + 4]; 2582 alpha6 = x[18 * i + 5]; 2583 alpha7 = x[18 * i + 6]; 2584 alpha8 = x[18 * i + 7]; 2585 alpha9 = x[18 * i + 8]; 2586 alpha10 = x[18 * i + 9]; 2587 alpha11 = x[18 * i + 10]; 2588 alpha12 = x[18 * i + 11]; 2589 alpha13 = x[18 * i + 12]; 2590 alpha14 = x[18 * i + 13]; 2591 alpha15 = x[18 * i + 14]; 2592 alpha16 = x[18 * i + 15]; 2593 alpha17 = x[18 * i + 16]; 2594 alpha18 = x[18 * i + 17]; 2595 while (n-- > 0) { 2596 y[18 * (*idx)] += alpha1 * (*v); 2597 y[18 * (*idx) + 1] += alpha2 * (*v); 2598 y[18 * (*idx) + 2] += alpha3 * (*v); 2599 y[18 * (*idx) + 3] += alpha4 * (*v); 2600 y[18 * (*idx) + 4] += alpha5 * (*v); 2601 y[18 * (*idx) + 5] += alpha6 * (*v); 2602 y[18 * (*idx) + 6] += alpha7 * (*v); 2603 y[18 * (*idx) + 7] += alpha8 * (*v); 2604 y[18 * (*idx) + 8] += alpha9 * (*v); 2605 y[18 * (*idx) + 9] += alpha10 * (*v); 2606 y[18 * (*idx) + 10] += alpha11 * (*v); 2607 y[18 * (*idx) + 11] += alpha12 * (*v); 2608 y[18 * (*idx) + 12] += alpha13 * (*v); 2609 y[18 * (*idx) + 13] += alpha14 * (*v); 2610 y[18 * (*idx) + 14] += alpha15 * (*v); 2611 y[18 * (*idx) + 15] += alpha16 * (*v); 2612 y[18 * (*idx) + 16] += alpha17 * (*v); 2613 y[18 * (*idx) + 17] += alpha18 * (*v); 2614 idx++; 2615 v++; 2616 } 2617 } 2618 PetscCall(PetscLogFlops(36.0 * a->nz)); 2619 PetscCall(VecRestoreArrayRead(xx, &x)); 2620 PetscCall(VecRestoreArray(zz, &y)); 2621 PetscFunctionReturn(0); 2622 } 2623 2624 PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 2625 { 2626 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2627 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2628 const PetscScalar *x, *v; 2629 PetscScalar *y, *sums; 2630 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2631 PetscInt n, i, jrow, j, dof = b->dof, k; 2632 2633 PetscFunctionBegin; 2634 PetscCall(VecGetArrayRead(xx, &x)); 2635 PetscCall(VecSet(yy, 0.0)); 2636 PetscCall(VecGetArray(yy, &y)); 2637 idx = a->j; 2638 v = a->a; 2639 ii = a->i; 2640 2641 for (i = 0; i < m; i++) { 2642 jrow = ii[i]; 2643 n = ii[i + 1] - jrow; 2644 sums = y + dof * i; 2645 for (j = 0; j < n; j++) { 2646 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 2647 jrow++; 2648 } 2649 } 2650 2651 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2652 PetscCall(VecRestoreArrayRead(xx, &x)); 2653 PetscCall(VecRestoreArray(yy, &y)); 2654 PetscFunctionReturn(0); 2655 } 2656 2657 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 2658 { 2659 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2660 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2661 const PetscScalar *x, *v; 2662 PetscScalar *y, *sums; 2663 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2664 PetscInt n, i, jrow, j, dof = b->dof, k; 2665 2666 PetscFunctionBegin; 2667 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2668 PetscCall(VecGetArrayRead(xx, &x)); 2669 PetscCall(VecGetArray(zz, &y)); 2670 idx = a->j; 2671 v = a->a; 2672 ii = a->i; 2673 2674 for (i = 0; i < m; i++) { 2675 jrow = ii[i]; 2676 n = ii[i + 1] - jrow; 2677 sums = y + dof * i; 2678 for (j = 0; j < n; j++) { 2679 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 2680 jrow++; 2681 } 2682 } 2683 2684 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2685 PetscCall(VecRestoreArrayRead(xx, &x)); 2686 PetscCall(VecRestoreArray(zz, &y)); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 2691 { 2692 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2693 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2694 const PetscScalar *x, *v, *alpha; 2695 PetscScalar *y; 2696 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 2697 PetscInt n, i, k; 2698 2699 PetscFunctionBegin; 2700 PetscCall(VecGetArrayRead(xx, &x)); 2701 PetscCall(VecSet(yy, 0.0)); 2702 PetscCall(VecGetArray(yy, &y)); 2703 for (i = 0; i < m; i++) { 2704 idx = a->j + a->i[i]; 2705 v = a->a + a->i[i]; 2706 n = a->i[i + 1] - a->i[i]; 2707 alpha = x + dof * i; 2708 while (n-- > 0) { 2709 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 2710 idx++; 2711 v++; 2712 } 2713 } 2714 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2715 PetscCall(VecRestoreArrayRead(xx, &x)); 2716 PetscCall(VecRestoreArray(yy, &y)); 2717 PetscFunctionReturn(0); 2718 } 2719 2720 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 2721 { 2722 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2723 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2724 const PetscScalar *x, *v, *alpha; 2725 PetscScalar *y; 2726 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 2727 PetscInt n, i, k; 2728 2729 PetscFunctionBegin; 2730 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2731 PetscCall(VecGetArrayRead(xx, &x)); 2732 PetscCall(VecGetArray(zz, &y)); 2733 for (i = 0; i < m; i++) { 2734 idx = a->j + a->i[i]; 2735 v = a->a + a->i[i]; 2736 n = a->i[i + 1] - a->i[i]; 2737 alpha = x + dof * i; 2738 while (n-- > 0) { 2739 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 2740 idx++; 2741 v++; 2742 } 2743 } 2744 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2745 PetscCall(VecRestoreArrayRead(xx, &x)); 2746 PetscCall(VecRestoreArray(zz, &y)); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 /*===================================================================================*/ 2751 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 2752 { 2753 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2754 2755 PetscFunctionBegin; 2756 /* start the scatter */ 2757 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2758 PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 2759 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2760 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 2761 PetscFunctionReturn(0); 2762 } 2763 2764 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 2765 { 2766 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2767 2768 PetscFunctionBegin; 2769 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 2770 PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 2771 PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 2772 PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 2773 PetscFunctionReturn(0); 2774 } 2775 2776 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 2777 { 2778 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2779 2780 PetscFunctionBegin; 2781 /* start the scatter */ 2782 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2783 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 2784 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2785 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 2786 PetscFunctionReturn(0); 2787 } 2788 2789 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 2790 { 2791 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2792 2793 PetscFunctionBegin; 2794 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 2795 PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 2796 PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 2797 PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 2798 PetscFunctionReturn(0); 2799 } 2800 2801 /* ----------------------------------------------------------------*/ 2802 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 2803 { 2804 Mat_Product *product = C->product; 2805 2806 PetscFunctionBegin; 2807 if (product->type == MATPRODUCT_PtAP) { 2808 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 2809 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 2810 PetscFunctionReturn(0); 2811 } 2812 2813 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 2814 { 2815 Mat_Product *product = C->product; 2816 PetscBool flg = PETSC_FALSE; 2817 Mat A = product->A, P = product->B; 2818 PetscInt alg = 1; /* set default algorithm */ 2819 #if !defined(PETSC_HAVE_HYPRE) 2820 const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 2821 PetscInt nalg = 4; 2822 #else 2823 const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 2824 PetscInt nalg = 5; 2825 #endif 2826 2827 PetscFunctionBegin; 2828 PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 2829 2830 /* PtAP */ 2831 /* Check matrix local sizes */ 2832 PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2833 A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 2834 PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2835 A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 2836 2837 /* Set the default algorithm */ 2838 PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 2839 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2840 2841 /* Get runtime option */ 2842 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 2843 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 2844 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2845 PetscOptionsEnd(); 2846 2847 PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 2848 if (flg) { 2849 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2850 PetscFunctionReturn(0); 2851 } 2852 2853 PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 2854 if (flg) { 2855 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2856 PetscFunctionReturn(0); 2857 } 2858 2859 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 2860 PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 2861 PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 2862 PetscCall(MatProductSetFromOptions(C)); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 /* ----------------------------------------------------------------*/ 2867 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 2868 { 2869 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2870 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 2871 Mat P = pp->AIJ; 2872 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 2873 PetscInt *pti, *ptj, *ptJ; 2874 PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 2875 const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 2876 PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 2877 MatScalar *ca; 2878 const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 2879 2880 PetscFunctionBegin; 2881 /* Get ij structure of P^T */ 2882 PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 2883 2884 cn = pn * ppdof; 2885 /* Allocate ci array, arrays for fill computation and */ 2886 /* free space for accumulating nonzero column info */ 2887 PetscCall(PetscMalloc1(cn + 1, &ci)); 2888 ci[0] = 0; 2889 2890 /* Work arrays for rows of P^T*A */ 2891 PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 2892 PetscCall(PetscArrayzero(ptadenserow, an)); 2893 PetscCall(PetscArrayzero(denserow, cn)); 2894 2895 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2896 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2897 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2898 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 2899 current_space = free_space; 2900 2901 /* Determine symbolic info for each row of C: */ 2902 for (i = 0; i < pn; i++) { 2903 ptnzi = pti[i + 1] - pti[i]; 2904 ptJ = ptj + pti[i]; 2905 for (dof = 0; dof < ppdof; dof++) { 2906 ptanzi = 0; 2907 /* Determine symbolic row of PtA: */ 2908 for (j = 0; j < ptnzi; j++) { 2909 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2910 arow = ptJ[j] * ppdof + dof; 2911 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2912 anzj = ai[arow + 1] - ai[arow]; 2913 ajj = aj + ai[arow]; 2914 for (k = 0; k < anzj; k++) { 2915 if (!ptadenserow[ajj[k]]) { 2916 ptadenserow[ajj[k]] = -1; 2917 ptasparserow[ptanzi++] = ajj[k]; 2918 } 2919 } 2920 } 2921 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2922 ptaj = ptasparserow; 2923 cnzi = 0; 2924 for (j = 0; j < ptanzi; j++) { 2925 /* Get offset within block of P */ 2926 pshift = *ptaj % ppdof; 2927 /* Get block row of P */ 2928 prow = (*ptaj++) / ppdof; /* integer division */ 2929 /* P has same number of nonzeros per row as the compressed form */ 2930 pnzj = pi[prow + 1] - pi[prow]; 2931 pjj = pj + pi[prow]; 2932 for (k = 0; k < pnzj; k++) { 2933 /* Locations in C are shifted by the offset within the block */ 2934 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2935 if (!denserow[pjj[k] * ppdof + pshift]) { 2936 denserow[pjj[k] * ppdof + pshift] = -1; 2937 sparserow[cnzi++] = pjj[k] * ppdof + pshift; 2938 } 2939 } 2940 } 2941 2942 /* sort sparserow */ 2943 PetscCall(PetscSortInt(cnzi, sparserow)); 2944 2945 /* If free space is not available, make more free space */ 2946 /* Double the amount of total space in the list */ 2947 if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 2948 2949 /* Copy data into free space, and zero out denserows */ 2950 PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 2951 2952 current_space->array += cnzi; 2953 current_space->local_used += cnzi; 2954 current_space->local_remaining -= cnzi; 2955 2956 for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2957 for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 2958 2959 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2960 /* For now, we will recompute what is needed. */ 2961 ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 2962 } 2963 } 2964 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2965 /* Allocate space for cj, initialize cj, and */ 2966 /* destroy list of free space and other temporary array(s) */ 2967 PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 2968 PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 2969 PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 2970 2971 /* Allocate space for ca */ 2972 PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 2973 2974 /* put together the new matrix */ 2975 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 2976 PetscCall(MatSetBlockSize(C, pp->dof)); 2977 2978 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2979 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2980 c = (Mat_SeqAIJ *)(C->data); 2981 c->free_a = PETSC_TRUE; 2982 c->free_ij = PETSC_TRUE; 2983 c->nonew = 0; 2984 2985 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2986 C->ops->productnumeric = MatProductNumeric_PtAP; 2987 2988 /* Clean up. */ 2989 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 2990 PetscFunctionReturn(0); 2991 } 2992 2993 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 2994 { 2995 /* This routine requires testing -- first draft only */ 2996 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 2997 Mat P = pp->AIJ; 2998 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2999 Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 3000 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 3001 const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 3002 const PetscInt *ci = c->i, *cj = c->j, *cjj; 3003 const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 3004 PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 3005 const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 3006 MatScalar *ca = c->a, *caj, *apa; 3007 3008 PetscFunctionBegin; 3009 /* Allocate temporary array for storage of one row of A*P */ 3010 PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 3011 3012 /* Clear old values in C */ 3013 PetscCall(PetscArrayzero(ca, ci[cm])); 3014 3015 for (i = 0; i < am; i++) { 3016 /* Form sparse row of A*P */ 3017 anzi = ai[i + 1] - ai[i]; 3018 apnzj = 0; 3019 for (j = 0; j < anzi; j++) { 3020 /* Get offset within block of P */ 3021 pshift = *aj % ppdof; 3022 /* Get block row of P */ 3023 prow = *aj++ / ppdof; /* integer division */ 3024 pnzj = pi[prow + 1] - pi[prow]; 3025 pjj = pj + pi[prow]; 3026 paj = pa + pi[prow]; 3027 for (k = 0; k < pnzj; k++) { 3028 poffset = pjj[k] * ppdof + pshift; 3029 if (!apjdense[poffset]) { 3030 apjdense[poffset] = -1; 3031 apj[apnzj++] = poffset; 3032 } 3033 apa[poffset] += (*aa) * paj[k]; 3034 } 3035 PetscCall(PetscLogFlops(2.0 * pnzj)); 3036 aa++; 3037 } 3038 3039 /* Sort the j index array for quick sparse axpy. */ 3040 /* Note: a array does not need sorting as it is in dense storage locations. */ 3041 PetscCall(PetscSortInt(apnzj, apj)); 3042 3043 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3044 prow = i / ppdof; /* integer division */ 3045 pshift = i % ppdof; 3046 poffset = pi[prow]; 3047 pnzi = pi[prow + 1] - poffset; 3048 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3049 pJ = pj + poffset; 3050 pA = pa + poffset; 3051 for (j = 0; j < pnzi; j++) { 3052 crow = (*pJ) * ppdof + pshift; 3053 cjj = cj + ci[crow]; 3054 caj = ca + ci[crow]; 3055 pJ++; 3056 /* Perform sparse axpy operation. Note cjj includes apj. */ 3057 for (k = 0, nextap = 0; nextap < apnzj; k++) { 3058 if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 3059 } 3060 PetscCall(PetscLogFlops(2.0 * apnzj)); 3061 pA++; 3062 } 3063 3064 /* Zero the current row info for A*P */ 3065 for (j = 0; j < apnzj; j++) { 3066 apa[apj[j]] = 0.; 3067 apjdense[apj[j]] = 0; 3068 } 3069 } 3070 3071 /* Assemble the final matrix and clean up */ 3072 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3073 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3074 PetscCall(PetscFree3(apa, apj, apjdense)); 3075 PetscFunctionReturn(0); 3076 } 3077 3078 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 3079 { 3080 Mat_Product *product = C->product; 3081 Mat A = product->A, P = product->B; 3082 3083 PetscFunctionBegin; 3084 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 3085 PetscFunctionReturn(0); 3086 } 3087 3088 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) 3089 { 3090 PetscFunctionBegin; 3091 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 3092 } 3093 3094 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) 3095 { 3096 PetscFunctionBegin; 3097 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3098 } 3099 3100 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 3101 3102 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 3103 { 3104 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3105 3106 PetscFunctionBegin; 3107 3108 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 3109 PetscFunctionReturn(0); 3110 } 3111 3112 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 3113 3114 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 3115 { 3116 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3117 3118 PetscFunctionBegin; 3119 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 3120 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3121 PetscFunctionReturn(0); 3122 } 3123 3124 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 3125 3126 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 3127 { 3128 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3129 3130 PetscFunctionBegin; 3131 3132 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 3133 PetscFunctionReturn(0); 3134 } 3135 3136 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 3137 3138 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 3139 { 3140 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3141 3142 PetscFunctionBegin; 3143 3144 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 3145 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3146 PetscFunctionReturn(0); 3147 } 3148 3149 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3150 { 3151 Mat_Product *product = C->product; 3152 Mat A = product->A, P = product->B; 3153 PetscBool flg; 3154 3155 PetscFunctionBegin; 3156 PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 3157 if (flg) { 3158 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 3159 C->ops->productnumeric = MatProductNumeric_PtAP; 3160 PetscFunctionReturn(0); 3161 } 3162 3163 PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 3164 if (flg) { 3165 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 3166 C->ops->productnumeric = MatProductNumeric_PtAP; 3167 PetscFunctionReturn(0); 3168 } 3169 3170 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 3171 } 3172 3173 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3174 { 3175 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3176 Mat a = b->AIJ, B; 3177 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 3178 PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 3179 PetscInt *cols; 3180 PetscScalar *vals; 3181 3182 PetscFunctionBegin; 3183 PetscCall(MatGetSize(a, &m, &n)); 3184 PetscCall(PetscMalloc1(dof * m, &ilen)); 3185 for (i = 0; i < m; i++) { 3186 nmax = PetscMax(nmax, aij->ilen[i]); 3187 for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 3188 } 3189 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 3190 PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 3191 PetscCall(MatSetType(B, newtype)); 3192 PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 3193 PetscCall(PetscFree(ilen)); 3194 PetscCall(PetscMalloc1(nmax, &icols)); 3195 ii = 0; 3196 for (i = 0; i < m; i++) { 3197 PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 3198 for (j = 0; j < dof; j++) { 3199 for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 3200 PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 3201 ii++; 3202 } 3203 PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 3204 } 3205 PetscCall(PetscFree(icols)); 3206 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3207 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3208 3209 if (reuse == MAT_INPLACE_MATRIX) { 3210 PetscCall(MatHeaderReplace(A, &B)); 3211 } else { 3212 *newmat = B; 3213 } 3214 PetscFunctionReturn(0); 3215 } 3216 3217 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3218 3219 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 3220 { 3221 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 3222 Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 3223 Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 3224 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 3225 Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 3226 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 3227 PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 3228 PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 3229 PetscInt rstart, cstart, *garray, ii, k; 3230 PetscScalar *vals, *ovals; 3231 3232 PetscFunctionBegin; 3233 PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 3234 for (i = 0; i < A->rmap->n / dof; i++) { 3235 nmax = PetscMax(nmax, AIJ->ilen[i]); 3236 onmax = PetscMax(onmax, OAIJ->ilen[i]); 3237 for (j = 0; j < dof; j++) { 3238 dnz[dof * i + j] = AIJ->ilen[i]; 3239 onz[dof * i + j] = OAIJ->ilen[i]; 3240 } 3241 } 3242 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3243 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3244 PetscCall(MatSetType(B, newtype)); 3245 PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 3246 PetscCall(MatSetBlockSize(B, dof)); 3247 PetscCall(PetscFree2(dnz, onz)); 3248 3249 PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 3250 rstart = dof * maij->A->rmap->rstart; 3251 cstart = dof * maij->A->cmap->rstart; 3252 garray = mpiaij->garray; 3253 3254 ii = rstart; 3255 for (i = 0; i < A->rmap->n / dof; i++) { 3256 PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 3257 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 3258 for (j = 0; j < dof; j++) { 3259 for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 3260 for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 3261 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 3262 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 3263 ii++; 3264 } 3265 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 3266 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 3267 } 3268 PetscCall(PetscFree2(icols, oicols)); 3269 3270 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3271 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3272 3273 if (reuse == MAT_INPLACE_MATRIX) { 3274 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3275 ((PetscObject)A)->refct = 1; 3276 3277 PetscCall(MatHeaderReplace(A, &B)); 3278 3279 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3280 } else { 3281 *newmat = B; 3282 } 3283 PetscFunctionReturn(0); 3284 } 3285 3286 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 3287 { 3288 Mat A; 3289 3290 PetscFunctionBegin; 3291 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 3292 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 3293 PetscCall(MatDestroy(&A)); 3294 PetscFunctionReturn(0); 3295 } 3296 3297 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 3298 { 3299 Mat A; 3300 3301 PetscFunctionBegin; 3302 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 3303 PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 3304 PetscCall(MatDestroy(&A)); 3305 PetscFunctionReturn(0); 3306 } 3307 3308 /* ---------------------------------------------------------------------------------- */ 3309 /*@ 3310 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3311 operations for multicomponent problems. It interpolates each component the same 3312 way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 3313 and `MATMPIAIJ` for distributed matrices. 3314 3315 Collective 3316 3317 Input Parameters: 3318 + A - the `MATAIJ` matrix describing the action on blocks 3319 - dof - the block size (number of components per node) 3320 3321 Output Parameter: 3322 . maij - the new `MATMAIJ` matrix 3323 3324 Operations provided: 3325 .vb 3326 MatMult() 3327 MatMultTranspose() 3328 MatMultAdd() 3329 MatMultTransposeAdd() 3330 MatView() 3331 .ve 3332 3333 Level: advanced 3334 3335 .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 3336 @*/ 3337 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 3338 { 3339 PetscInt n; 3340 Mat B; 3341 PetscBool flg; 3342 #if defined(PETSC_HAVE_CUDA) 3343 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3344 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3345 #endif 3346 3347 PetscFunctionBegin; 3348 dof = PetscAbs(dof); 3349 PetscCall(PetscObjectReference((PetscObject)A)); 3350 3351 if (dof == 1) *maij = A; 3352 else { 3353 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3354 /* propagate vec type */ 3355 PetscCall(MatSetVecType(B, A->defaultvectype)); 3356 PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 3357 PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 3358 PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 3359 PetscCall(PetscLayoutSetUp(B->rmap)); 3360 PetscCall(PetscLayoutSetUp(B->cmap)); 3361 3362 B->assembled = PETSC_TRUE; 3363 3364 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 3365 if (flg) { 3366 Mat_SeqMAIJ *b; 3367 3368 PetscCall(MatSetType(B, MATSEQMAIJ)); 3369 3370 B->ops->setup = NULL; 3371 B->ops->destroy = MatDestroy_SeqMAIJ; 3372 B->ops->view = MatView_SeqMAIJ; 3373 3374 b = (Mat_SeqMAIJ *)B->data; 3375 b->dof = dof; 3376 b->AIJ = A; 3377 3378 if (dof == 2) { 3379 B->ops->mult = MatMult_SeqMAIJ_2; 3380 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3381 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3382 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3383 } else if (dof == 3) { 3384 B->ops->mult = MatMult_SeqMAIJ_3; 3385 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3386 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3387 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3388 } else if (dof == 4) { 3389 B->ops->mult = MatMult_SeqMAIJ_4; 3390 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3391 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3392 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3393 } else if (dof == 5) { 3394 B->ops->mult = MatMult_SeqMAIJ_5; 3395 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3396 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3397 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3398 } else if (dof == 6) { 3399 B->ops->mult = MatMult_SeqMAIJ_6; 3400 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3401 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3402 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3403 } else if (dof == 7) { 3404 B->ops->mult = MatMult_SeqMAIJ_7; 3405 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3406 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3407 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3408 } else if (dof == 8) { 3409 B->ops->mult = MatMult_SeqMAIJ_8; 3410 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3411 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3412 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3413 } else if (dof == 9) { 3414 B->ops->mult = MatMult_SeqMAIJ_9; 3415 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3416 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3417 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3418 } else if (dof == 10) { 3419 B->ops->mult = MatMult_SeqMAIJ_10; 3420 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3421 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3422 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3423 } else if (dof == 11) { 3424 B->ops->mult = MatMult_SeqMAIJ_11; 3425 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3426 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3427 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3428 } else if (dof == 16) { 3429 B->ops->mult = MatMult_SeqMAIJ_16; 3430 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3431 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3432 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3433 } else if (dof == 18) { 3434 B->ops->mult = MatMult_SeqMAIJ_18; 3435 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3436 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3437 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3438 } else { 3439 B->ops->mult = MatMult_SeqMAIJ_N; 3440 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3441 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3442 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3443 } 3444 #if defined(PETSC_HAVE_CUDA) 3445 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 3446 #endif 3447 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 3448 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3449 } else { 3450 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3451 Mat_MPIMAIJ *b; 3452 IS from, to; 3453 Vec gvec; 3454 3455 PetscCall(MatSetType(B, MATMPIMAIJ)); 3456 3457 B->ops->setup = NULL; 3458 B->ops->destroy = MatDestroy_MPIMAIJ; 3459 B->ops->view = MatView_MPIMAIJ; 3460 3461 b = (Mat_MPIMAIJ *)B->data; 3462 b->dof = dof; 3463 b->A = A; 3464 3465 PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 3466 PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 3467 3468 PetscCall(VecGetSize(mpiaij->lvec, &n)); 3469 PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 3470 PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 3471 PetscCall(VecSetBlockSize(b->w, dof)); 3472 PetscCall(VecSetType(b->w, VECSEQ)); 3473 3474 /* create two temporary Index sets for build scatter gather */ 3475 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 3476 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 3477 3478 /* create temporary global vector to generate scatter context */ 3479 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 3480 3481 /* generate the scatter context */ 3482 PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 3483 3484 PetscCall(ISDestroy(&from)); 3485 PetscCall(ISDestroy(&to)); 3486 PetscCall(VecDestroy(&gvec)); 3487 3488 B->ops->mult = MatMult_MPIMAIJ_dof; 3489 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3490 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3491 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3492 3493 #if defined(PETSC_HAVE_CUDA) 3494 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 3495 #endif 3496 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 3497 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3498 } 3499 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3500 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 3501 PetscCall(MatSetUp(B)); 3502 #if defined(PETSC_HAVE_CUDA) 3503 /* temporary until we have CUDA implementation of MAIJ */ 3504 { 3505 PetscBool flg; 3506 if (convert) { 3507 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 3508 if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 3509 } 3510 } 3511 #endif 3512 *maij = B; 3513 } 3514 PetscFunctionReturn(0); 3515 } 3516