1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: 36 The reference count on the AIJ matrix is not increased so you should not destroy it. 37 38 .seealso: `MatCreateMAIJ()` 39 @*/ 40 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41 { 42 PetscBool ismpimaij,isseqmaij; 43 44 PetscFunctionBegin; 45 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij)); 46 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij)); 47 if (ismpimaij) { 48 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 49 50 *B = b->A; 51 } else if (isseqmaij) { 52 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 53 54 *B = b->AIJ; 55 } else { 56 *B = A; 57 } 58 PetscFunctionReturn(0); 59 } 60 61 /*@ 62 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 63 64 Logically Collective 65 66 Input Parameters: 67 + A - the MAIJ matrix 68 - dof - the block size for the new matrix 69 70 Output Parameter: 71 . B - the new MAIJ matrix 72 73 Level: advanced 74 75 .seealso: `MatCreateMAIJ()` 76 @*/ 77 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 78 { 79 Mat Aij = NULL; 80 81 PetscFunctionBegin; 82 PetscValidLogicalCollectiveInt(A,dof,2); 83 PetscCall(MatMAIJGetAIJ(A,&Aij)); 84 PetscCall(MatCreateMAIJ(Aij,dof,B)); 85 PetscFunctionReturn(0); 86 } 87 88 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 89 { 90 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 91 92 PetscFunctionBegin; 93 PetscCall(MatDestroy(&b->AIJ)); 94 PetscCall(PetscFree(A->data)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaijcusparse_C",NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL)); 98 PetscFunctionReturn(0); 99 } 100 101 PetscErrorCode MatSetUp_MAIJ(Mat A) 102 { 103 PetscFunctionBegin; 104 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 105 } 106 107 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 108 { 109 Mat B; 110 111 PetscFunctionBegin; 112 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 113 PetscCall(MatView(B,viewer)); 114 PetscCall(MatDestroy(&B)); 115 PetscFunctionReturn(0); 116 } 117 118 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 119 { 120 Mat B; 121 122 PetscFunctionBegin; 123 PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B)); 124 PetscCall(MatView(B,viewer)); 125 PetscCall(MatDestroy(&B)); 126 PetscFunctionReturn(0); 127 } 128 129 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 130 { 131 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 132 133 PetscFunctionBegin; 134 PetscCall(MatDestroy(&b->AIJ)); 135 PetscCall(MatDestroy(&b->OAIJ)); 136 PetscCall(MatDestroy(&b->A)); 137 PetscCall(VecScatterDestroy(&b->ctx)); 138 PetscCall(VecDestroy(&b->w)); 139 PetscCall(PetscFree(A->data)); 140 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaijcusparse_C",NULL)); 141 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL)); 142 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL)); 143 PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL)); 144 PetscFunctionReturn(0); 145 } 146 147 /*MC 148 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 149 multicomponent problems, interpolating or restricting each component the same way independently. 150 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 151 152 Operations provided: 153 .vb 154 MatMult 155 MatMultTranspose 156 MatMultAdd 157 MatMultTransposeAdd 158 .ve 159 160 Level: advanced 161 162 .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 163 M*/ 164 165 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 166 { 167 Mat_MPIMAIJ *b; 168 PetscMPIInt size; 169 170 PetscFunctionBegin; 171 PetscCall(PetscNewLog(A,&b)); 172 A->data = (void*)b; 173 174 PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps))); 175 176 A->ops->setup = MatSetUp_MAIJ; 177 178 b->AIJ = NULL; 179 b->dof = 0; 180 b->OAIJ = NULL; 181 b->ctx = NULL; 182 b->w = NULL; 183 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 184 if (size == 1) { 185 PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ)); 186 } else { 187 PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ)); 188 } 189 A->preallocated = PETSC_TRUE; 190 A->assembled = PETSC_TRUE; 191 PetscFunctionReturn(0); 192 } 193 194 /* --------------------------------------------------------------------------------------*/ 195 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 196 { 197 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 198 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 199 const PetscScalar *x,*v; 200 PetscScalar *y, sum1, sum2; 201 PetscInt nonzerorow=0,n,i,jrow,j; 202 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 203 204 PetscFunctionBegin; 205 PetscCall(VecGetArrayRead(xx,&x)); 206 PetscCall(VecGetArray(yy,&y)); 207 idx = a->j; 208 v = a->a; 209 ii = a->i; 210 211 for (i=0; i<m; i++) { 212 jrow = ii[i]; 213 n = ii[i+1] - jrow; 214 sum1 = 0.0; 215 sum2 = 0.0; 216 217 nonzerorow += (n>0); 218 for (j=0; j<n; j++) { 219 sum1 += v[jrow]*x[2*idx[jrow]]; 220 sum2 += v[jrow]*x[2*idx[jrow]+1]; 221 jrow++; 222 } 223 y[2*i] = sum1; 224 y[2*i+1] = sum2; 225 } 226 227 PetscCall(PetscLogFlops(4.0*a->nz - 2.0*nonzerorow)); 228 PetscCall(VecRestoreArrayRead(xx,&x)); 229 PetscCall(VecRestoreArray(yy,&y)); 230 PetscFunctionReturn(0); 231 } 232 233 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 234 { 235 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 236 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 237 const PetscScalar *x,*v; 238 PetscScalar *y,alpha1,alpha2; 239 PetscInt n,i; 240 const PetscInt m = b->AIJ->rmap->n,*idx; 241 242 PetscFunctionBegin; 243 PetscCall(VecSet(yy,0.0)); 244 PetscCall(VecGetArrayRead(xx,&x)); 245 PetscCall(VecGetArray(yy,&y)); 246 247 for (i=0; i<m; i++) { 248 idx = a->j + a->i[i]; 249 v = a->a + a->i[i]; 250 n = a->i[i+1] - a->i[i]; 251 alpha1 = x[2*i]; 252 alpha2 = x[2*i+1]; 253 while (n-->0) { 254 y[2*(*idx)] += alpha1*(*v); 255 y[2*(*idx)+1] += alpha2*(*v); 256 idx++; v++; 257 } 258 } 259 PetscCall(PetscLogFlops(4.0*a->nz)); 260 PetscCall(VecRestoreArrayRead(xx,&x)); 261 PetscCall(VecRestoreArray(yy,&y)); 262 PetscFunctionReturn(0); 263 } 264 265 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 266 { 267 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 269 const PetscScalar *x,*v; 270 PetscScalar *y,sum1, sum2; 271 PetscInt n,i,jrow,j; 272 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 273 274 PetscFunctionBegin; 275 if (yy != zz) PetscCall(VecCopy(yy,zz)); 276 PetscCall(VecGetArrayRead(xx,&x)); 277 PetscCall(VecGetArray(zz,&y)); 278 idx = a->j; 279 v = a->a; 280 ii = a->i; 281 282 for (i=0; i<m; i++) { 283 jrow = ii[i]; 284 n = ii[i+1] - jrow; 285 sum1 = 0.0; 286 sum2 = 0.0; 287 for (j=0; j<n; j++) { 288 sum1 += v[jrow]*x[2*idx[jrow]]; 289 sum2 += v[jrow]*x[2*idx[jrow]+1]; 290 jrow++; 291 } 292 y[2*i] += sum1; 293 y[2*i+1] += sum2; 294 } 295 296 PetscCall(PetscLogFlops(4.0*a->nz)); 297 PetscCall(VecRestoreArrayRead(xx,&x)); 298 PetscCall(VecRestoreArray(zz,&y)); 299 PetscFunctionReturn(0); 300 } 301 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 302 { 303 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 304 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 305 const PetscScalar *x,*v; 306 PetscScalar *y,alpha1,alpha2; 307 PetscInt n,i; 308 const PetscInt m = b->AIJ->rmap->n,*idx; 309 310 PetscFunctionBegin; 311 if (yy != zz) PetscCall(VecCopy(yy,zz)); 312 PetscCall(VecGetArrayRead(xx,&x)); 313 PetscCall(VecGetArray(zz,&y)); 314 315 for (i=0; i<m; i++) { 316 idx = a->j + a->i[i]; 317 v = a->a + a->i[i]; 318 n = a->i[i+1] - a->i[i]; 319 alpha1 = x[2*i]; 320 alpha2 = x[2*i+1]; 321 while (n-->0) { 322 y[2*(*idx)] += alpha1*(*v); 323 y[2*(*idx)+1] += alpha2*(*v); 324 idx++; v++; 325 } 326 } 327 PetscCall(PetscLogFlops(4.0*a->nz)); 328 PetscCall(VecRestoreArrayRead(xx,&x)); 329 PetscCall(VecRestoreArray(zz,&y)); 330 PetscFunctionReturn(0); 331 } 332 /* --------------------------------------------------------------------------------------*/ 333 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 334 { 335 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 336 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 337 const PetscScalar *x,*v; 338 PetscScalar *y,sum1, sum2, sum3; 339 PetscInt nonzerorow=0,n,i,jrow,j; 340 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 341 342 PetscFunctionBegin; 343 PetscCall(VecGetArrayRead(xx,&x)); 344 PetscCall(VecGetArray(yy,&y)); 345 idx = a->j; 346 v = a->a; 347 ii = a->i; 348 349 for (i=0; i<m; i++) { 350 jrow = ii[i]; 351 n = ii[i+1] - jrow; 352 sum1 = 0.0; 353 sum2 = 0.0; 354 sum3 = 0.0; 355 356 nonzerorow += (n>0); 357 for (j=0; j<n; j++) { 358 sum1 += v[jrow]*x[3*idx[jrow]]; 359 sum2 += v[jrow]*x[3*idx[jrow]+1]; 360 sum3 += v[jrow]*x[3*idx[jrow]+2]; 361 jrow++; 362 } 363 y[3*i] = sum1; 364 y[3*i+1] = sum2; 365 y[3*i+2] = sum3; 366 } 367 368 PetscCall(PetscLogFlops(6.0*a->nz - 3.0*nonzerorow)); 369 PetscCall(VecRestoreArrayRead(xx,&x)); 370 PetscCall(VecRestoreArray(yy,&y)); 371 PetscFunctionReturn(0); 372 } 373 374 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 375 { 376 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 377 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 378 const PetscScalar *x,*v; 379 PetscScalar *y,alpha1,alpha2,alpha3; 380 PetscInt n,i; 381 const PetscInt m = b->AIJ->rmap->n,*idx; 382 383 PetscFunctionBegin; 384 PetscCall(VecSet(yy,0.0)); 385 PetscCall(VecGetArrayRead(xx,&x)); 386 PetscCall(VecGetArray(yy,&y)); 387 388 for (i=0; i<m; i++) { 389 idx = a->j + a->i[i]; 390 v = a->a + a->i[i]; 391 n = a->i[i+1] - a->i[i]; 392 alpha1 = x[3*i]; 393 alpha2 = x[3*i+1]; 394 alpha3 = x[3*i+2]; 395 while (n-->0) { 396 y[3*(*idx)] += alpha1*(*v); 397 y[3*(*idx)+1] += alpha2*(*v); 398 y[3*(*idx)+2] += alpha3*(*v); 399 idx++; v++; 400 } 401 } 402 PetscCall(PetscLogFlops(6.0*a->nz)); 403 PetscCall(VecRestoreArrayRead(xx,&x)); 404 PetscCall(VecRestoreArray(yy,&y)); 405 PetscFunctionReturn(0); 406 } 407 408 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 409 { 410 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 411 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 412 const PetscScalar *x,*v; 413 PetscScalar *y,sum1, sum2, sum3; 414 PetscInt n,i,jrow,j; 415 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 416 417 PetscFunctionBegin; 418 if (yy != zz) PetscCall(VecCopy(yy,zz)); 419 PetscCall(VecGetArrayRead(xx,&x)); 420 PetscCall(VecGetArray(zz,&y)); 421 idx = a->j; 422 v = a->a; 423 ii = a->i; 424 425 for (i=0; i<m; i++) { 426 jrow = ii[i]; 427 n = ii[i+1] - jrow; 428 sum1 = 0.0; 429 sum2 = 0.0; 430 sum3 = 0.0; 431 for (j=0; j<n; j++) { 432 sum1 += v[jrow]*x[3*idx[jrow]]; 433 sum2 += v[jrow]*x[3*idx[jrow]+1]; 434 sum3 += v[jrow]*x[3*idx[jrow]+2]; 435 jrow++; 436 } 437 y[3*i] += sum1; 438 y[3*i+1] += sum2; 439 y[3*i+2] += sum3; 440 } 441 442 PetscCall(PetscLogFlops(6.0*a->nz)); 443 PetscCall(VecRestoreArrayRead(xx,&x)); 444 PetscCall(VecRestoreArray(zz,&y)); 445 PetscFunctionReturn(0); 446 } 447 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 448 { 449 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 450 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 451 const PetscScalar *x,*v; 452 PetscScalar *y,alpha1,alpha2,alpha3; 453 PetscInt n,i; 454 const PetscInt m = b->AIJ->rmap->n,*idx; 455 456 PetscFunctionBegin; 457 if (yy != zz) PetscCall(VecCopy(yy,zz)); 458 PetscCall(VecGetArrayRead(xx,&x)); 459 PetscCall(VecGetArray(zz,&y)); 460 for (i=0; i<m; i++) { 461 idx = a->j + a->i[i]; 462 v = a->a + a->i[i]; 463 n = a->i[i+1] - a->i[i]; 464 alpha1 = x[3*i]; 465 alpha2 = x[3*i+1]; 466 alpha3 = x[3*i+2]; 467 while (n-->0) { 468 y[3*(*idx)] += alpha1*(*v); 469 y[3*(*idx)+1] += alpha2*(*v); 470 y[3*(*idx)+2] += alpha3*(*v); 471 idx++; v++; 472 } 473 } 474 PetscCall(PetscLogFlops(6.0*a->nz)); 475 PetscCall(VecRestoreArrayRead(xx,&x)); 476 PetscCall(VecRestoreArray(zz,&y)); 477 PetscFunctionReturn(0); 478 } 479 480 /* ------------------------------------------------------------------------------*/ 481 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 482 { 483 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 484 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 485 const PetscScalar *x,*v; 486 PetscScalar *y,sum1, sum2, sum3, sum4; 487 PetscInt nonzerorow=0,n,i,jrow,j; 488 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 489 490 PetscFunctionBegin; 491 PetscCall(VecGetArrayRead(xx,&x)); 492 PetscCall(VecGetArray(yy,&y)); 493 idx = a->j; 494 v = a->a; 495 ii = a->i; 496 497 for (i=0; i<m; i++) { 498 jrow = ii[i]; 499 n = ii[i+1] - jrow; 500 sum1 = 0.0; 501 sum2 = 0.0; 502 sum3 = 0.0; 503 sum4 = 0.0; 504 nonzerorow += (n>0); 505 for (j=0; j<n; j++) { 506 sum1 += v[jrow]*x[4*idx[jrow]]; 507 sum2 += v[jrow]*x[4*idx[jrow]+1]; 508 sum3 += v[jrow]*x[4*idx[jrow]+2]; 509 sum4 += v[jrow]*x[4*idx[jrow]+3]; 510 jrow++; 511 } 512 y[4*i] = sum1; 513 y[4*i+1] = sum2; 514 y[4*i+2] = sum3; 515 y[4*i+3] = sum4; 516 } 517 518 PetscCall(PetscLogFlops(8.0*a->nz - 4.0*nonzerorow)); 519 PetscCall(VecRestoreArrayRead(xx,&x)); 520 PetscCall(VecRestoreArray(yy,&y)); 521 PetscFunctionReturn(0); 522 } 523 524 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 525 { 526 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 527 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 528 const PetscScalar *x,*v; 529 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 530 PetscInt n,i; 531 const PetscInt m = b->AIJ->rmap->n,*idx; 532 533 PetscFunctionBegin; 534 PetscCall(VecSet(yy,0.0)); 535 PetscCall(VecGetArrayRead(xx,&x)); 536 PetscCall(VecGetArray(yy,&y)); 537 for (i=0; i<m; i++) { 538 idx = a->j + a->i[i]; 539 v = a->a + a->i[i]; 540 n = a->i[i+1] - a->i[i]; 541 alpha1 = x[4*i]; 542 alpha2 = x[4*i+1]; 543 alpha3 = x[4*i+2]; 544 alpha4 = x[4*i+3]; 545 while (n-->0) { 546 y[4*(*idx)] += alpha1*(*v); 547 y[4*(*idx)+1] += alpha2*(*v); 548 y[4*(*idx)+2] += alpha3*(*v); 549 y[4*(*idx)+3] += alpha4*(*v); 550 idx++; v++; 551 } 552 } 553 PetscCall(PetscLogFlops(8.0*a->nz)); 554 PetscCall(VecRestoreArrayRead(xx,&x)); 555 PetscCall(VecRestoreArray(yy,&y)); 556 PetscFunctionReturn(0); 557 } 558 559 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 560 { 561 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 562 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 563 const PetscScalar *x,*v; 564 PetscScalar *y,sum1, sum2, sum3, sum4; 565 PetscInt n,i,jrow,j; 566 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 567 568 PetscFunctionBegin; 569 if (yy != zz) PetscCall(VecCopy(yy,zz)); 570 PetscCall(VecGetArrayRead(xx,&x)); 571 PetscCall(VecGetArray(zz,&y)); 572 idx = a->j; 573 v = a->a; 574 ii = a->i; 575 576 for (i=0; i<m; i++) { 577 jrow = ii[i]; 578 n = ii[i+1] - jrow; 579 sum1 = 0.0; 580 sum2 = 0.0; 581 sum3 = 0.0; 582 sum4 = 0.0; 583 for (j=0; j<n; j++) { 584 sum1 += v[jrow]*x[4*idx[jrow]]; 585 sum2 += v[jrow]*x[4*idx[jrow]+1]; 586 sum3 += v[jrow]*x[4*idx[jrow]+2]; 587 sum4 += v[jrow]*x[4*idx[jrow]+3]; 588 jrow++; 589 } 590 y[4*i] += sum1; 591 y[4*i+1] += sum2; 592 y[4*i+2] += sum3; 593 y[4*i+3] += sum4; 594 } 595 596 PetscCall(PetscLogFlops(8.0*a->nz)); 597 PetscCall(VecRestoreArrayRead(xx,&x)); 598 PetscCall(VecRestoreArray(zz,&y)); 599 PetscFunctionReturn(0); 600 } 601 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 602 { 603 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 604 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 605 const PetscScalar *x,*v; 606 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 607 PetscInt n,i; 608 const PetscInt m = b->AIJ->rmap->n,*idx; 609 610 PetscFunctionBegin; 611 if (yy != zz) PetscCall(VecCopy(yy,zz)); 612 PetscCall(VecGetArrayRead(xx,&x)); 613 PetscCall(VecGetArray(zz,&y)); 614 615 for (i=0; i<m; i++) { 616 idx = a->j + a->i[i]; 617 v = a->a + a->i[i]; 618 n = a->i[i+1] - a->i[i]; 619 alpha1 = x[4*i]; 620 alpha2 = x[4*i+1]; 621 alpha3 = x[4*i+2]; 622 alpha4 = x[4*i+3]; 623 while (n-->0) { 624 y[4*(*idx)] += alpha1*(*v); 625 y[4*(*idx)+1] += alpha2*(*v); 626 y[4*(*idx)+2] += alpha3*(*v); 627 y[4*(*idx)+3] += alpha4*(*v); 628 idx++; v++; 629 } 630 } 631 PetscCall(PetscLogFlops(8.0*a->nz)); 632 PetscCall(VecRestoreArrayRead(xx,&x)); 633 PetscCall(VecRestoreArray(zz,&y)); 634 PetscFunctionReturn(0); 635 } 636 /* ------------------------------------------------------------------------------*/ 637 638 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 639 { 640 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 641 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 642 const PetscScalar *x,*v; 643 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 644 PetscInt nonzerorow=0,n,i,jrow,j; 645 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 646 647 PetscFunctionBegin; 648 PetscCall(VecGetArrayRead(xx,&x)); 649 PetscCall(VecGetArray(yy,&y)); 650 idx = a->j; 651 v = a->a; 652 ii = a->i; 653 654 for (i=0; i<m; i++) { 655 jrow = ii[i]; 656 n = ii[i+1] - jrow; 657 sum1 = 0.0; 658 sum2 = 0.0; 659 sum3 = 0.0; 660 sum4 = 0.0; 661 sum5 = 0.0; 662 663 nonzerorow += (n>0); 664 for (j=0; j<n; j++) { 665 sum1 += v[jrow]*x[5*idx[jrow]]; 666 sum2 += v[jrow]*x[5*idx[jrow]+1]; 667 sum3 += v[jrow]*x[5*idx[jrow]+2]; 668 sum4 += v[jrow]*x[5*idx[jrow]+3]; 669 sum5 += v[jrow]*x[5*idx[jrow]+4]; 670 jrow++; 671 } 672 y[5*i] = sum1; 673 y[5*i+1] = sum2; 674 y[5*i+2] = sum3; 675 y[5*i+3] = sum4; 676 y[5*i+4] = sum5; 677 } 678 679 PetscCall(PetscLogFlops(10.0*a->nz - 5.0*nonzerorow)); 680 PetscCall(VecRestoreArrayRead(xx,&x)); 681 PetscCall(VecRestoreArray(yy,&y)); 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 686 { 687 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 689 const PetscScalar *x,*v; 690 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 691 PetscInt n,i; 692 const PetscInt m = b->AIJ->rmap->n,*idx; 693 694 PetscFunctionBegin; 695 PetscCall(VecSet(yy,0.0)); 696 PetscCall(VecGetArrayRead(xx,&x)); 697 PetscCall(VecGetArray(yy,&y)); 698 699 for (i=0; i<m; i++) { 700 idx = a->j + a->i[i]; 701 v = a->a + a->i[i]; 702 n = a->i[i+1] - a->i[i]; 703 alpha1 = x[5*i]; 704 alpha2 = x[5*i+1]; 705 alpha3 = x[5*i+2]; 706 alpha4 = x[5*i+3]; 707 alpha5 = x[5*i+4]; 708 while (n-->0) { 709 y[5*(*idx)] += alpha1*(*v); 710 y[5*(*idx)+1] += alpha2*(*v); 711 y[5*(*idx)+2] += alpha3*(*v); 712 y[5*(*idx)+3] += alpha4*(*v); 713 y[5*(*idx)+4] += alpha5*(*v); 714 idx++; v++; 715 } 716 } 717 PetscCall(PetscLogFlops(10.0*a->nz)); 718 PetscCall(VecRestoreArrayRead(xx,&x)); 719 PetscCall(VecRestoreArray(yy,&y)); 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 724 { 725 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 726 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 727 const PetscScalar *x,*v; 728 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 729 PetscInt n,i,jrow,j; 730 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 731 732 PetscFunctionBegin; 733 if (yy != zz) PetscCall(VecCopy(yy,zz)); 734 PetscCall(VecGetArrayRead(xx,&x)); 735 PetscCall(VecGetArray(zz,&y)); 736 idx = a->j; 737 v = a->a; 738 ii = a->i; 739 740 for (i=0; i<m; i++) { 741 jrow = ii[i]; 742 n = ii[i+1] - jrow; 743 sum1 = 0.0; 744 sum2 = 0.0; 745 sum3 = 0.0; 746 sum4 = 0.0; 747 sum5 = 0.0; 748 for (j=0; j<n; j++) { 749 sum1 += v[jrow]*x[5*idx[jrow]]; 750 sum2 += v[jrow]*x[5*idx[jrow]+1]; 751 sum3 += v[jrow]*x[5*idx[jrow]+2]; 752 sum4 += v[jrow]*x[5*idx[jrow]+3]; 753 sum5 += v[jrow]*x[5*idx[jrow]+4]; 754 jrow++; 755 } 756 y[5*i] += sum1; 757 y[5*i+1] += sum2; 758 y[5*i+2] += sum3; 759 y[5*i+3] += sum4; 760 y[5*i+4] += sum5; 761 } 762 763 PetscCall(PetscLogFlops(10.0*a->nz)); 764 PetscCall(VecRestoreArrayRead(xx,&x)); 765 PetscCall(VecRestoreArray(zz,&y)); 766 PetscFunctionReturn(0); 767 } 768 769 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 770 { 771 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 772 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 773 const PetscScalar *x,*v; 774 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 775 PetscInt n,i; 776 const PetscInt m = b->AIJ->rmap->n,*idx; 777 778 PetscFunctionBegin; 779 if (yy != zz) PetscCall(VecCopy(yy,zz)); 780 PetscCall(VecGetArrayRead(xx,&x)); 781 PetscCall(VecGetArray(zz,&y)); 782 783 for (i=0; i<m; i++) { 784 idx = a->j + a->i[i]; 785 v = a->a + a->i[i]; 786 n = a->i[i+1] - a->i[i]; 787 alpha1 = x[5*i]; 788 alpha2 = x[5*i+1]; 789 alpha3 = x[5*i+2]; 790 alpha4 = x[5*i+3]; 791 alpha5 = x[5*i+4]; 792 while (n-->0) { 793 y[5*(*idx)] += alpha1*(*v); 794 y[5*(*idx)+1] += alpha2*(*v); 795 y[5*(*idx)+2] += alpha3*(*v); 796 y[5*(*idx)+3] += alpha4*(*v); 797 y[5*(*idx)+4] += alpha5*(*v); 798 idx++; v++; 799 } 800 } 801 PetscCall(PetscLogFlops(10.0*a->nz)); 802 PetscCall(VecRestoreArrayRead(xx,&x)); 803 PetscCall(VecRestoreArray(zz,&y)); 804 PetscFunctionReturn(0); 805 } 806 807 /* ------------------------------------------------------------------------------*/ 808 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 809 { 810 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 811 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 812 const PetscScalar *x,*v; 813 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 814 PetscInt nonzerorow=0,n,i,jrow,j; 815 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 816 817 PetscFunctionBegin; 818 PetscCall(VecGetArrayRead(xx,&x)); 819 PetscCall(VecGetArray(yy,&y)); 820 idx = a->j; 821 v = a->a; 822 ii = a->i; 823 824 for (i=0; i<m; i++) { 825 jrow = ii[i]; 826 n = ii[i+1] - jrow; 827 sum1 = 0.0; 828 sum2 = 0.0; 829 sum3 = 0.0; 830 sum4 = 0.0; 831 sum5 = 0.0; 832 sum6 = 0.0; 833 834 nonzerorow += (n>0); 835 for (j=0; j<n; j++) { 836 sum1 += v[jrow]*x[6*idx[jrow]]; 837 sum2 += v[jrow]*x[6*idx[jrow]+1]; 838 sum3 += v[jrow]*x[6*idx[jrow]+2]; 839 sum4 += v[jrow]*x[6*idx[jrow]+3]; 840 sum5 += v[jrow]*x[6*idx[jrow]+4]; 841 sum6 += v[jrow]*x[6*idx[jrow]+5]; 842 jrow++; 843 } 844 y[6*i] = sum1; 845 y[6*i+1] = sum2; 846 y[6*i+2] = sum3; 847 y[6*i+3] = sum4; 848 y[6*i+4] = sum5; 849 y[6*i+5] = sum6; 850 } 851 852 PetscCall(PetscLogFlops(12.0*a->nz - 6.0*nonzerorow)); 853 PetscCall(VecRestoreArrayRead(xx,&x)); 854 PetscCall(VecRestoreArray(yy,&y)); 855 PetscFunctionReturn(0); 856 } 857 858 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 859 { 860 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 861 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 862 const PetscScalar *x,*v; 863 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 864 PetscInt n,i; 865 const PetscInt m = b->AIJ->rmap->n,*idx; 866 867 PetscFunctionBegin; 868 PetscCall(VecSet(yy,0.0)); 869 PetscCall(VecGetArrayRead(xx,&x)); 870 PetscCall(VecGetArray(yy,&y)); 871 872 for (i=0; i<m; i++) { 873 idx = a->j + a->i[i]; 874 v = a->a + a->i[i]; 875 n = a->i[i+1] - a->i[i]; 876 alpha1 = x[6*i]; 877 alpha2 = x[6*i+1]; 878 alpha3 = x[6*i+2]; 879 alpha4 = x[6*i+3]; 880 alpha5 = x[6*i+4]; 881 alpha6 = x[6*i+5]; 882 while (n-->0) { 883 y[6*(*idx)] += alpha1*(*v); 884 y[6*(*idx)+1] += alpha2*(*v); 885 y[6*(*idx)+2] += alpha3*(*v); 886 y[6*(*idx)+3] += alpha4*(*v); 887 y[6*(*idx)+4] += alpha5*(*v); 888 y[6*(*idx)+5] += alpha6*(*v); 889 idx++; v++; 890 } 891 } 892 PetscCall(PetscLogFlops(12.0*a->nz)); 893 PetscCall(VecRestoreArrayRead(xx,&x)); 894 PetscCall(VecRestoreArray(yy,&y)); 895 PetscFunctionReturn(0); 896 } 897 898 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 899 { 900 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 901 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 902 const PetscScalar *x,*v; 903 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 904 PetscInt n,i,jrow,j; 905 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 906 907 PetscFunctionBegin; 908 if (yy != zz) PetscCall(VecCopy(yy,zz)); 909 PetscCall(VecGetArrayRead(xx,&x)); 910 PetscCall(VecGetArray(zz,&y)); 911 idx = a->j; 912 v = a->a; 913 ii = a->i; 914 915 for (i=0; i<m; i++) { 916 jrow = ii[i]; 917 n = ii[i+1] - jrow; 918 sum1 = 0.0; 919 sum2 = 0.0; 920 sum3 = 0.0; 921 sum4 = 0.0; 922 sum5 = 0.0; 923 sum6 = 0.0; 924 for (j=0; j<n; j++) { 925 sum1 += v[jrow]*x[6*idx[jrow]]; 926 sum2 += v[jrow]*x[6*idx[jrow]+1]; 927 sum3 += v[jrow]*x[6*idx[jrow]+2]; 928 sum4 += v[jrow]*x[6*idx[jrow]+3]; 929 sum5 += v[jrow]*x[6*idx[jrow]+4]; 930 sum6 += v[jrow]*x[6*idx[jrow]+5]; 931 jrow++; 932 } 933 y[6*i] += sum1; 934 y[6*i+1] += sum2; 935 y[6*i+2] += sum3; 936 y[6*i+3] += sum4; 937 y[6*i+4] += sum5; 938 y[6*i+5] += sum6; 939 } 940 941 PetscCall(PetscLogFlops(12.0*a->nz)); 942 PetscCall(VecRestoreArrayRead(xx,&x)); 943 PetscCall(VecRestoreArray(zz,&y)); 944 PetscFunctionReturn(0); 945 } 946 947 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 948 { 949 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 950 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 951 const PetscScalar *x,*v; 952 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 953 PetscInt n,i; 954 const PetscInt m = b->AIJ->rmap->n,*idx; 955 956 PetscFunctionBegin; 957 if (yy != zz) PetscCall(VecCopy(yy,zz)); 958 PetscCall(VecGetArrayRead(xx,&x)); 959 PetscCall(VecGetArray(zz,&y)); 960 961 for (i=0; i<m; i++) { 962 idx = a->j + a->i[i]; 963 v = a->a + a->i[i]; 964 n = a->i[i+1] - a->i[i]; 965 alpha1 = x[6*i]; 966 alpha2 = x[6*i+1]; 967 alpha3 = x[6*i+2]; 968 alpha4 = x[6*i+3]; 969 alpha5 = x[6*i+4]; 970 alpha6 = x[6*i+5]; 971 while (n-->0) { 972 y[6*(*idx)] += alpha1*(*v); 973 y[6*(*idx)+1] += alpha2*(*v); 974 y[6*(*idx)+2] += alpha3*(*v); 975 y[6*(*idx)+3] += alpha4*(*v); 976 y[6*(*idx)+4] += alpha5*(*v); 977 y[6*(*idx)+5] += alpha6*(*v); 978 idx++; v++; 979 } 980 } 981 PetscCall(PetscLogFlops(12.0*a->nz)); 982 PetscCall(VecRestoreArrayRead(xx,&x)); 983 PetscCall(VecRestoreArray(zz,&y)); 984 PetscFunctionReturn(0); 985 } 986 987 /* ------------------------------------------------------------------------------*/ 988 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 989 { 990 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 991 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 992 const PetscScalar *x,*v; 993 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 994 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 995 PetscInt nonzerorow=0,n,i,jrow,j; 996 997 PetscFunctionBegin; 998 PetscCall(VecGetArrayRead(xx,&x)); 999 PetscCall(VecGetArray(yy,&y)); 1000 idx = a->j; 1001 v = a->a; 1002 ii = a->i; 1003 1004 for (i=0; i<m; i++) { 1005 jrow = ii[i]; 1006 n = ii[i+1] - jrow; 1007 sum1 = 0.0; 1008 sum2 = 0.0; 1009 sum3 = 0.0; 1010 sum4 = 0.0; 1011 sum5 = 0.0; 1012 sum6 = 0.0; 1013 sum7 = 0.0; 1014 1015 nonzerorow += (n>0); 1016 for (j=0; j<n; j++) { 1017 sum1 += v[jrow]*x[7*idx[jrow]]; 1018 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1019 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1020 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1021 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1022 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1023 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1024 jrow++; 1025 } 1026 y[7*i] = sum1; 1027 y[7*i+1] = sum2; 1028 y[7*i+2] = sum3; 1029 y[7*i+3] = sum4; 1030 y[7*i+4] = sum5; 1031 y[7*i+5] = sum6; 1032 y[7*i+6] = sum7; 1033 } 1034 1035 PetscCall(PetscLogFlops(14.0*a->nz - 7.0*nonzerorow)); 1036 PetscCall(VecRestoreArrayRead(xx,&x)); 1037 PetscCall(VecRestoreArray(yy,&y)); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1042 { 1043 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1044 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1045 const PetscScalar *x,*v; 1046 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1047 const PetscInt m = b->AIJ->rmap->n,*idx; 1048 PetscInt n,i; 1049 1050 PetscFunctionBegin; 1051 PetscCall(VecSet(yy,0.0)); 1052 PetscCall(VecGetArrayRead(xx,&x)); 1053 PetscCall(VecGetArray(yy,&y)); 1054 1055 for (i=0; i<m; i++) { 1056 idx = a->j + a->i[i]; 1057 v = a->a + a->i[i]; 1058 n = a->i[i+1] - a->i[i]; 1059 alpha1 = x[7*i]; 1060 alpha2 = x[7*i+1]; 1061 alpha3 = x[7*i+2]; 1062 alpha4 = x[7*i+3]; 1063 alpha5 = x[7*i+4]; 1064 alpha6 = x[7*i+5]; 1065 alpha7 = x[7*i+6]; 1066 while (n-->0) { 1067 y[7*(*idx)] += alpha1*(*v); 1068 y[7*(*idx)+1] += alpha2*(*v); 1069 y[7*(*idx)+2] += alpha3*(*v); 1070 y[7*(*idx)+3] += alpha4*(*v); 1071 y[7*(*idx)+4] += alpha5*(*v); 1072 y[7*(*idx)+5] += alpha6*(*v); 1073 y[7*(*idx)+6] += alpha7*(*v); 1074 idx++; v++; 1075 } 1076 } 1077 PetscCall(PetscLogFlops(14.0*a->nz)); 1078 PetscCall(VecRestoreArrayRead(xx,&x)); 1079 PetscCall(VecRestoreArray(yy,&y)); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1084 { 1085 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1086 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1087 const PetscScalar *x,*v; 1088 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1089 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1090 PetscInt n,i,jrow,j; 1091 1092 PetscFunctionBegin; 1093 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1094 PetscCall(VecGetArrayRead(xx,&x)); 1095 PetscCall(VecGetArray(zz,&y)); 1096 idx = a->j; 1097 v = a->a; 1098 ii = a->i; 1099 1100 for (i=0; i<m; i++) { 1101 jrow = ii[i]; 1102 n = ii[i+1] - jrow; 1103 sum1 = 0.0; 1104 sum2 = 0.0; 1105 sum3 = 0.0; 1106 sum4 = 0.0; 1107 sum5 = 0.0; 1108 sum6 = 0.0; 1109 sum7 = 0.0; 1110 for (j=0; j<n; j++) { 1111 sum1 += v[jrow]*x[7*idx[jrow]]; 1112 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1113 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1114 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1115 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1116 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1117 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1118 jrow++; 1119 } 1120 y[7*i] += sum1; 1121 y[7*i+1] += sum2; 1122 y[7*i+2] += sum3; 1123 y[7*i+3] += sum4; 1124 y[7*i+4] += sum5; 1125 y[7*i+5] += sum6; 1126 y[7*i+6] += sum7; 1127 } 1128 1129 PetscCall(PetscLogFlops(14.0*a->nz)); 1130 PetscCall(VecRestoreArrayRead(xx,&x)); 1131 PetscCall(VecRestoreArray(zz,&y)); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1136 { 1137 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1139 const PetscScalar *x,*v; 1140 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1141 const PetscInt m = b->AIJ->rmap->n,*idx; 1142 PetscInt n,i; 1143 1144 PetscFunctionBegin; 1145 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1146 PetscCall(VecGetArrayRead(xx,&x)); 1147 PetscCall(VecGetArray(zz,&y)); 1148 for (i=0; i<m; i++) { 1149 idx = a->j + a->i[i]; 1150 v = a->a + a->i[i]; 1151 n = a->i[i+1] - a->i[i]; 1152 alpha1 = x[7*i]; 1153 alpha2 = x[7*i+1]; 1154 alpha3 = x[7*i+2]; 1155 alpha4 = x[7*i+3]; 1156 alpha5 = x[7*i+4]; 1157 alpha6 = x[7*i+5]; 1158 alpha7 = x[7*i+6]; 1159 while (n-->0) { 1160 y[7*(*idx)] += alpha1*(*v); 1161 y[7*(*idx)+1] += alpha2*(*v); 1162 y[7*(*idx)+2] += alpha3*(*v); 1163 y[7*(*idx)+3] += alpha4*(*v); 1164 y[7*(*idx)+4] += alpha5*(*v); 1165 y[7*(*idx)+5] += alpha6*(*v); 1166 y[7*(*idx)+6] += alpha7*(*v); 1167 idx++; v++; 1168 } 1169 } 1170 PetscCall(PetscLogFlops(14.0*a->nz)); 1171 PetscCall(VecRestoreArrayRead(xx,&x)); 1172 PetscCall(VecRestoreArray(zz,&y)); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1177 { 1178 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1180 const PetscScalar *x,*v; 1181 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1182 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1183 PetscInt nonzerorow=0,n,i,jrow,j; 1184 1185 PetscFunctionBegin; 1186 PetscCall(VecGetArrayRead(xx,&x)); 1187 PetscCall(VecGetArray(yy,&y)); 1188 idx = a->j; 1189 v = a->a; 1190 ii = a->i; 1191 1192 for (i=0; i<m; i++) { 1193 jrow = ii[i]; 1194 n = ii[i+1] - jrow; 1195 sum1 = 0.0; 1196 sum2 = 0.0; 1197 sum3 = 0.0; 1198 sum4 = 0.0; 1199 sum5 = 0.0; 1200 sum6 = 0.0; 1201 sum7 = 0.0; 1202 sum8 = 0.0; 1203 1204 nonzerorow += (n>0); 1205 for (j=0; j<n; j++) { 1206 sum1 += v[jrow]*x[8*idx[jrow]]; 1207 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1208 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1209 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1210 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1211 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1212 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1213 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1214 jrow++; 1215 } 1216 y[8*i] = sum1; 1217 y[8*i+1] = sum2; 1218 y[8*i+2] = sum3; 1219 y[8*i+3] = sum4; 1220 y[8*i+4] = sum5; 1221 y[8*i+5] = sum6; 1222 y[8*i+6] = sum7; 1223 y[8*i+7] = sum8; 1224 } 1225 1226 PetscCall(PetscLogFlops(16.0*a->nz - 8.0*nonzerorow)); 1227 PetscCall(VecRestoreArrayRead(xx,&x)); 1228 PetscCall(VecRestoreArray(yy,&y)); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1233 { 1234 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1235 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1236 const PetscScalar *x,*v; 1237 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1238 const PetscInt m = b->AIJ->rmap->n,*idx; 1239 PetscInt n,i; 1240 1241 PetscFunctionBegin; 1242 PetscCall(VecSet(yy,0.0)); 1243 PetscCall(VecGetArrayRead(xx,&x)); 1244 PetscCall(VecGetArray(yy,&y)); 1245 1246 for (i=0; i<m; i++) { 1247 idx = a->j + a->i[i]; 1248 v = a->a + a->i[i]; 1249 n = a->i[i+1] - a->i[i]; 1250 alpha1 = x[8*i]; 1251 alpha2 = x[8*i+1]; 1252 alpha3 = x[8*i+2]; 1253 alpha4 = x[8*i+3]; 1254 alpha5 = x[8*i+4]; 1255 alpha6 = x[8*i+5]; 1256 alpha7 = x[8*i+6]; 1257 alpha8 = x[8*i+7]; 1258 while (n-->0) { 1259 y[8*(*idx)] += alpha1*(*v); 1260 y[8*(*idx)+1] += alpha2*(*v); 1261 y[8*(*idx)+2] += alpha3*(*v); 1262 y[8*(*idx)+3] += alpha4*(*v); 1263 y[8*(*idx)+4] += alpha5*(*v); 1264 y[8*(*idx)+5] += alpha6*(*v); 1265 y[8*(*idx)+6] += alpha7*(*v); 1266 y[8*(*idx)+7] += alpha8*(*v); 1267 idx++; v++; 1268 } 1269 } 1270 PetscCall(PetscLogFlops(16.0*a->nz)); 1271 PetscCall(VecRestoreArrayRead(xx,&x)); 1272 PetscCall(VecRestoreArray(yy,&y)); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1277 { 1278 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1280 const PetscScalar *x,*v; 1281 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1282 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1283 PetscInt n,i,jrow,j; 1284 1285 PetscFunctionBegin; 1286 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1287 PetscCall(VecGetArrayRead(xx,&x)); 1288 PetscCall(VecGetArray(zz,&y)); 1289 idx = a->j; 1290 v = a->a; 1291 ii = a->i; 1292 1293 for (i=0; i<m; i++) { 1294 jrow = ii[i]; 1295 n = ii[i+1] - jrow; 1296 sum1 = 0.0; 1297 sum2 = 0.0; 1298 sum3 = 0.0; 1299 sum4 = 0.0; 1300 sum5 = 0.0; 1301 sum6 = 0.0; 1302 sum7 = 0.0; 1303 sum8 = 0.0; 1304 for (j=0; j<n; j++) { 1305 sum1 += v[jrow]*x[8*idx[jrow]]; 1306 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1307 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1308 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1309 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1310 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1311 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1312 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1313 jrow++; 1314 } 1315 y[8*i] += sum1; 1316 y[8*i+1] += sum2; 1317 y[8*i+2] += sum3; 1318 y[8*i+3] += sum4; 1319 y[8*i+4] += sum5; 1320 y[8*i+5] += sum6; 1321 y[8*i+6] += sum7; 1322 y[8*i+7] += sum8; 1323 } 1324 1325 PetscCall(PetscLogFlops(16.0*a->nz)); 1326 PetscCall(VecRestoreArrayRead(xx,&x)); 1327 PetscCall(VecRestoreArray(zz,&y)); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1332 { 1333 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1334 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1335 const PetscScalar *x,*v; 1336 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1337 const PetscInt m = b->AIJ->rmap->n,*idx; 1338 PetscInt n,i; 1339 1340 PetscFunctionBegin; 1341 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1342 PetscCall(VecGetArrayRead(xx,&x)); 1343 PetscCall(VecGetArray(zz,&y)); 1344 for (i=0; i<m; i++) { 1345 idx = a->j + a->i[i]; 1346 v = a->a + a->i[i]; 1347 n = a->i[i+1] - a->i[i]; 1348 alpha1 = x[8*i]; 1349 alpha2 = x[8*i+1]; 1350 alpha3 = x[8*i+2]; 1351 alpha4 = x[8*i+3]; 1352 alpha5 = x[8*i+4]; 1353 alpha6 = x[8*i+5]; 1354 alpha7 = x[8*i+6]; 1355 alpha8 = x[8*i+7]; 1356 while (n-->0) { 1357 y[8*(*idx)] += alpha1*(*v); 1358 y[8*(*idx)+1] += alpha2*(*v); 1359 y[8*(*idx)+2] += alpha3*(*v); 1360 y[8*(*idx)+3] += alpha4*(*v); 1361 y[8*(*idx)+4] += alpha5*(*v); 1362 y[8*(*idx)+5] += alpha6*(*v); 1363 y[8*(*idx)+6] += alpha7*(*v); 1364 y[8*(*idx)+7] += alpha8*(*v); 1365 idx++; v++; 1366 } 1367 } 1368 PetscCall(PetscLogFlops(16.0*a->nz)); 1369 PetscCall(VecRestoreArrayRead(xx,&x)); 1370 PetscCall(VecRestoreArray(zz,&y)); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 /* ------------------------------------------------------------------------------*/ 1375 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1376 { 1377 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1378 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1379 const PetscScalar *x,*v; 1380 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1381 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1382 PetscInt nonzerorow=0,n,i,jrow,j; 1383 1384 PetscFunctionBegin; 1385 PetscCall(VecGetArrayRead(xx,&x)); 1386 PetscCall(VecGetArray(yy,&y)); 1387 idx = a->j; 1388 v = a->a; 1389 ii = a->i; 1390 1391 for (i=0; i<m; i++) { 1392 jrow = ii[i]; 1393 n = ii[i+1] - jrow; 1394 sum1 = 0.0; 1395 sum2 = 0.0; 1396 sum3 = 0.0; 1397 sum4 = 0.0; 1398 sum5 = 0.0; 1399 sum6 = 0.0; 1400 sum7 = 0.0; 1401 sum8 = 0.0; 1402 sum9 = 0.0; 1403 1404 nonzerorow += (n>0); 1405 for (j=0; j<n; j++) { 1406 sum1 += v[jrow]*x[9*idx[jrow]]; 1407 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1408 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1409 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1410 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1411 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1412 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1413 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1414 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1415 jrow++; 1416 } 1417 y[9*i] = sum1; 1418 y[9*i+1] = sum2; 1419 y[9*i+2] = sum3; 1420 y[9*i+3] = sum4; 1421 y[9*i+4] = sum5; 1422 y[9*i+5] = sum6; 1423 y[9*i+6] = sum7; 1424 y[9*i+7] = sum8; 1425 y[9*i+8] = sum9; 1426 } 1427 1428 PetscCall(PetscLogFlops(18.0*a->nz - 9*nonzerorow)); 1429 PetscCall(VecRestoreArrayRead(xx,&x)); 1430 PetscCall(VecRestoreArray(yy,&y)); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 /* ------------------------------------------------------------------------------*/ 1435 1436 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1437 { 1438 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1439 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1440 const PetscScalar *x,*v; 1441 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1442 const PetscInt m = b->AIJ->rmap->n,*idx; 1443 PetscInt n,i; 1444 1445 PetscFunctionBegin; 1446 PetscCall(VecSet(yy,0.0)); 1447 PetscCall(VecGetArrayRead(xx,&x)); 1448 PetscCall(VecGetArray(yy,&y)); 1449 1450 for (i=0; i<m; i++) { 1451 idx = a->j + a->i[i]; 1452 v = a->a + a->i[i]; 1453 n = a->i[i+1] - a->i[i]; 1454 alpha1 = x[9*i]; 1455 alpha2 = x[9*i+1]; 1456 alpha3 = x[9*i+2]; 1457 alpha4 = x[9*i+3]; 1458 alpha5 = x[9*i+4]; 1459 alpha6 = x[9*i+5]; 1460 alpha7 = x[9*i+6]; 1461 alpha8 = x[9*i+7]; 1462 alpha9 = x[9*i+8]; 1463 while (n-->0) { 1464 y[9*(*idx)] += alpha1*(*v); 1465 y[9*(*idx)+1] += alpha2*(*v); 1466 y[9*(*idx)+2] += alpha3*(*v); 1467 y[9*(*idx)+3] += alpha4*(*v); 1468 y[9*(*idx)+4] += alpha5*(*v); 1469 y[9*(*idx)+5] += alpha6*(*v); 1470 y[9*(*idx)+6] += alpha7*(*v); 1471 y[9*(*idx)+7] += alpha8*(*v); 1472 y[9*(*idx)+8] += alpha9*(*v); 1473 idx++; v++; 1474 } 1475 } 1476 PetscCall(PetscLogFlops(18.0*a->nz)); 1477 PetscCall(VecRestoreArrayRead(xx,&x)); 1478 PetscCall(VecRestoreArray(yy,&y)); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1483 { 1484 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1485 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1486 const PetscScalar *x,*v; 1487 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1488 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1489 PetscInt n,i,jrow,j; 1490 1491 PetscFunctionBegin; 1492 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1493 PetscCall(VecGetArrayRead(xx,&x)); 1494 PetscCall(VecGetArray(zz,&y)); 1495 idx = a->j; 1496 v = a->a; 1497 ii = a->i; 1498 1499 for (i=0; i<m; i++) { 1500 jrow = ii[i]; 1501 n = ii[i+1] - jrow; 1502 sum1 = 0.0; 1503 sum2 = 0.0; 1504 sum3 = 0.0; 1505 sum4 = 0.0; 1506 sum5 = 0.0; 1507 sum6 = 0.0; 1508 sum7 = 0.0; 1509 sum8 = 0.0; 1510 sum9 = 0.0; 1511 for (j=0; j<n; j++) { 1512 sum1 += v[jrow]*x[9*idx[jrow]]; 1513 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1514 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1515 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1516 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1517 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1518 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1519 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1520 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1521 jrow++; 1522 } 1523 y[9*i] += sum1; 1524 y[9*i+1] += sum2; 1525 y[9*i+2] += sum3; 1526 y[9*i+3] += sum4; 1527 y[9*i+4] += sum5; 1528 y[9*i+5] += sum6; 1529 y[9*i+6] += sum7; 1530 y[9*i+7] += sum8; 1531 y[9*i+8] += sum9; 1532 } 1533 1534 PetscCall(PetscLogFlops(18.0*a->nz)); 1535 PetscCall(VecRestoreArrayRead(xx,&x)); 1536 PetscCall(VecRestoreArray(zz,&y)); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1541 { 1542 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1543 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1544 const PetscScalar *x,*v; 1545 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1546 const PetscInt m = b->AIJ->rmap->n,*idx; 1547 PetscInt n,i; 1548 1549 PetscFunctionBegin; 1550 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1551 PetscCall(VecGetArrayRead(xx,&x)); 1552 PetscCall(VecGetArray(zz,&y)); 1553 for (i=0; i<m; i++) { 1554 idx = a->j + a->i[i]; 1555 v = a->a + a->i[i]; 1556 n = a->i[i+1] - a->i[i]; 1557 alpha1 = x[9*i]; 1558 alpha2 = x[9*i+1]; 1559 alpha3 = x[9*i+2]; 1560 alpha4 = x[9*i+3]; 1561 alpha5 = x[9*i+4]; 1562 alpha6 = x[9*i+5]; 1563 alpha7 = x[9*i+6]; 1564 alpha8 = x[9*i+7]; 1565 alpha9 = x[9*i+8]; 1566 while (n-->0) { 1567 y[9*(*idx)] += alpha1*(*v); 1568 y[9*(*idx)+1] += alpha2*(*v); 1569 y[9*(*idx)+2] += alpha3*(*v); 1570 y[9*(*idx)+3] += alpha4*(*v); 1571 y[9*(*idx)+4] += alpha5*(*v); 1572 y[9*(*idx)+5] += alpha6*(*v); 1573 y[9*(*idx)+6] += alpha7*(*v); 1574 y[9*(*idx)+7] += alpha8*(*v); 1575 y[9*(*idx)+8] += alpha9*(*v); 1576 idx++; v++; 1577 } 1578 } 1579 PetscCall(PetscLogFlops(18.0*a->nz)); 1580 PetscCall(VecRestoreArrayRead(xx,&x)); 1581 PetscCall(VecRestoreArray(zz,&y)); 1582 PetscFunctionReturn(0); 1583 } 1584 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1585 { 1586 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1587 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1588 const PetscScalar *x,*v; 1589 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1590 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1591 PetscInt nonzerorow=0,n,i,jrow,j; 1592 1593 PetscFunctionBegin; 1594 PetscCall(VecGetArrayRead(xx,&x)); 1595 PetscCall(VecGetArray(yy,&y)); 1596 idx = a->j; 1597 v = a->a; 1598 ii = a->i; 1599 1600 for (i=0; i<m; i++) { 1601 jrow = ii[i]; 1602 n = ii[i+1] - jrow; 1603 sum1 = 0.0; 1604 sum2 = 0.0; 1605 sum3 = 0.0; 1606 sum4 = 0.0; 1607 sum5 = 0.0; 1608 sum6 = 0.0; 1609 sum7 = 0.0; 1610 sum8 = 0.0; 1611 sum9 = 0.0; 1612 sum10 = 0.0; 1613 1614 nonzerorow += (n>0); 1615 for (j=0; j<n; j++) { 1616 sum1 += v[jrow]*x[10*idx[jrow]]; 1617 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1618 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1619 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1620 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1621 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1622 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1623 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1624 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1625 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1626 jrow++; 1627 } 1628 y[10*i] = sum1; 1629 y[10*i+1] = sum2; 1630 y[10*i+2] = sum3; 1631 y[10*i+3] = sum4; 1632 y[10*i+4] = sum5; 1633 y[10*i+5] = sum6; 1634 y[10*i+6] = sum7; 1635 y[10*i+7] = sum8; 1636 y[10*i+8] = sum9; 1637 y[10*i+9] = sum10; 1638 } 1639 1640 PetscCall(PetscLogFlops(20.0*a->nz - 10.0*nonzerorow)); 1641 PetscCall(VecRestoreArrayRead(xx,&x)); 1642 PetscCall(VecRestoreArray(yy,&y)); 1643 PetscFunctionReturn(0); 1644 } 1645 1646 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1647 { 1648 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1649 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1650 const PetscScalar *x,*v; 1651 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1652 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1653 PetscInt n,i,jrow,j; 1654 1655 PetscFunctionBegin; 1656 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1657 PetscCall(VecGetArrayRead(xx,&x)); 1658 PetscCall(VecGetArray(zz,&y)); 1659 idx = a->j; 1660 v = a->a; 1661 ii = a->i; 1662 1663 for (i=0; i<m; i++) { 1664 jrow = ii[i]; 1665 n = ii[i+1] - jrow; 1666 sum1 = 0.0; 1667 sum2 = 0.0; 1668 sum3 = 0.0; 1669 sum4 = 0.0; 1670 sum5 = 0.0; 1671 sum6 = 0.0; 1672 sum7 = 0.0; 1673 sum8 = 0.0; 1674 sum9 = 0.0; 1675 sum10 = 0.0; 1676 for (j=0; j<n; j++) { 1677 sum1 += v[jrow]*x[10*idx[jrow]]; 1678 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1679 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1680 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1681 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1682 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1683 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1684 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1685 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1686 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1687 jrow++; 1688 } 1689 y[10*i] += sum1; 1690 y[10*i+1] += sum2; 1691 y[10*i+2] += sum3; 1692 y[10*i+3] += sum4; 1693 y[10*i+4] += sum5; 1694 y[10*i+5] += sum6; 1695 y[10*i+6] += sum7; 1696 y[10*i+7] += sum8; 1697 y[10*i+8] += sum9; 1698 y[10*i+9] += sum10; 1699 } 1700 1701 PetscCall(PetscLogFlops(20.0*a->nz)); 1702 PetscCall(VecRestoreArrayRead(xx,&x)); 1703 PetscCall(VecRestoreArray(yy,&y)); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1708 { 1709 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1710 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1711 const PetscScalar *x,*v; 1712 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1713 const PetscInt m = b->AIJ->rmap->n,*idx; 1714 PetscInt n,i; 1715 1716 PetscFunctionBegin; 1717 PetscCall(VecSet(yy,0.0)); 1718 PetscCall(VecGetArrayRead(xx,&x)); 1719 PetscCall(VecGetArray(yy,&y)); 1720 1721 for (i=0; i<m; i++) { 1722 idx = a->j + a->i[i]; 1723 v = a->a + a->i[i]; 1724 n = a->i[i+1] - a->i[i]; 1725 alpha1 = x[10*i]; 1726 alpha2 = x[10*i+1]; 1727 alpha3 = x[10*i+2]; 1728 alpha4 = x[10*i+3]; 1729 alpha5 = x[10*i+4]; 1730 alpha6 = x[10*i+5]; 1731 alpha7 = x[10*i+6]; 1732 alpha8 = x[10*i+7]; 1733 alpha9 = x[10*i+8]; 1734 alpha10 = x[10*i+9]; 1735 while (n-->0) { 1736 y[10*(*idx)] += alpha1*(*v); 1737 y[10*(*idx)+1] += alpha2*(*v); 1738 y[10*(*idx)+2] += alpha3*(*v); 1739 y[10*(*idx)+3] += alpha4*(*v); 1740 y[10*(*idx)+4] += alpha5*(*v); 1741 y[10*(*idx)+5] += alpha6*(*v); 1742 y[10*(*idx)+6] += alpha7*(*v); 1743 y[10*(*idx)+7] += alpha8*(*v); 1744 y[10*(*idx)+8] += alpha9*(*v); 1745 y[10*(*idx)+9] += alpha10*(*v); 1746 idx++; v++; 1747 } 1748 } 1749 PetscCall(PetscLogFlops(20.0*a->nz)); 1750 PetscCall(VecRestoreArrayRead(xx,&x)); 1751 PetscCall(VecRestoreArray(yy,&y)); 1752 PetscFunctionReturn(0); 1753 } 1754 1755 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1756 { 1757 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1758 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1759 const PetscScalar *x,*v; 1760 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1761 const PetscInt m = b->AIJ->rmap->n,*idx; 1762 PetscInt n,i; 1763 1764 PetscFunctionBegin; 1765 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1766 PetscCall(VecGetArrayRead(xx,&x)); 1767 PetscCall(VecGetArray(zz,&y)); 1768 for (i=0; i<m; i++) { 1769 idx = a->j + a->i[i]; 1770 v = a->a + a->i[i]; 1771 n = a->i[i+1] - a->i[i]; 1772 alpha1 = x[10*i]; 1773 alpha2 = x[10*i+1]; 1774 alpha3 = x[10*i+2]; 1775 alpha4 = x[10*i+3]; 1776 alpha5 = x[10*i+4]; 1777 alpha6 = x[10*i+5]; 1778 alpha7 = x[10*i+6]; 1779 alpha8 = x[10*i+7]; 1780 alpha9 = x[10*i+8]; 1781 alpha10 = x[10*i+9]; 1782 while (n-->0) { 1783 y[10*(*idx)] += alpha1*(*v); 1784 y[10*(*idx)+1] += alpha2*(*v); 1785 y[10*(*idx)+2] += alpha3*(*v); 1786 y[10*(*idx)+3] += alpha4*(*v); 1787 y[10*(*idx)+4] += alpha5*(*v); 1788 y[10*(*idx)+5] += alpha6*(*v); 1789 y[10*(*idx)+6] += alpha7*(*v); 1790 y[10*(*idx)+7] += alpha8*(*v); 1791 y[10*(*idx)+8] += alpha9*(*v); 1792 y[10*(*idx)+9] += alpha10*(*v); 1793 idx++; v++; 1794 } 1795 } 1796 PetscCall(PetscLogFlops(20.0*a->nz)); 1797 PetscCall(VecRestoreArrayRead(xx,&x)); 1798 PetscCall(VecRestoreArray(zz,&y)); 1799 PetscFunctionReturn(0); 1800 } 1801 1802 /*--------------------------------------------------------------------------------------------*/ 1803 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1804 { 1805 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1806 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1807 const PetscScalar *x,*v; 1808 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1809 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1810 PetscInt nonzerorow=0,n,i,jrow,j; 1811 1812 PetscFunctionBegin; 1813 PetscCall(VecGetArrayRead(xx,&x)); 1814 PetscCall(VecGetArray(yy,&y)); 1815 idx = a->j; 1816 v = a->a; 1817 ii = a->i; 1818 1819 for (i=0; i<m; i++) { 1820 jrow = ii[i]; 1821 n = ii[i+1] - jrow; 1822 sum1 = 0.0; 1823 sum2 = 0.0; 1824 sum3 = 0.0; 1825 sum4 = 0.0; 1826 sum5 = 0.0; 1827 sum6 = 0.0; 1828 sum7 = 0.0; 1829 sum8 = 0.0; 1830 sum9 = 0.0; 1831 sum10 = 0.0; 1832 sum11 = 0.0; 1833 1834 nonzerorow += (n>0); 1835 for (j=0; j<n; j++) { 1836 sum1 += v[jrow]*x[11*idx[jrow]]; 1837 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1838 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1839 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1840 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1841 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1842 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1843 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1844 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1845 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1846 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1847 jrow++; 1848 } 1849 y[11*i] = sum1; 1850 y[11*i+1] = sum2; 1851 y[11*i+2] = sum3; 1852 y[11*i+3] = sum4; 1853 y[11*i+4] = sum5; 1854 y[11*i+5] = sum6; 1855 y[11*i+6] = sum7; 1856 y[11*i+7] = sum8; 1857 y[11*i+8] = sum9; 1858 y[11*i+9] = sum10; 1859 y[11*i+10] = sum11; 1860 } 1861 1862 PetscCall(PetscLogFlops(22.0*a->nz - 11*nonzerorow)); 1863 PetscCall(VecRestoreArrayRead(xx,&x)); 1864 PetscCall(VecRestoreArray(yy,&y)); 1865 PetscFunctionReturn(0); 1866 } 1867 1868 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1869 { 1870 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1871 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1872 const PetscScalar *x,*v; 1873 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1874 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1875 PetscInt n,i,jrow,j; 1876 1877 PetscFunctionBegin; 1878 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1879 PetscCall(VecGetArrayRead(xx,&x)); 1880 PetscCall(VecGetArray(zz,&y)); 1881 idx = a->j; 1882 v = a->a; 1883 ii = a->i; 1884 1885 for (i=0; i<m; i++) { 1886 jrow = ii[i]; 1887 n = ii[i+1] - jrow; 1888 sum1 = 0.0; 1889 sum2 = 0.0; 1890 sum3 = 0.0; 1891 sum4 = 0.0; 1892 sum5 = 0.0; 1893 sum6 = 0.0; 1894 sum7 = 0.0; 1895 sum8 = 0.0; 1896 sum9 = 0.0; 1897 sum10 = 0.0; 1898 sum11 = 0.0; 1899 for (j=0; j<n; j++) { 1900 sum1 += v[jrow]*x[11*idx[jrow]]; 1901 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1902 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1903 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1904 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1905 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1906 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1907 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1908 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1909 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1910 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1911 jrow++; 1912 } 1913 y[11*i] += sum1; 1914 y[11*i+1] += sum2; 1915 y[11*i+2] += sum3; 1916 y[11*i+3] += sum4; 1917 y[11*i+4] += sum5; 1918 y[11*i+5] += sum6; 1919 y[11*i+6] += sum7; 1920 y[11*i+7] += sum8; 1921 y[11*i+8] += sum9; 1922 y[11*i+9] += sum10; 1923 y[11*i+10] += sum11; 1924 } 1925 1926 PetscCall(PetscLogFlops(22.0*a->nz)); 1927 PetscCall(VecRestoreArrayRead(xx,&x)); 1928 PetscCall(VecRestoreArray(yy,&y)); 1929 PetscFunctionReturn(0); 1930 } 1931 1932 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1933 { 1934 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1935 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1936 const PetscScalar *x,*v; 1937 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1938 const PetscInt m = b->AIJ->rmap->n,*idx; 1939 PetscInt n,i; 1940 1941 PetscFunctionBegin; 1942 PetscCall(VecSet(yy,0.0)); 1943 PetscCall(VecGetArrayRead(xx,&x)); 1944 PetscCall(VecGetArray(yy,&y)); 1945 1946 for (i=0; i<m; i++) { 1947 idx = a->j + a->i[i]; 1948 v = a->a + a->i[i]; 1949 n = a->i[i+1] - a->i[i]; 1950 alpha1 = x[11*i]; 1951 alpha2 = x[11*i+1]; 1952 alpha3 = x[11*i+2]; 1953 alpha4 = x[11*i+3]; 1954 alpha5 = x[11*i+4]; 1955 alpha6 = x[11*i+5]; 1956 alpha7 = x[11*i+6]; 1957 alpha8 = x[11*i+7]; 1958 alpha9 = x[11*i+8]; 1959 alpha10 = x[11*i+9]; 1960 alpha11 = x[11*i+10]; 1961 while (n-->0) { 1962 y[11*(*idx)] += alpha1*(*v); 1963 y[11*(*idx)+1] += alpha2*(*v); 1964 y[11*(*idx)+2] += alpha3*(*v); 1965 y[11*(*idx)+3] += alpha4*(*v); 1966 y[11*(*idx)+4] += alpha5*(*v); 1967 y[11*(*idx)+5] += alpha6*(*v); 1968 y[11*(*idx)+6] += alpha7*(*v); 1969 y[11*(*idx)+7] += alpha8*(*v); 1970 y[11*(*idx)+8] += alpha9*(*v); 1971 y[11*(*idx)+9] += alpha10*(*v); 1972 y[11*(*idx)+10] += alpha11*(*v); 1973 idx++; v++; 1974 } 1975 } 1976 PetscCall(PetscLogFlops(22.0*a->nz)); 1977 PetscCall(VecRestoreArrayRead(xx,&x)); 1978 PetscCall(VecRestoreArray(yy,&y)); 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1983 { 1984 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1985 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1986 const PetscScalar *x,*v; 1987 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1988 const PetscInt m = b->AIJ->rmap->n,*idx; 1989 PetscInt n,i; 1990 1991 PetscFunctionBegin; 1992 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1993 PetscCall(VecGetArrayRead(xx,&x)); 1994 PetscCall(VecGetArray(zz,&y)); 1995 for (i=0; i<m; i++) { 1996 idx = a->j + a->i[i]; 1997 v = a->a + a->i[i]; 1998 n = a->i[i+1] - a->i[i]; 1999 alpha1 = x[11*i]; 2000 alpha2 = x[11*i+1]; 2001 alpha3 = x[11*i+2]; 2002 alpha4 = x[11*i+3]; 2003 alpha5 = x[11*i+4]; 2004 alpha6 = x[11*i+5]; 2005 alpha7 = x[11*i+6]; 2006 alpha8 = x[11*i+7]; 2007 alpha9 = x[11*i+8]; 2008 alpha10 = x[11*i+9]; 2009 alpha11 = x[11*i+10]; 2010 while (n-->0) { 2011 y[11*(*idx)] += alpha1*(*v); 2012 y[11*(*idx)+1] += alpha2*(*v); 2013 y[11*(*idx)+2] += alpha3*(*v); 2014 y[11*(*idx)+3] += alpha4*(*v); 2015 y[11*(*idx)+4] += alpha5*(*v); 2016 y[11*(*idx)+5] += alpha6*(*v); 2017 y[11*(*idx)+6] += alpha7*(*v); 2018 y[11*(*idx)+7] += alpha8*(*v); 2019 y[11*(*idx)+8] += alpha9*(*v); 2020 y[11*(*idx)+9] += alpha10*(*v); 2021 y[11*(*idx)+10] += alpha11*(*v); 2022 idx++; v++; 2023 } 2024 } 2025 PetscCall(PetscLogFlops(22.0*a->nz)); 2026 PetscCall(VecRestoreArrayRead(xx,&x)); 2027 PetscCall(VecRestoreArray(zz,&y)); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 /*--------------------------------------------------------------------------------------------*/ 2032 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2033 { 2034 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2035 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2036 const PetscScalar *x,*v; 2037 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2038 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2039 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2040 PetscInt nonzerorow=0,n,i,jrow,j; 2041 2042 PetscFunctionBegin; 2043 PetscCall(VecGetArrayRead(xx,&x)); 2044 PetscCall(VecGetArray(yy,&y)); 2045 idx = a->j; 2046 v = a->a; 2047 ii = a->i; 2048 2049 for (i=0; i<m; i++) { 2050 jrow = ii[i]; 2051 n = ii[i+1] - jrow; 2052 sum1 = 0.0; 2053 sum2 = 0.0; 2054 sum3 = 0.0; 2055 sum4 = 0.0; 2056 sum5 = 0.0; 2057 sum6 = 0.0; 2058 sum7 = 0.0; 2059 sum8 = 0.0; 2060 sum9 = 0.0; 2061 sum10 = 0.0; 2062 sum11 = 0.0; 2063 sum12 = 0.0; 2064 sum13 = 0.0; 2065 sum14 = 0.0; 2066 sum15 = 0.0; 2067 sum16 = 0.0; 2068 2069 nonzerorow += (n>0); 2070 for (j=0; j<n; j++) { 2071 sum1 += v[jrow]*x[16*idx[jrow]]; 2072 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2073 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2074 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2075 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2076 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2077 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2078 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2079 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2080 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2081 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2082 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2083 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2084 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2085 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2086 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2087 jrow++; 2088 } 2089 y[16*i] = sum1; 2090 y[16*i+1] = sum2; 2091 y[16*i+2] = sum3; 2092 y[16*i+3] = sum4; 2093 y[16*i+4] = sum5; 2094 y[16*i+5] = sum6; 2095 y[16*i+6] = sum7; 2096 y[16*i+7] = sum8; 2097 y[16*i+8] = sum9; 2098 y[16*i+9] = sum10; 2099 y[16*i+10] = sum11; 2100 y[16*i+11] = sum12; 2101 y[16*i+12] = sum13; 2102 y[16*i+13] = sum14; 2103 y[16*i+14] = sum15; 2104 y[16*i+15] = sum16; 2105 } 2106 2107 PetscCall(PetscLogFlops(32.0*a->nz - 16.0*nonzerorow)); 2108 PetscCall(VecRestoreArrayRead(xx,&x)); 2109 PetscCall(VecRestoreArray(yy,&y)); 2110 PetscFunctionReturn(0); 2111 } 2112 2113 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2114 { 2115 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2116 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2117 const PetscScalar *x,*v; 2118 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2119 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2120 const PetscInt m = b->AIJ->rmap->n,*idx; 2121 PetscInt n,i; 2122 2123 PetscFunctionBegin; 2124 PetscCall(VecSet(yy,0.0)); 2125 PetscCall(VecGetArrayRead(xx,&x)); 2126 PetscCall(VecGetArray(yy,&y)); 2127 2128 for (i=0; i<m; i++) { 2129 idx = a->j + a->i[i]; 2130 v = a->a + a->i[i]; 2131 n = a->i[i+1] - a->i[i]; 2132 alpha1 = x[16*i]; 2133 alpha2 = x[16*i+1]; 2134 alpha3 = x[16*i+2]; 2135 alpha4 = x[16*i+3]; 2136 alpha5 = x[16*i+4]; 2137 alpha6 = x[16*i+5]; 2138 alpha7 = x[16*i+6]; 2139 alpha8 = x[16*i+7]; 2140 alpha9 = x[16*i+8]; 2141 alpha10 = x[16*i+9]; 2142 alpha11 = x[16*i+10]; 2143 alpha12 = x[16*i+11]; 2144 alpha13 = x[16*i+12]; 2145 alpha14 = x[16*i+13]; 2146 alpha15 = x[16*i+14]; 2147 alpha16 = x[16*i+15]; 2148 while (n-->0) { 2149 y[16*(*idx)] += alpha1*(*v); 2150 y[16*(*idx)+1] += alpha2*(*v); 2151 y[16*(*idx)+2] += alpha3*(*v); 2152 y[16*(*idx)+3] += alpha4*(*v); 2153 y[16*(*idx)+4] += alpha5*(*v); 2154 y[16*(*idx)+5] += alpha6*(*v); 2155 y[16*(*idx)+6] += alpha7*(*v); 2156 y[16*(*idx)+7] += alpha8*(*v); 2157 y[16*(*idx)+8] += alpha9*(*v); 2158 y[16*(*idx)+9] += alpha10*(*v); 2159 y[16*(*idx)+10] += alpha11*(*v); 2160 y[16*(*idx)+11] += alpha12*(*v); 2161 y[16*(*idx)+12] += alpha13*(*v); 2162 y[16*(*idx)+13] += alpha14*(*v); 2163 y[16*(*idx)+14] += alpha15*(*v); 2164 y[16*(*idx)+15] += alpha16*(*v); 2165 idx++; v++; 2166 } 2167 } 2168 PetscCall(PetscLogFlops(32.0*a->nz)); 2169 PetscCall(VecRestoreArrayRead(xx,&x)); 2170 PetscCall(VecRestoreArray(yy,&y)); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2175 { 2176 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2177 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2178 const PetscScalar *x,*v; 2179 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2180 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2181 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2182 PetscInt n,i,jrow,j; 2183 2184 PetscFunctionBegin; 2185 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2186 PetscCall(VecGetArrayRead(xx,&x)); 2187 PetscCall(VecGetArray(zz,&y)); 2188 idx = a->j; 2189 v = a->a; 2190 ii = a->i; 2191 2192 for (i=0; i<m; i++) { 2193 jrow = ii[i]; 2194 n = ii[i+1] - jrow; 2195 sum1 = 0.0; 2196 sum2 = 0.0; 2197 sum3 = 0.0; 2198 sum4 = 0.0; 2199 sum5 = 0.0; 2200 sum6 = 0.0; 2201 sum7 = 0.0; 2202 sum8 = 0.0; 2203 sum9 = 0.0; 2204 sum10 = 0.0; 2205 sum11 = 0.0; 2206 sum12 = 0.0; 2207 sum13 = 0.0; 2208 sum14 = 0.0; 2209 sum15 = 0.0; 2210 sum16 = 0.0; 2211 for (j=0; j<n; j++) { 2212 sum1 += v[jrow]*x[16*idx[jrow]]; 2213 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2214 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2215 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2216 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2217 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2218 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2219 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2220 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2221 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2222 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2223 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2224 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2225 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2226 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2227 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2228 jrow++; 2229 } 2230 y[16*i] += sum1; 2231 y[16*i+1] += sum2; 2232 y[16*i+2] += sum3; 2233 y[16*i+3] += sum4; 2234 y[16*i+4] += sum5; 2235 y[16*i+5] += sum6; 2236 y[16*i+6] += sum7; 2237 y[16*i+7] += sum8; 2238 y[16*i+8] += sum9; 2239 y[16*i+9] += sum10; 2240 y[16*i+10] += sum11; 2241 y[16*i+11] += sum12; 2242 y[16*i+12] += sum13; 2243 y[16*i+13] += sum14; 2244 y[16*i+14] += sum15; 2245 y[16*i+15] += sum16; 2246 } 2247 2248 PetscCall(PetscLogFlops(32.0*a->nz)); 2249 PetscCall(VecRestoreArrayRead(xx,&x)); 2250 PetscCall(VecRestoreArray(zz,&y)); 2251 PetscFunctionReturn(0); 2252 } 2253 2254 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2255 { 2256 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2257 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2258 const PetscScalar *x,*v; 2259 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2260 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2261 const PetscInt m = b->AIJ->rmap->n,*idx; 2262 PetscInt n,i; 2263 2264 PetscFunctionBegin; 2265 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2266 PetscCall(VecGetArrayRead(xx,&x)); 2267 PetscCall(VecGetArray(zz,&y)); 2268 for (i=0; i<m; i++) { 2269 idx = a->j + a->i[i]; 2270 v = a->a + a->i[i]; 2271 n = a->i[i+1] - a->i[i]; 2272 alpha1 = x[16*i]; 2273 alpha2 = x[16*i+1]; 2274 alpha3 = x[16*i+2]; 2275 alpha4 = x[16*i+3]; 2276 alpha5 = x[16*i+4]; 2277 alpha6 = x[16*i+5]; 2278 alpha7 = x[16*i+6]; 2279 alpha8 = x[16*i+7]; 2280 alpha9 = x[16*i+8]; 2281 alpha10 = x[16*i+9]; 2282 alpha11 = x[16*i+10]; 2283 alpha12 = x[16*i+11]; 2284 alpha13 = x[16*i+12]; 2285 alpha14 = x[16*i+13]; 2286 alpha15 = x[16*i+14]; 2287 alpha16 = x[16*i+15]; 2288 while (n-->0) { 2289 y[16*(*idx)] += alpha1*(*v); 2290 y[16*(*idx)+1] += alpha2*(*v); 2291 y[16*(*idx)+2] += alpha3*(*v); 2292 y[16*(*idx)+3] += alpha4*(*v); 2293 y[16*(*idx)+4] += alpha5*(*v); 2294 y[16*(*idx)+5] += alpha6*(*v); 2295 y[16*(*idx)+6] += alpha7*(*v); 2296 y[16*(*idx)+7] += alpha8*(*v); 2297 y[16*(*idx)+8] += alpha9*(*v); 2298 y[16*(*idx)+9] += alpha10*(*v); 2299 y[16*(*idx)+10] += alpha11*(*v); 2300 y[16*(*idx)+11] += alpha12*(*v); 2301 y[16*(*idx)+12] += alpha13*(*v); 2302 y[16*(*idx)+13] += alpha14*(*v); 2303 y[16*(*idx)+14] += alpha15*(*v); 2304 y[16*(*idx)+15] += alpha16*(*v); 2305 idx++; v++; 2306 } 2307 } 2308 PetscCall(PetscLogFlops(32.0*a->nz)); 2309 PetscCall(VecRestoreArrayRead(xx,&x)); 2310 PetscCall(VecRestoreArray(zz,&y)); 2311 PetscFunctionReturn(0); 2312 } 2313 2314 /*--------------------------------------------------------------------------------------------*/ 2315 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2316 { 2317 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2318 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2319 const PetscScalar *x,*v; 2320 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2321 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2322 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2323 PetscInt nonzerorow=0,n,i,jrow,j; 2324 2325 PetscFunctionBegin; 2326 PetscCall(VecGetArrayRead(xx,&x)); 2327 PetscCall(VecGetArray(yy,&y)); 2328 idx = a->j; 2329 v = a->a; 2330 ii = a->i; 2331 2332 for (i=0; i<m; i++) { 2333 jrow = ii[i]; 2334 n = ii[i+1] - jrow; 2335 sum1 = 0.0; 2336 sum2 = 0.0; 2337 sum3 = 0.0; 2338 sum4 = 0.0; 2339 sum5 = 0.0; 2340 sum6 = 0.0; 2341 sum7 = 0.0; 2342 sum8 = 0.0; 2343 sum9 = 0.0; 2344 sum10 = 0.0; 2345 sum11 = 0.0; 2346 sum12 = 0.0; 2347 sum13 = 0.0; 2348 sum14 = 0.0; 2349 sum15 = 0.0; 2350 sum16 = 0.0; 2351 sum17 = 0.0; 2352 sum18 = 0.0; 2353 2354 nonzerorow += (n>0); 2355 for (j=0; j<n; j++) { 2356 sum1 += v[jrow]*x[18*idx[jrow]]; 2357 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2358 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2359 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2360 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2361 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2362 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2363 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2364 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2365 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2366 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2367 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2368 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2369 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2370 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2371 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2372 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2373 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2374 jrow++; 2375 } 2376 y[18*i] = sum1; 2377 y[18*i+1] = sum2; 2378 y[18*i+2] = sum3; 2379 y[18*i+3] = sum4; 2380 y[18*i+4] = sum5; 2381 y[18*i+5] = sum6; 2382 y[18*i+6] = sum7; 2383 y[18*i+7] = sum8; 2384 y[18*i+8] = sum9; 2385 y[18*i+9] = sum10; 2386 y[18*i+10] = sum11; 2387 y[18*i+11] = sum12; 2388 y[18*i+12] = sum13; 2389 y[18*i+13] = sum14; 2390 y[18*i+14] = sum15; 2391 y[18*i+15] = sum16; 2392 y[18*i+16] = sum17; 2393 y[18*i+17] = sum18; 2394 } 2395 2396 PetscCall(PetscLogFlops(36.0*a->nz - 18.0*nonzerorow)); 2397 PetscCall(VecRestoreArrayRead(xx,&x)); 2398 PetscCall(VecRestoreArray(yy,&y)); 2399 PetscFunctionReturn(0); 2400 } 2401 2402 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2403 { 2404 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2405 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2406 const PetscScalar *x,*v; 2407 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2408 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2409 const PetscInt m = b->AIJ->rmap->n,*idx; 2410 PetscInt n,i; 2411 2412 PetscFunctionBegin; 2413 PetscCall(VecSet(yy,0.0)); 2414 PetscCall(VecGetArrayRead(xx,&x)); 2415 PetscCall(VecGetArray(yy,&y)); 2416 2417 for (i=0; i<m; i++) { 2418 idx = a->j + a->i[i]; 2419 v = a->a + a->i[i]; 2420 n = a->i[i+1] - a->i[i]; 2421 alpha1 = x[18*i]; 2422 alpha2 = x[18*i+1]; 2423 alpha3 = x[18*i+2]; 2424 alpha4 = x[18*i+3]; 2425 alpha5 = x[18*i+4]; 2426 alpha6 = x[18*i+5]; 2427 alpha7 = x[18*i+6]; 2428 alpha8 = x[18*i+7]; 2429 alpha9 = x[18*i+8]; 2430 alpha10 = x[18*i+9]; 2431 alpha11 = x[18*i+10]; 2432 alpha12 = x[18*i+11]; 2433 alpha13 = x[18*i+12]; 2434 alpha14 = x[18*i+13]; 2435 alpha15 = x[18*i+14]; 2436 alpha16 = x[18*i+15]; 2437 alpha17 = x[18*i+16]; 2438 alpha18 = x[18*i+17]; 2439 while (n-->0) { 2440 y[18*(*idx)] += alpha1*(*v); 2441 y[18*(*idx)+1] += alpha2*(*v); 2442 y[18*(*idx)+2] += alpha3*(*v); 2443 y[18*(*idx)+3] += alpha4*(*v); 2444 y[18*(*idx)+4] += alpha5*(*v); 2445 y[18*(*idx)+5] += alpha6*(*v); 2446 y[18*(*idx)+6] += alpha7*(*v); 2447 y[18*(*idx)+7] += alpha8*(*v); 2448 y[18*(*idx)+8] += alpha9*(*v); 2449 y[18*(*idx)+9] += alpha10*(*v); 2450 y[18*(*idx)+10] += alpha11*(*v); 2451 y[18*(*idx)+11] += alpha12*(*v); 2452 y[18*(*idx)+12] += alpha13*(*v); 2453 y[18*(*idx)+13] += alpha14*(*v); 2454 y[18*(*idx)+14] += alpha15*(*v); 2455 y[18*(*idx)+15] += alpha16*(*v); 2456 y[18*(*idx)+16] += alpha17*(*v); 2457 y[18*(*idx)+17] += alpha18*(*v); 2458 idx++; v++; 2459 } 2460 } 2461 PetscCall(PetscLogFlops(36.0*a->nz)); 2462 PetscCall(VecRestoreArrayRead(xx,&x)); 2463 PetscCall(VecRestoreArray(yy,&y)); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2468 { 2469 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2470 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2471 const PetscScalar *x,*v; 2472 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2473 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2474 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2475 PetscInt n,i,jrow,j; 2476 2477 PetscFunctionBegin; 2478 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2479 PetscCall(VecGetArrayRead(xx,&x)); 2480 PetscCall(VecGetArray(zz,&y)); 2481 idx = a->j; 2482 v = a->a; 2483 ii = a->i; 2484 2485 for (i=0; i<m; i++) { 2486 jrow = ii[i]; 2487 n = ii[i+1] - jrow; 2488 sum1 = 0.0; 2489 sum2 = 0.0; 2490 sum3 = 0.0; 2491 sum4 = 0.0; 2492 sum5 = 0.0; 2493 sum6 = 0.0; 2494 sum7 = 0.0; 2495 sum8 = 0.0; 2496 sum9 = 0.0; 2497 sum10 = 0.0; 2498 sum11 = 0.0; 2499 sum12 = 0.0; 2500 sum13 = 0.0; 2501 sum14 = 0.0; 2502 sum15 = 0.0; 2503 sum16 = 0.0; 2504 sum17 = 0.0; 2505 sum18 = 0.0; 2506 for (j=0; j<n; j++) { 2507 sum1 += v[jrow]*x[18*idx[jrow]]; 2508 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2509 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2510 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2511 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2512 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2513 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2514 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2515 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2516 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2517 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2518 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2519 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2520 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2521 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2522 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2523 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2524 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2525 jrow++; 2526 } 2527 y[18*i] += sum1; 2528 y[18*i+1] += sum2; 2529 y[18*i+2] += sum3; 2530 y[18*i+3] += sum4; 2531 y[18*i+4] += sum5; 2532 y[18*i+5] += sum6; 2533 y[18*i+6] += sum7; 2534 y[18*i+7] += sum8; 2535 y[18*i+8] += sum9; 2536 y[18*i+9] += sum10; 2537 y[18*i+10] += sum11; 2538 y[18*i+11] += sum12; 2539 y[18*i+12] += sum13; 2540 y[18*i+13] += sum14; 2541 y[18*i+14] += sum15; 2542 y[18*i+15] += sum16; 2543 y[18*i+16] += sum17; 2544 y[18*i+17] += sum18; 2545 } 2546 2547 PetscCall(PetscLogFlops(36.0*a->nz)); 2548 PetscCall(VecRestoreArrayRead(xx,&x)); 2549 PetscCall(VecRestoreArray(zz,&y)); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2554 { 2555 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2556 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2557 const PetscScalar *x,*v; 2558 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2559 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2560 const PetscInt m = b->AIJ->rmap->n,*idx; 2561 PetscInt n,i; 2562 2563 PetscFunctionBegin; 2564 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2565 PetscCall(VecGetArrayRead(xx,&x)); 2566 PetscCall(VecGetArray(zz,&y)); 2567 for (i=0; i<m; i++) { 2568 idx = a->j + a->i[i]; 2569 v = a->a + a->i[i]; 2570 n = a->i[i+1] - a->i[i]; 2571 alpha1 = x[18*i]; 2572 alpha2 = x[18*i+1]; 2573 alpha3 = x[18*i+2]; 2574 alpha4 = x[18*i+3]; 2575 alpha5 = x[18*i+4]; 2576 alpha6 = x[18*i+5]; 2577 alpha7 = x[18*i+6]; 2578 alpha8 = x[18*i+7]; 2579 alpha9 = x[18*i+8]; 2580 alpha10 = x[18*i+9]; 2581 alpha11 = x[18*i+10]; 2582 alpha12 = x[18*i+11]; 2583 alpha13 = x[18*i+12]; 2584 alpha14 = x[18*i+13]; 2585 alpha15 = x[18*i+14]; 2586 alpha16 = x[18*i+15]; 2587 alpha17 = x[18*i+16]; 2588 alpha18 = x[18*i+17]; 2589 while (n-->0) { 2590 y[18*(*idx)] += alpha1*(*v); 2591 y[18*(*idx)+1] += alpha2*(*v); 2592 y[18*(*idx)+2] += alpha3*(*v); 2593 y[18*(*idx)+3] += alpha4*(*v); 2594 y[18*(*idx)+4] += alpha5*(*v); 2595 y[18*(*idx)+5] += alpha6*(*v); 2596 y[18*(*idx)+6] += alpha7*(*v); 2597 y[18*(*idx)+7] += alpha8*(*v); 2598 y[18*(*idx)+8] += alpha9*(*v); 2599 y[18*(*idx)+9] += alpha10*(*v); 2600 y[18*(*idx)+10] += alpha11*(*v); 2601 y[18*(*idx)+11] += alpha12*(*v); 2602 y[18*(*idx)+12] += alpha13*(*v); 2603 y[18*(*idx)+13] += alpha14*(*v); 2604 y[18*(*idx)+14] += alpha15*(*v); 2605 y[18*(*idx)+15] += alpha16*(*v); 2606 y[18*(*idx)+16] += alpha17*(*v); 2607 y[18*(*idx)+17] += alpha18*(*v); 2608 idx++; v++; 2609 } 2610 } 2611 PetscCall(PetscLogFlops(36.0*a->nz)); 2612 PetscCall(VecRestoreArrayRead(xx,&x)); 2613 PetscCall(VecRestoreArray(zz,&y)); 2614 PetscFunctionReturn(0); 2615 } 2616 2617 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2618 { 2619 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2620 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2621 const PetscScalar *x,*v; 2622 PetscScalar *y,*sums; 2623 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2624 PetscInt n,i,jrow,j,dof = b->dof,k; 2625 2626 PetscFunctionBegin; 2627 PetscCall(VecGetArrayRead(xx,&x)); 2628 PetscCall(VecSet(yy,0.0)); 2629 PetscCall(VecGetArray(yy,&y)); 2630 idx = a->j; 2631 v = a->a; 2632 ii = a->i; 2633 2634 for (i=0; i<m; i++) { 2635 jrow = ii[i]; 2636 n = ii[i+1] - jrow; 2637 sums = y + dof*i; 2638 for (j=0; j<n; j++) { 2639 for (k=0; k<dof; k++) { 2640 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2641 } 2642 jrow++; 2643 } 2644 } 2645 2646 PetscCall(PetscLogFlops(2.0*dof*a->nz)); 2647 PetscCall(VecRestoreArrayRead(xx,&x)); 2648 PetscCall(VecRestoreArray(yy,&y)); 2649 PetscFunctionReturn(0); 2650 } 2651 2652 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2653 { 2654 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2655 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2656 const PetscScalar *x,*v; 2657 PetscScalar *y,*sums; 2658 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2659 PetscInt n,i,jrow,j,dof = b->dof,k; 2660 2661 PetscFunctionBegin; 2662 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2663 PetscCall(VecGetArrayRead(xx,&x)); 2664 PetscCall(VecGetArray(zz,&y)); 2665 idx = a->j; 2666 v = a->a; 2667 ii = a->i; 2668 2669 for (i=0; i<m; i++) { 2670 jrow = ii[i]; 2671 n = ii[i+1] - jrow; 2672 sums = y + dof*i; 2673 for (j=0; j<n; j++) { 2674 for (k=0; k<dof; k++) { 2675 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2676 } 2677 jrow++; 2678 } 2679 } 2680 2681 PetscCall(PetscLogFlops(2.0*dof*a->nz)); 2682 PetscCall(VecRestoreArrayRead(xx,&x)); 2683 PetscCall(VecRestoreArray(zz,&y)); 2684 PetscFunctionReturn(0); 2685 } 2686 2687 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2688 { 2689 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2690 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2691 const PetscScalar *x,*v,*alpha; 2692 PetscScalar *y; 2693 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2694 PetscInt n,i,k; 2695 2696 PetscFunctionBegin; 2697 PetscCall(VecGetArrayRead(xx,&x)); 2698 PetscCall(VecSet(yy,0.0)); 2699 PetscCall(VecGetArray(yy,&y)); 2700 for (i=0; i<m; i++) { 2701 idx = a->j + a->i[i]; 2702 v = a->a + a->i[i]; 2703 n = a->i[i+1] - a->i[i]; 2704 alpha = x + dof*i; 2705 while (n-->0) { 2706 for (k=0; k<dof; k++) { 2707 y[dof*(*idx)+k] += alpha[k]*(*v); 2708 } 2709 idx++; v++; 2710 } 2711 } 2712 PetscCall(PetscLogFlops(2.0*dof*a->nz)); 2713 PetscCall(VecRestoreArrayRead(xx,&x)); 2714 PetscCall(VecRestoreArray(yy,&y)); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2719 { 2720 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2721 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2722 const PetscScalar *x,*v,*alpha; 2723 PetscScalar *y; 2724 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2725 PetscInt n,i,k; 2726 2727 PetscFunctionBegin; 2728 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2729 PetscCall(VecGetArrayRead(xx,&x)); 2730 PetscCall(VecGetArray(zz,&y)); 2731 for (i=0; i<m; i++) { 2732 idx = a->j + a->i[i]; 2733 v = a->a + a->i[i]; 2734 n = a->i[i+1] - a->i[i]; 2735 alpha = x + dof*i; 2736 while (n-->0) { 2737 for (k=0; k<dof; k++) { 2738 y[dof*(*idx)+k] += alpha[k]*(*v); 2739 } 2740 idx++; v++; 2741 } 2742 } 2743 PetscCall(PetscLogFlops(2.0*dof*a->nz)); 2744 PetscCall(VecRestoreArrayRead(xx,&x)); 2745 PetscCall(VecRestoreArray(zz,&y)); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 /*===================================================================================*/ 2750 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2751 { 2752 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2753 2754 PetscFunctionBegin; 2755 /* start the scatter */ 2756 PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 2757 PetscCall((*b->AIJ->ops->mult)(b->AIJ,xx,yy)); 2758 PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 2759 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy)); 2760 PetscFunctionReturn(0); 2761 } 2762 2763 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2764 { 2765 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2766 2767 PetscFunctionBegin; 2768 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w)); 2769 PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy)); 2770 PetscCall(VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE)); 2771 PetscCall(VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE)); 2772 PetscFunctionReturn(0); 2773 } 2774 2775 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2776 { 2777 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2778 2779 PetscFunctionBegin; 2780 /* start the scatter */ 2781 PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 2782 PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz)); 2783 PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 2784 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz)); 2785 PetscFunctionReturn(0); 2786 } 2787 2788 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2789 { 2790 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2791 2792 PetscFunctionBegin; 2793 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w)); 2794 PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz)); 2795 PetscCall(VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE)); 2796 PetscCall(VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE)); 2797 PetscFunctionReturn(0); 2798 } 2799 2800 /* ----------------------------------------------------------------*/ 2801 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 2802 { 2803 Mat_Product *product = C->product; 2804 2805 PetscFunctionBegin; 2806 if (product->type == MATPRODUCT_PtAP) { 2807 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 2808 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]); 2809 PetscFunctionReturn(0); 2810 } 2811 2812 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 2813 { 2814 Mat_Product *product = C->product; 2815 PetscBool flg = PETSC_FALSE; 2816 Mat A=product->A,P=product->B; 2817 PetscInt alg=1; /* set default algorithm */ 2818 #if !defined(PETSC_HAVE_HYPRE) 2819 const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"}; 2820 PetscInt nalg=4; 2821 #else 2822 const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"}; 2823 PetscInt nalg=5; 2824 #endif 2825 2826 PetscFunctionBegin; 2827 PetscCheck(product->type == MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]); 2828 2829 /* PtAP */ 2830 /* Check matrix local sizes */ 2831 PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); 2832 PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); 2833 2834 /* Set the default algorithm */ 2835 PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 2836 if (flg) { 2837 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2838 } 2839 2840 /* Get runtime option */ 2841 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 2842 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg)); 2843 if (flg) { 2844 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2845 } 2846 PetscOptionsEnd(); 2847 2848 PetscCall(PetscStrcmp(C->product->alg,"allatonce",&flg)); 2849 if (flg) { 2850 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2851 PetscFunctionReturn(0); 2852 } 2853 2854 PetscCall(PetscStrcmp(C->product->alg,"allatonce_merged",&flg)); 2855 if (flg) { 2856 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2857 PetscFunctionReturn(0); 2858 } 2859 2860 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 2861 PetscCall(PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 2862 PetscCall(MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P)); 2863 PetscCall(MatProductSetFromOptions(C)); 2864 PetscFunctionReturn(0); 2865 } 2866 2867 /* ----------------------------------------------------------------*/ 2868 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C) 2869 { 2870 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2871 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2872 Mat P =pp->AIJ; 2873 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2874 PetscInt *pti,*ptj,*ptJ; 2875 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2876 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2877 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2878 MatScalar *ca; 2879 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2880 2881 PetscFunctionBegin; 2882 /* Get ij structure of P^T */ 2883 PetscCall(MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 2884 2885 cn = pn*ppdof; 2886 /* Allocate ci array, arrays for fill computation and */ 2887 /* free space for accumulating nonzero column info */ 2888 PetscCall(PetscMalloc1(cn+1,&ci)); 2889 ci[0] = 0; 2890 2891 /* Work arrays for rows of P^T*A */ 2892 PetscCall(PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow)); 2893 PetscCall(PetscArrayzero(ptadenserow,an)); 2894 PetscCall(PetscArrayzero(denserow,cn)); 2895 2896 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2897 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2898 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2899 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space)); 2900 current_space = free_space; 2901 2902 /* Determine symbolic info for each row of C: */ 2903 for (i=0; i<pn; i++) { 2904 ptnzi = pti[i+1] - pti[i]; 2905 ptJ = ptj + pti[i]; 2906 for (dof=0; dof<ppdof; dof++) { 2907 ptanzi = 0; 2908 /* Determine symbolic row of PtA: */ 2909 for (j=0; j<ptnzi; j++) { 2910 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2911 arow = ptJ[j]*ppdof + dof; 2912 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2913 anzj = ai[arow+1] - ai[arow]; 2914 ajj = aj + ai[arow]; 2915 for (k=0; k<anzj; k++) { 2916 if (!ptadenserow[ajj[k]]) { 2917 ptadenserow[ajj[k]] = -1; 2918 ptasparserow[ptanzi++] = ajj[k]; 2919 } 2920 } 2921 } 2922 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2923 ptaj = ptasparserow; 2924 cnzi = 0; 2925 for (j=0; j<ptanzi; j++) { 2926 /* Get offset within block of P */ 2927 pshift = *ptaj%ppdof; 2928 /* Get block row of P */ 2929 prow = (*ptaj++)/ppdof; /* integer division */ 2930 /* P has same number of nonzeros per row as the compressed form */ 2931 pnzj = pi[prow+1] - pi[prow]; 2932 pjj = pj + pi[prow]; 2933 for (k=0;k<pnzj;k++) { 2934 /* Locations in C are shifted by the offset within the block */ 2935 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2936 if (!denserow[pjj[k]*ppdof+pshift]) { 2937 denserow[pjj[k]*ppdof+pshift] = -1; 2938 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2939 } 2940 } 2941 } 2942 2943 /* sort sparserow */ 2944 PetscCall(PetscSortInt(cnzi,sparserow)); 2945 2946 /* If free space is not available, make more free space */ 2947 /* Double the amount of total space in the list */ 2948 if (current_space->local_remaining<cnzi) { 2949 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 2950 } 2951 2952 /* Copy data into free space, and zero out denserows */ 2953 PetscCall(PetscArraycpy(current_space->array,sparserow,cnzi)); 2954 2955 current_space->array += cnzi; 2956 current_space->local_used += cnzi; 2957 current_space->local_remaining -= cnzi; 2958 2959 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2960 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 2961 2962 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2963 /* For now, we will recompute what is needed. */ 2964 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2965 } 2966 } 2967 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2968 /* Allocate space for cj, initialize cj, and */ 2969 /* destroy list of free space and other temporary array(s) */ 2970 PetscCall(PetscMalloc1(ci[cn]+1,&cj)); 2971 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 2972 PetscCall(PetscFree4(ptadenserow,ptasparserow,denserow,sparserow)); 2973 2974 /* Allocate space for ca */ 2975 PetscCall(PetscCalloc1(ci[cn]+1,&ca)); 2976 2977 /* put together the new matrix */ 2978 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C)); 2979 PetscCall(MatSetBlockSize(C,pp->dof)); 2980 2981 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2982 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2983 c = (Mat_SeqAIJ*)(C->data); 2984 c->free_a = PETSC_TRUE; 2985 c->free_ij = PETSC_TRUE; 2986 c->nonew = 0; 2987 2988 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2989 C->ops->productnumeric = MatProductNumeric_PtAP; 2990 2991 /* Clean up. */ 2992 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 2993 PetscFunctionReturn(0); 2994 } 2995 2996 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2997 { 2998 /* This routine requires testing -- first draft only */ 2999 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3000 Mat P =pp->AIJ; 3001 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3002 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3003 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3004 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3005 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3006 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3007 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3008 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3009 MatScalar *ca=c->a,*caj,*apa; 3010 3011 PetscFunctionBegin; 3012 /* Allocate temporary array for storage of one row of A*P */ 3013 PetscCall(PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense)); 3014 3015 /* Clear old values in C */ 3016 PetscCall(PetscArrayzero(ca,ci[cm])); 3017 3018 for (i=0; i<am; i++) { 3019 /* Form sparse row of A*P */ 3020 anzi = ai[i+1] - ai[i]; 3021 apnzj = 0; 3022 for (j=0; j<anzi; j++) { 3023 /* Get offset within block of P */ 3024 pshift = *aj%ppdof; 3025 /* Get block row of P */ 3026 prow = *aj++/ppdof; /* integer division */ 3027 pnzj = pi[prow+1] - pi[prow]; 3028 pjj = pj + pi[prow]; 3029 paj = pa + pi[prow]; 3030 for (k=0; k<pnzj; k++) { 3031 poffset = pjj[k]*ppdof+pshift; 3032 if (!apjdense[poffset]) { 3033 apjdense[poffset] = -1; 3034 apj[apnzj++] = poffset; 3035 } 3036 apa[poffset] += (*aa)*paj[k]; 3037 } 3038 PetscCall(PetscLogFlops(2.0*pnzj)); 3039 aa++; 3040 } 3041 3042 /* Sort the j index array for quick sparse axpy. */ 3043 /* Note: a array does not need sorting as it is in dense storage locations. */ 3044 PetscCall(PetscSortInt(apnzj,apj)); 3045 3046 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3047 prow = i/ppdof; /* integer division */ 3048 pshift = i%ppdof; 3049 poffset = pi[prow]; 3050 pnzi = pi[prow+1] - poffset; 3051 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3052 pJ = pj+poffset; 3053 pA = pa+poffset; 3054 for (j=0; j<pnzi; j++) { 3055 crow = (*pJ)*ppdof+pshift; 3056 cjj = cj + ci[crow]; 3057 caj = ca + ci[crow]; 3058 pJ++; 3059 /* Perform sparse axpy operation. Note cjj includes apj. */ 3060 for (k=0,nextap=0; nextap<apnzj; k++) { 3061 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3062 } 3063 PetscCall(PetscLogFlops(2.0*apnzj)); 3064 pA++; 3065 } 3066 3067 /* Zero the current row info for A*P */ 3068 for (j=0; j<apnzj; j++) { 3069 apa[apj[j]] = 0.; 3070 apjdense[apj[j]] = 0; 3071 } 3072 } 3073 3074 /* Assemble the final matrix and clean up */ 3075 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 3076 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 3077 PetscCall(PetscFree3(apa,apj,apjdense)); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 3082 { 3083 Mat_Product *product = C->product; 3084 Mat A=product->A,P=product->B; 3085 3086 PetscFunctionBegin; 3087 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C)); 3088 PetscFunctionReturn(0); 3089 } 3090 3091 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3092 { 3093 PetscFunctionBegin; 3094 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 3095 } 3096 3097 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3098 { 3099 PetscFunctionBegin; 3100 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3101 } 3102 3103 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat); 3104 3105 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C) 3106 { 3107 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3108 3109 PetscFunctionBegin; 3110 3111 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C)); 3112 PetscFunctionReturn(0); 3113 } 3114 3115 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat); 3116 3117 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C) 3118 { 3119 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3120 3121 PetscFunctionBegin; 3122 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C)); 3123 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3124 PetscFunctionReturn(0); 3125 } 3126 3127 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat); 3128 3129 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C) 3130 { 3131 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3132 3133 PetscFunctionBegin; 3134 3135 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C)); 3136 PetscFunctionReturn(0); 3137 } 3138 3139 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat); 3140 3141 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C) 3142 { 3143 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3144 3145 PetscFunctionBegin; 3146 3147 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C)); 3148 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3149 PetscFunctionReturn(0); 3150 } 3151 3152 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3153 { 3154 Mat_Product *product = C->product; 3155 Mat A=product->A,P=product->B; 3156 PetscBool flg; 3157 3158 PetscFunctionBegin; 3159 PetscCall(PetscStrcmp(product->alg,"allatonce",&flg)); 3160 if (flg) { 3161 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C)); 3162 C->ops->productnumeric = MatProductNumeric_PtAP; 3163 PetscFunctionReturn(0); 3164 } 3165 3166 PetscCall(PetscStrcmp(product->alg,"allatonce_merged",&flg)); 3167 if (flg) { 3168 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C)); 3169 C->ops->productnumeric = MatProductNumeric_PtAP; 3170 PetscFunctionReturn(0); 3171 } 3172 3173 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 3174 } 3175 3176 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3177 { 3178 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3179 Mat a = b->AIJ,B; 3180 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3181 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3182 PetscInt *cols; 3183 PetscScalar *vals; 3184 3185 PetscFunctionBegin; 3186 PetscCall(MatGetSize(a,&m,&n)); 3187 PetscCall(PetscMalloc1(dof*m,&ilen)); 3188 for (i=0; i<m; i++) { 3189 nmax = PetscMax(nmax,aij->ilen[i]); 3190 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3191 } 3192 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 3193 PetscCall(MatSetSizes(B,dof*m,dof*n,dof*m,dof*n)); 3194 PetscCall(MatSetType(B,newtype)); 3195 PetscCall(MatSeqAIJSetPreallocation(B,0,ilen)); 3196 PetscCall(PetscFree(ilen)); 3197 PetscCall(PetscMalloc1(nmax,&icols)); 3198 ii = 0; 3199 for (i=0; i<m; i++) { 3200 PetscCall(MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals)); 3201 for (j=0; j<dof; j++) { 3202 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3203 PetscCall(MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES)); 3204 ii++; 3205 } 3206 PetscCall(MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals)); 3207 } 3208 PetscCall(PetscFree(icols)); 3209 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3210 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3211 3212 if (reuse == MAT_INPLACE_MATRIX) { 3213 PetscCall(MatHeaderReplace(A,&B)); 3214 } else { 3215 *newmat = B; 3216 } 3217 PetscFunctionReturn(0); 3218 } 3219 3220 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3221 3222 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3223 { 3224 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3225 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3226 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3227 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3228 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3229 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3230 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3231 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3232 PetscInt rstart,cstart,*garray,ii,k; 3233 PetscScalar *vals,*ovals; 3234 3235 PetscFunctionBegin; 3236 PetscCall(PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz)); 3237 for (i=0; i<A->rmap->n/dof; i++) { 3238 nmax = PetscMax(nmax,AIJ->ilen[i]); 3239 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3240 for (j=0; j<dof; j++) { 3241 dnz[dof*i+j] = AIJ->ilen[i]; 3242 onz[dof*i+j] = OAIJ->ilen[i]; 3243 } 3244 } 3245 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3246 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3247 PetscCall(MatSetType(B,newtype)); 3248 PetscCall(MatMPIAIJSetPreallocation(B,0,dnz,0,onz)); 3249 PetscCall(MatSetBlockSize(B,dof)); 3250 PetscCall(PetscFree2(dnz,onz)); 3251 3252 PetscCall(PetscMalloc2(nmax,&icols,onmax,&oicols)); 3253 rstart = dof*maij->A->rmap->rstart; 3254 cstart = dof*maij->A->cmap->rstart; 3255 garray = mpiaij->garray; 3256 3257 ii = rstart; 3258 for (i=0; i<A->rmap->n/dof; i++) { 3259 PetscCall(MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals)); 3260 PetscCall(MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals)); 3261 for (j=0; j<dof; j++) { 3262 for (k=0; k<ncols; k++) { 3263 icols[k] = cstart + dof*cols[k]+j; 3264 } 3265 for (k=0; k<oncols; k++) { 3266 oicols[k] = dof*garray[ocols[k]]+j; 3267 } 3268 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES)); 3269 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES)); 3270 ii++; 3271 } 3272 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals)); 3273 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals)); 3274 } 3275 PetscCall(PetscFree2(icols,oicols)); 3276 3277 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3278 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3279 3280 if (reuse == MAT_INPLACE_MATRIX) { 3281 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3282 ((PetscObject)A)->refct = 1; 3283 3284 PetscCall(MatHeaderReplace(A,&B)); 3285 3286 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3287 } else { 3288 *newmat = B; 3289 } 3290 PetscFunctionReturn(0); 3291 } 3292 3293 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3294 { 3295 Mat A; 3296 3297 PetscFunctionBegin; 3298 PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 3299 PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat)); 3300 PetscCall(MatDestroy(&A)); 3301 PetscFunctionReturn(0); 3302 } 3303 3304 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3305 { 3306 Mat A; 3307 3308 PetscFunctionBegin; 3309 PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 3310 PetscCall(MatCreateSubMatrices(A,n,irow,icol,scall,submat)); 3311 PetscCall(MatDestroy(&A)); 3312 PetscFunctionReturn(0); 3313 } 3314 3315 /* ---------------------------------------------------------------------------------- */ 3316 /*@ 3317 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3318 operations for multicomponent problems. It interpolates each component the same 3319 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3320 and MATMPIAIJ for distributed matrices. 3321 3322 Collective 3323 3324 Input Parameters: 3325 + A - the AIJ matrix describing the action on blocks 3326 - dof - the block size (number of components per node) 3327 3328 Output Parameter: 3329 . maij - the new MAIJ matrix 3330 3331 Operations provided: 3332 .vb 3333 MatMult 3334 MatMultTranspose 3335 MatMultAdd 3336 MatMultTransposeAdd 3337 MatView 3338 .ve 3339 3340 Level: advanced 3341 3342 .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 3343 @*/ 3344 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3345 { 3346 PetscMPIInt size; 3347 PetscInt n; 3348 Mat B; 3349 #if defined(PETSC_HAVE_CUDA) 3350 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3351 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3352 #endif 3353 3354 PetscFunctionBegin; 3355 dof = PetscAbs(dof); 3356 PetscCall(PetscObjectReference((PetscObject)A)); 3357 3358 if (dof == 1) *maij = A; 3359 else { 3360 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3361 /* propagate vec type */ 3362 PetscCall(MatSetVecType(B,A->defaultvectype)); 3363 PetscCall(MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N)); 3364 PetscCall(PetscLayoutSetBlockSize(B->rmap,dof)); 3365 PetscCall(PetscLayoutSetBlockSize(B->cmap,dof)); 3366 PetscCall(PetscLayoutSetUp(B->rmap)); 3367 PetscCall(PetscLayoutSetUp(B->cmap)); 3368 3369 B->assembled = PETSC_TRUE; 3370 3371 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3372 if (size == 1) { 3373 Mat_SeqMAIJ *b; 3374 3375 PetscCall(MatSetType(B,MATSEQMAIJ)); 3376 3377 B->ops->setup = NULL; 3378 B->ops->destroy = MatDestroy_SeqMAIJ; 3379 B->ops->view = MatView_SeqMAIJ; 3380 3381 b = (Mat_SeqMAIJ*)B->data; 3382 b->dof = dof; 3383 b->AIJ = A; 3384 3385 if (dof == 2) { 3386 B->ops->mult = MatMult_SeqMAIJ_2; 3387 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3388 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3389 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3390 } else if (dof == 3) { 3391 B->ops->mult = MatMult_SeqMAIJ_3; 3392 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3393 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3394 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3395 } else if (dof == 4) { 3396 B->ops->mult = MatMult_SeqMAIJ_4; 3397 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3398 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3399 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3400 } else if (dof == 5) { 3401 B->ops->mult = MatMult_SeqMAIJ_5; 3402 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3403 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3404 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3405 } else if (dof == 6) { 3406 B->ops->mult = MatMult_SeqMAIJ_6; 3407 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3408 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3409 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3410 } else if (dof == 7) { 3411 B->ops->mult = MatMult_SeqMAIJ_7; 3412 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3413 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3414 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3415 } else if (dof == 8) { 3416 B->ops->mult = MatMult_SeqMAIJ_8; 3417 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3418 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3419 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3420 } else if (dof == 9) { 3421 B->ops->mult = MatMult_SeqMAIJ_9; 3422 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3423 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3424 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3425 } else if (dof == 10) { 3426 B->ops->mult = MatMult_SeqMAIJ_10; 3427 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3428 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3429 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3430 } else if (dof == 11) { 3431 B->ops->mult = MatMult_SeqMAIJ_11; 3432 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3433 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3434 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3435 } else if (dof == 16) { 3436 B->ops->mult = MatMult_SeqMAIJ_16; 3437 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3438 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3439 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3440 } else if (dof == 18) { 3441 B->ops->mult = MatMult_SeqMAIJ_18; 3442 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3443 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3444 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3445 } else { 3446 B->ops->mult = MatMult_SeqMAIJ_N; 3447 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3448 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3449 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3450 } 3451 #if defined(PETSC_HAVE_CUDA) 3452 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ)); 3453 #endif 3454 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ)); 3455 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3456 } else { 3457 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3458 Mat_MPIMAIJ *b; 3459 IS from,to; 3460 Vec gvec; 3461 3462 PetscCall(MatSetType(B,MATMPIMAIJ)); 3463 3464 B->ops->setup = NULL; 3465 B->ops->destroy = MatDestroy_MPIMAIJ; 3466 B->ops->view = MatView_MPIMAIJ; 3467 3468 b = (Mat_MPIMAIJ*)B->data; 3469 b->dof = dof; 3470 b->A = A; 3471 3472 PetscCall(MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ)); 3473 PetscCall(MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ)); 3474 3475 PetscCall(VecGetSize(mpiaij->lvec,&n)); 3476 PetscCall(VecCreate(PETSC_COMM_SELF,&b->w)); 3477 PetscCall(VecSetSizes(b->w,n*dof,n*dof)); 3478 PetscCall(VecSetBlockSize(b->w,dof)); 3479 PetscCall(VecSetType(b->w,VECSEQ)); 3480 3481 /* create two temporary Index sets for build scatter gather */ 3482 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from)); 3483 PetscCall(ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to)); 3484 3485 /* create temporary global vector to generate scatter context */ 3486 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec)); 3487 3488 /* generate the scatter context */ 3489 PetscCall(VecScatterCreate(gvec,from,b->w,to,&b->ctx)); 3490 3491 PetscCall(ISDestroy(&from)); 3492 PetscCall(ISDestroy(&to)); 3493 PetscCall(VecDestroy(&gvec)); 3494 3495 B->ops->mult = MatMult_MPIMAIJ_dof; 3496 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3497 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3498 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3499 3500 #if defined(PETSC_HAVE_CUDA) 3501 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ)); 3502 #endif 3503 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ)); 3504 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3505 } 3506 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3507 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 3508 PetscCall(MatSetUp(B)); 3509 #if defined(PETSC_HAVE_CUDA) 3510 /* temporary until we have CUDA implementation of MAIJ */ 3511 { 3512 PetscBool flg; 3513 if (convert) { 3514 PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"")); 3515 if (flg) { 3516 PetscCall(MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B)); 3517 } 3518 } 3519 } 3520 #endif 3521 *maij = B; 3522 } 3523 PetscFunctionReturn(0); 3524 } 3525