1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: 36 The reference count on the AIJ matrix is not increased so you should not destroy it. 37 38 .seealso: MatCreateMAIJ() 39 @*/ 40 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41 { 42 PetscBool ismpimaij,isseqmaij; 43 44 PetscFunctionBegin; 45 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij)); 46 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij)); 47 if (ismpimaij) { 48 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 49 50 *B = b->A; 51 } else if (isseqmaij) { 52 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 53 54 *B = b->AIJ; 55 } else { 56 *B = A; 57 } 58 PetscFunctionReturn(0); 59 } 60 61 /*@ 62 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 63 64 Logically Collective 65 66 Input Parameters: 67 + A - the MAIJ matrix 68 - dof - the block size for the new matrix 69 70 Output Parameter: 71 . B - the new MAIJ matrix 72 73 Level: advanced 74 75 .seealso: MatCreateMAIJ() 76 @*/ 77 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 78 { 79 Mat Aij = NULL; 80 81 PetscFunctionBegin; 82 PetscValidLogicalCollectiveInt(A,dof,2); 83 PetscCall(MatMAIJGetAIJ(A,&Aij)); 84 PetscCall(MatCreateMAIJ(Aij,dof,B)); 85 PetscFunctionReturn(0); 86 } 87 88 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 89 { 90 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 91 92 PetscFunctionBegin; 93 PetscCall(MatDestroy(&b->AIJ)); 94 PetscCall(PetscFree(A->data)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL)); 97 PetscFunctionReturn(0); 98 } 99 100 PetscErrorCode MatSetUp_MAIJ(Mat A) 101 { 102 PetscFunctionBegin; 103 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 104 } 105 106 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 107 { 108 Mat B; 109 110 PetscFunctionBegin; 111 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 112 PetscCall(MatView(B,viewer)); 113 PetscCall(MatDestroy(&B)); 114 PetscFunctionReturn(0); 115 } 116 117 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 118 { 119 Mat B; 120 121 PetscFunctionBegin; 122 PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B)); 123 PetscCall(MatView(B,viewer)); 124 PetscCall(MatDestroy(&B)); 125 PetscFunctionReturn(0); 126 } 127 128 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 129 { 130 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 131 132 PetscFunctionBegin; 133 PetscCall(MatDestroy(&b->AIJ)); 134 PetscCall(MatDestroy(&b->OAIJ)); 135 PetscCall(MatDestroy(&b->A)); 136 PetscCall(VecScatterDestroy(&b->ctx)); 137 PetscCall(VecDestroy(&b->w)); 138 PetscCall(PetscFree(A->data)); 139 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL)); 140 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL)); 141 PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL)); 142 PetscFunctionReturn(0); 143 } 144 145 /*MC 146 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 147 multicomponent problems, interpolating or restricting each component the same way independently. 148 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 149 150 Operations provided: 151 .vb 152 MatMult 153 MatMultTranspose 154 MatMultAdd 155 MatMultTransposeAdd 156 .ve 157 158 Level: advanced 159 160 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 161 M*/ 162 163 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 164 { 165 Mat_MPIMAIJ *b; 166 PetscMPIInt size; 167 168 PetscFunctionBegin; 169 PetscCall(PetscNewLog(A,&b)); 170 A->data = (void*)b; 171 172 PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps))); 173 174 A->ops->setup = MatSetUp_MAIJ; 175 176 b->AIJ = NULL; 177 b->dof = 0; 178 b->OAIJ = NULL; 179 b->ctx = NULL; 180 b->w = NULL; 181 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 182 if (size == 1) { 183 PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ)); 184 } else { 185 PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ)); 186 } 187 A->preallocated = PETSC_TRUE; 188 A->assembled = PETSC_TRUE; 189 PetscFunctionReturn(0); 190 } 191 192 /* --------------------------------------------------------------------------------------*/ 193 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 194 { 195 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 197 const PetscScalar *x,*v; 198 PetscScalar *y, sum1, sum2; 199 PetscInt nonzerorow=0,n,i,jrow,j; 200 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 201 202 PetscFunctionBegin; 203 PetscCall(VecGetArrayRead(xx,&x)); 204 PetscCall(VecGetArray(yy,&y)); 205 idx = a->j; 206 v = a->a; 207 ii = a->i; 208 209 for (i=0; i<m; i++) { 210 jrow = ii[i]; 211 n = ii[i+1] - jrow; 212 sum1 = 0.0; 213 sum2 = 0.0; 214 215 nonzerorow += (n>0); 216 for (j=0; j<n; j++) { 217 sum1 += v[jrow]*x[2*idx[jrow]]; 218 sum2 += v[jrow]*x[2*idx[jrow]+1]; 219 jrow++; 220 } 221 y[2*i] = sum1; 222 y[2*i+1] = sum2; 223 } 224 225 PetscCall(PetscLogFlops(4.0*a->nz - 2.0*nonzerorow)); 226 PetscCall(VecRestoreArrayRead(xx,&x)); 227 PetscCall(VecRestoreArray(yy,&y)); 228 PetscFunctionReturn(0); 229 } 230 231 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 232 { 233 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 234 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 235 const PetscScalar *x,*v; 236 PetscScalar *y,alpha1,alpha2; 237 PetscInt n,i; 238 const PetscInt m = b->AIJ->rmap->n,*idx; 239 240 PetscFunctionBegin; 241 PetscCall(VecSet(yy,0.0)); 242 PetscCall(VecGetArrayRead(xx,&x)); 243 PetscCall(VecGetArray(yy,&y)); 244 245 for (i=0; i<m; i++) { 246 idx = a->j + a->i[i]; 247 v = a->a + a->i[i]; 248 n = a->i[i+1] - a->i[i]; 249 alpha1 = x[2*i]; 250 alpha2 = x[2*i+1]; 251 while (n-->0) { 252 y[2*(*idx)] += alpha1*(*v); 253 y[2*(*idx)+1] += alpha2*(*v); 254 idx++; v++; 255 } 256 } 257 PetscCall(PetscLogFlops(4.0*a->nz)); 258 PetscCall(VecRestoreArrayRead(xx,&x)); 259 PetscCall(VecRestoreArray(yy,&y)); 260 PetscFunctionReturn(0); 261 } 262 263 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 264 { 265 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 267 const PetscScalar *x,*v; 268 PetscScalar *y,sum1, sum2; 269 PetscInt n,i,jrow,j; 270 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 271 272 PetscFunctionBegin; 273 if (yy != zz) PetscCall(VecCopy(yy,zz)); 274 PetscCall(VecGetArrayRead(xx,&x)); 275 PetscCall(VecGetArray(zz,&y)); 276 idx = a->j; 277 v = a->a; 278 ii = a->i; 279 280 for (i=0; i<m; i++) { 281 jrow = ii[i]; 282 n = ii[i+1] - jrow; 283 sum1 = 0.0; 284 sum2 = 0.0; 285 for (j=0; j<n; j++) { 286 sum1 += v[jrow]*x[2*idx[jrow]]; 287 sum2 += v[jrow]*x[2*idx[jrow]+1]; 288 jrow++; 289 } 290 y[2*i] += sum1; 291 y[2*i+1] += sum2; 292 } 293 294 PetscCall(PetscLogFlops(4.0*a->nz)); 295 PetscCall(VecRestoreArrayRead(xx,&x)); 296 PetscCall(VecRestoreArray(zz,&y)); 297 PetscFunctionReturn(0); 298 } 299 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 300 { 301 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 302 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 303 const PetscScalar *x,*v; 304 PetscScalar *y,alpha1,alpha2; 305 PetscInt n,i; 306 const PetscInt m = b->AIJ->rmap->n,*idx; 307 308 PetscFunctionBegin; 309 if (yy != zz) PetscCall(VecCopy(yy,zz)); 310 PetscCall(VecGetArrayRead(xx,&x)); 311 PetscCall(VecGetArray(zz,&y)); 312 313 for (i=0; i<m; i++) { 314 idx = a->j + a->i[i]; 315 v = a->a + a->i[i]; 316 n = a->i[i+1] - a->i[i]; 317 alpha1 = x[2*i]; 318 alpha2 = x[2*i+1]; 319 while (n-->0) { 320 y[2*(*idx)] += alpha1*(*v); 321 y[2*(*idx)+1] += alpha2*(*v); 322 idx++; v++; 323 } 324 } 325 PetscCall(PetscLogFlops(4.0*a->nz)); 326 PetscCall(VecRestoreArrayRead(xx,&x)); 327 PetscCall(VecRestoreArray(zz,&y)); 328 PetscFunctionReturn(0); 329 } 330 /* --------------------------------------------------------------------------------------*/ 331 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 332 { 333 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 334 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 335 const PetscScalar *x,*v; 336 PetscScalar *y,sum1, sum2, sum3; 337 PetscInt nonzerorow=0,n,i,jrow,j; 338 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 339 340 PetscFunctionBegin; 341 PetscCall(VecGetArrayRead(xx,&x)); 342 PetscCall(VecGetArray(yy,&y)); 343 idx = a->j; 344 v = a->a; 345 ii = a->i; 346 347 for (i=0; i<m; i++) { 348 jrow = ii[i]; 349 n = ii[i+1] - jrow; 350 sum1 = 0.0; 351 sum2 = 0.0; 352 sum3 = 0.0; 353 354 nonzerorow += (n>0); 355 for (j=0; j<n; j++) { 356 sum1 += v[jrow]*x[3*idx[jrow]]; 357 sum2 += v[jrow]*x[3*idx[jrow]+1]; 358 sum3 += v[jrow]*x[3*idx[jrow]+2]; 359 jrow++; 360 } 361 y[3*i] = sum1; 362 y[3*i+1] = sum2; 363 y[3*i+2] = sum3; 364 } 365 366 PetscCall(PetscLogFlops(6.0*a->nz - 3.0*nonzerorow)); 367 PetscCall(VecRestoreArrayRead(xx,&x)); 368 PetscCall(VecRestoreArray(yy,&y)); 369 PetscFunctionReturn(0); 370 } 371 372 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 373 { 374 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 375 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 376 const PetscScalar *x,*v; 377 PetscScalar *y,alpha1,alpha2,alpha3; 378 PetscInt n,i; 379 const PetscInt m = b->AIJ->rmap->n,*idx; 380 381 PetscFunctionBegin; 382 PetscCall(VecSet(yy,0.0)); 383 PetscCall(VecGetArrayRead(xx,&x)); 384 PetscCall(VecGetArray(yy,&y)); 385 386 for (i=0; i<m; i++) { 387 idx = a->j + a->i[i]; 388 v = a->a + a->i[i]; 389 n = a->i[i+1] - a->i[i]; 390 alpha1 = x[3*i]; 391 alpha2 = x[3*i+1]; 392 alpha3 = x[3*i+2]; 393 while (n-->0) { 394 y[3*(*idx)] += alpha1*(*v); 395 y[3*(*idx)+1] += alpha2*(*v); 396 y[3*(*idx)+2] += alpha3*(*v); 397 idx++; v++; 398 } 399 } 400 PetscCall(PetscLogFlops(6.0*a->nz)); 401 PetscCall(VecRestoreArrayRead(xx,&x)); 402 PetscCall(VecRestoreArray(yy,&y)); 403 PetscFunctionReturn(0); 404 } 405 406 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 407 { 408 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 409 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 410 const PetscScalar *x,*v; 411 PetscScalar *y,sum1, sum2, sum3; 412 PetscInt n,i,jrow,j; 413 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 414 415 PetscFunctionBegin; 416 if (yy != zz) PetscCall(VecCopy(yy,zz)); 417 PetscCall(VecGetArrayRead(xx,&x)); 418 PetscCall(VecGetArray(zz,&y)); 419 idx = a->j; 420 v = a->a; 421 ii = a->i; 422 423 for (i=0; i<m; i++) { 424 jrow = ii[i]; 425 n = ii[i+1] - jrow; 426 sum1 = 0.0; 427 sum2 = 0.0; 428 sum3 = 0.0; 429 for (j=0; j<n; j++) { 430 sum1 += v[jrow]*x[3*idx[jrow]]; 431 sum2 += v[jrow]*x[3*idx[jrow]+1]; 432 sum3 += v[jrow]*x[3*idx[jrow]+2]; 433 jrow++; 434 } 435 y[3*i] += sum1; 436 y[3*i+1] += sum2; 437 y[3*i+2] += sum3; 438 } 439 440 PetscCall(PetscLogFlops(6.0*a->nz)); 441 PetscCall(VecRestoreArrayRead(xx,&x)); 442 PetscCall(VecRestoreArray(zz,&y)); 443 PetscFunctionReturn(0); 444 } 445 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 446 { 447 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 448 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 449 const PetscScalar *x,*v; 450 PetscScalar *y,alpha1,alpha2,alpha3; 451 PetscInt n,i; 452 const PetscInt m = b->AIJ->rmap->n,*idx; 453 454 PetscFunctionBegin; 455 if (yy != zz) PetscCall(VecCopy(yy,zz)); 456 PetscCall(VecGetArrayRead(xx,&x)); 457 PetscCall(VecGetArray(zz,&y)); 458 for (i=0; i<m; i++) { 459 idx = a->j + a->i[i]; 460 v = a->a + a->i[i]; 461 n = a->i[i+1] - a->i[i]; 462 alpha1 = x[3*i]; 463 alpha2 = x[3*i+1]; 464 alpha3 = x[3*i+2]; 465 while (n-->0) { 466 y[3*(*idx)] += alpha1*(*v); 467 y[3*(*idx)+1] += alpha2*(*v); 468 y[3*(*idx)+2] += alpha3*(*v); 469 idx++; v++; 470 } 471 } 472 PetscCall(PetscLogFlops(6.0*a->nz)); 473 PetscCall(VecRestoreArrayRead(xx,&x)); 474 PetscCall(VecRestoreArray(zz,&y)); 475 PetscFunctionReturn(0); 476 } 477 478 /* ------------------------------------------------------------------------------*/ 479 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 480 { 481 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 482 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 483 const PetscScalar *x,*v; 484 PetscScalar *y,sum1, sum2, sum3, sum4; 485 PetscInt nonzerorow=0,n,i,jrow,j; 486 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 487 488 PetscFunctionBegin; 489 PetscCall(VecGetArrayRead(xx,&x)); 490 PetscCall(VecGetArray(yy,&y)); 491 idx = a->j; 492 v = a->a; 493 ii = a->i; 494 495 for (i=0; i<m; i++) { 496 jrow = ii[i]; 497 n = ii[i+1] - jrow; 498 sum1 = 0.0; 499 sum2 = 0.0; 500 sum3 = 0.0; 501 sum4 = 0.0; 502 nonzerorow += (n>0); 503 for (j=0; j<n; j++) { 504 sum1 += v[jrow]*x[4*idx[jrow]]; 505 sum2 += v[jrow]*x[4*idx[jrow]+1]; 506 sum3 += v[jrow]*x[4*idx[jrow]+2]; 507 sum4 += v[jrow]*x[4*idx[jrow]+3]; 508 jrow++; 509 } 510 y[4*i] = sum1; 511 y[4*i+1] = sum2; 512 y[4*i+2] = sum3; 513 y[4*i+3] = sum4; 514 } 515 516 PetscCall(PetscLogFlops(8.0*a->nz - 4.0*nonzerorow)); 517 PetscCall(VecRestoreArrayRead(xx,&x)); 518 PetscCall(VecRestoreArray(yy,&y)); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 523 { 524 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 525 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 526 const PetscScalar *x,*v; 527 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 528 PetscInt n,i; 529 const PetscInt m = b->AIJ->rmap->n,*idx; 530 531 PetscFunctionBegin; 532 PetscCall(VecSet(yy,0.0)); 533 PetscCall(VecGetArrayRead(xx,&x)); 534 PetscCall(VecGetArray(yy,&y)); 535 for (i=0; i<m; i++) { 536 idx = a->j + a->i[i]; 537 v = a->a + a->i[i]; 538 n = a->i[i+1] - a->i[i]; 539 alpha1 = x[4*i]; 540 alpha2 = x[4*i+1]; 541 alpha3 = x[4*i+2]; 542 alpha4 = x[4*i+3]; 543 while (n-->0) { 544 y[4*(*idx)] += alpha1*(*v); 545 y[4*(*idx)+1] += alpha2*(*v); 546 y[4*(*idx)+2] += alpha3*(*v); 547 y[4*(*idx)+3] += alpha4*(*v); 548 idx++; v++; 549 } 550 } 551 PetscCall(PetscLogFlops(8.0*a->nz)); 552 PetscCall(VecRestoreArrayRead(xx,&x)); 553 PetscCall(VecRestoreArray(yy,&y)); 554 PetscFunctionReturn(0); 555 } 556 557 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 558 { 559 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 560 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 561 const PetscScalar *x,*v; 562 PetscScalar *y,sum1, sum2, sum3, sum4; 563 PetscInt n,i,jrow,j; 564 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 565 566 PetscFunctionBegin; 567 if (yy != zz) PetscCall(VecCopy(yy,zz)); 568 PetscCall(VecGetArrayRead(xx,&x)); 569 PetscCall(VecGetArray(zz,&y)); 570 idx = a->j; 571 v = a->a; 572 ii = a->i; 573 574 for (i=0; i<m; i++) { 575 jrow = ii[i]; 576 n = ii[i+1] - jrow; 577 sum1 = 0.0; 578 sum2 = 0.0; 579 sum3 = 0.0; 580 sum4 = 0.0; 581 for (j=0; j<n; j++) { 582 sum1 += v[jrow]*x[4*idx[jrow]]; 583 sum2 += v[jrow]*x[4*idx[jrow]+1]; 584 sum3 += v[jrow]*x[4*idx[jrow]+2]; 585 sum4 += v[jrow]*x[4*idx[jrow]+3]; 586 jrow++; 587 } 588 y[4*i] += sum1; 589 y[4*i+1] += sum2; 590 y[4*i+2] += sum3; 591 y[4*i+3] += sum4; 592 } 593 594 PetscCall(PetscLogFlops(8.0*a->nz)); 595 PetscCall(VecRestoreArrayRead(xx,&x)); 596 PetscCall(VecRestoreArray(zz,&y)); 597 PetscFunctionReturn(0); 598 } 599 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 602 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 603 const PetscScalar *x,*v; 604 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 605 PetscInt n,i; 606 const PetscInt m = b->AIJ->rmap->n,*idx; 607 608 PetscFunctionBegin; 609 if (yy != zz) PetscCall(VecCopy(yy,zz)); 610 PetscCall(VecGetArrayRead(xx,&x)); 611 PetscCall(VecGetArray(zz,&y)); 612 613 for (i=0; i<m; i++) { 614 idx = a->j + a->i[i]; 615 v = a->a + a->i[i]; 616 n = a->i[i+1] - a->i[i]; 617 alpha1 = x[4*i]; 618 alpha2 = x[4*i+1]; 619 alpha3 = x[4*i+2]; 620 alpha4 = x[4*i+3]; 621 while (n-->0) { 622 y[4*(*idx)] += alpha1*(*v); 623 y[4*(*idx)+1] += alpha2*(*v); 624 y[4*(*idx)+2] += alpha3*(*v); 625 y[4*(*idx)+3] += alpha4*(*v); 626 idx++; v++; 627 } 628 } 629 PetscCall(PetscLogFlops(8.0*a->nz)); 630 PetscCall(VecRestoreArrayRead(xx,&x)); 631 PetscCall(VecRestoreArray(zz,&y)); 632 PetscFunctionReturn(0); 633 } 634 /* ------------------------------------------------------------------------------*/ 635 636 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 637 { 638 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 639 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 640 const PetscScalar *x,*v; 641 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 642 PetscInt nonzerorow=0,n,i,jrow,j; 643 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 644 645 PetscFunctionBegin; 646 PetscCall(VecGetArrayRead(xx,&x)); 647 PetscCall(VecGetArray(yy,&y)); 648 idx = a->j; 649 v = a->a; 650 ii = a->i; 651 652 for (i=0; i<m; i++) { 653 jrow = ii[i]; 654 n = ii[i+1] - jrow; 655 sum1 = 0.0; 656 sum2 = 0.0; 657 sum3 = 0.0; 658 sum4 = 0.0; 659 sum5 = 0.0; 660 661 nonzerorow += (n>0); 662 for (j=0; j<n; j++) { 663 sum1 += v[jrow]*x[5*idx[jrow]]; 664 sum2 += v[jrow]*x[5*idx[jrow]+1]; 665 sum3 += v[jrow]*x[5*idx[jrow]+2]; 666 sum4 += v[jrow]*x[5*idx[jrow]+3]; 667 sum5 += v[jrow]*x[5*idx[jrow]+4]; 668 jrow++; 669 } 670 y[5*i] = sum1; 671 y[5*i+1] = sum2; 672 y[5*i+2] = sum3; 673 y[5*i+3] = sum4; 674 y[5*i+4] = sum5; 675 } 676 677 PetscCall(PetscLogFlops(10.0*a->nz - 5.0*nonzerorow)); 678 PetscCall(VecRestoreArrayRead(xx,&x)); 679 PetscCall(VecRestoreArray(yy,&y)); 680 PetscFunctionReturn(0); 681 } 682 683 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 684 { 685 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 687 const PetscScalar *x,*v; 688 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 689 PetscInt n,i; 690 const PetscInt m = b->AIJ->rmap->n,*idx; 691 692 PetscFunctionBegin; 693 PetscCall(VecSet(yy,0.0)); 694 PetscCall(VecGetArrayRead(xx,&x)); 695 PetscCall(VecGetArray(yy,&y)); 696 697 for (i=0; i<m; i++) { 698 idx = a->j + a->i[i]; 699 v = a->a + a->i[i]; 700 n = a->i[i+1] - a->i[i]; 701 alpha1 = x[5*i]; 702 alpha2 = x[5*i+1]; 703 alpha3 = x[5*i+2]; 704 alpha4 = x[5*i+3]; 705 alpha5 = x[5*i+4]; 706 while (n-->0) { 707 y[5*(*idx)] += alpha1*(*v); 708 y[5*(*idx)+1] += alpha2*(*v); 709 y[5*(*idx)+2] += alpha3*(*v); 710 y[5*(*idx)+3] += alpha4*(*v); 711 y[5*(*idx)+4] += alpha5*(*v); 712 idx++; v++; 713 } 714 } 715 PetscCall(PetscLogFlops(10.0*a->nz)); 716 PetscCall(VecRestoreArrayRead(xx,&x)); 717 PetscCall(VecRestoreArray(yy,&y)); 718 PetscFunctionReturn(0); 719 } 720 721 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 722 { 723 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 725 const PetscScalar *x,*v; 726 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 727 PetscInt n,i,jrow,j; 728 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 729 730 PetscFunctionBegin; 731 if (yy != zz) PetscCall(VecCopy(yy,zz)); 732 PetscCall(VecGetArrayRead(xx,&x)); 733 PetscCall(VecGetArray(zz,&y)); 734 idx = a->j; 735 v = a->a; 736 ii = a->i; 737 738 for (i=0; i<m; i++) { 739 jrow = ii[i]; 740 n = ii[i+1] - jrow; 741 sum1 = 0.0; 742 sum2 = 0.0; 743 sum3 = 0.0; 744 sum4 = 0.0; 745 sum5 = 0.0; 746 for (j=0; j<n; j++) { 747 sum1 += v[jrow]*x[5*idx[jrow]]; 748 sum2 += v[jrow]*x[5*idx[jrow]+1]; 749 sum3 += v[jrow]*x[5*idx[jrow]+2]; 750 sum4 += v[jrow]*x[5*idx[jrow]+3]; 751 sum5 += v[jrow]*x[5*idx[jrow]+4]; 752 jrow++; 753 } 754 y[5*i] += sum1; 755 y[5*i+1] += sum2; 756 y[5*i+2] += sum3; 757 y[5*i+3] += sum4; 758 y[5*i+4] += sum5; 759 } 760 761 PetscCall(PetscLogFlops(10.0*a->nz)); 762 PetscCall(VecRestoreArrayRead(xx,&x)); 763 PetscCall(VecRestoreArray(zz,&y)); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 768 { 769 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 770 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 771 const PetscScalar *x,*v; 772 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 773 PetscInt n,i; 774 const PetscInt m = b->AIJ->rmap->n,*idx; 775 776 PetscFunctionBegin; 777 if (yy != zz) PetscCall(VecCopy(yy,zz)); 778 PetscCall(VecGetArrayRead(xx,&x)); 779 PetscCall(VecGetArray(zz,&y)); 780 781 for (i=0; i<m; i++) { 782 idx = a->j + a->i[i]; 783 v = a->a + a->i[i]; 784 n = a->i[i+1] - a->i[i]; 785 alpha1 = x[5*i]; 786 alpha2 = x[5*i+1]; 787 alpha3 = x[5*i+2]; 788 alpha4 = x[5*i+3]; 789 alpha5 = x[5*i+4]; 790 while (n-->0) { 791 y[5*(*idx)] += alpha1*(*v); 792 y[5*(*idx)+1] += alpha2*(*v); 793 y[5*(*idx)+2] += alpha3*(*v); 794 y[5*(*idx)+3] += alpha4*(*v); 795 y[5*(*idx)+4] += alpha5*(*v); 796 idx++; v++; 797 } 798 } 799 PetscCall(PetscLogFlops(10.0*a->nz)); 800 PetscCall(VecRestoreArrayRead(xx,&x)); 801 PetscCall(VecRestoreArray(zz,&y)); 802 PetscFunctionReturn(0); 803 } 804 805 /* ------------------------------------------------------------------------------*/ 806 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 807 { 808 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 809 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 810 const PetscScalar *x,*v; 811 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 812 PetscInt nonzerorow=0,n,i,jrow,j; 813 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 814 815 PetscFunctionBegin; 816 PetscCall(VecGetArrayRead(xx,&x)); 817 PetscCall(VecGetArray(yy,&y)); 818 idx = a->j; 819 v = a->a; 820 ii = a->i; 821 822 for (i=0; i<m; i++) { 823 jrow = ii[i]; 824 n = ii[i+1] - jrow; 825 sum1 = 0.0; 826 sum2 = 0.0; 827 sum3 = 0.0; 828 sum4 = 0.0; 829 sum5 = 0.0; 830 sum6 = 0.0; 831 832 nonzerorow += (n>0); 833 for (j=0; j<n; j++) { 834 sum1 += v[jrow]*x[6*idx[jrow]]; 835 sum2 += v[jrow]*x[6*idx[jrow]+1]; 836 sum3 += v[jrow]*x[6*idx[jrow]+2]; 837 sum4 += v[jrow]*x[6*idx[jrow]+3]; 838 sum5 += v[jrow]*x[6*idx[jrow]+4]; 839 sum6 += v[jrow]*x[6*idx[jrow]+5]; 840 jrow++; 841 } 842 y[6*i] = sum1; 843 y[6*i+1] = sum2; 844 y[6*i+2] = sum3; 845 y[6*i+3] = sum4; 846 y[6*i+4] = sum5; 847 y[6*i+5] = sum6; 848 } 849 850 PetscCall(PetscLogFlops(12.0*a->nz - 6.0*nonzerorow)); 851 PetscCall(VecRestoreArrayRead(xx,&x)); 852 PetscCall(VecRestoreArray(yy,&y)); 853 PetscFunctionReturn(0); 854 } 855 856 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 857 { 858 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 859 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 860 const PetscScalar *x,*v; 861 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 862 PetscInt n,i; 863 const PetscInt m = b->AIJ->rmap->n,*idx; 864 865 PetscFunctionBegin; 866 PetscCall(VecSet(yy,0.0)); 867 PetscCall(VecGetArrayRead(xx,&x)); 868 PetscCall(VecGetArray(yy,&y)); 869 870 for (i=0; i<m; i++) { 871 idx = a->j + a->i[i]; 872 v = a->a + a->i[i]; 873 n = a->i[i+1] - a->i[i]; 874 alpha1 = x[6*i]; 875 alpha2 = x[6*i+1]; 876 alpha3 = x[6*i+2]; 877 alpha4 = x[6*i+3]; 878 alpha5 = x[6*i+4]; 879 alpha6 = x[6*i+5]; 880 while (n-->0) { 881 y[6*(*idx)] += alpha1*(*v); 882 y[6*(*idx)+1] += alpha2*(*v); 883 y[6*(*idx)+2] += alpha3*(*v); 884 y[6*(*idx)+3] += alpha4*(*v); 885 y[6*(*idx)+4] += alpha5*(*v); 886 y[6*(*idx)+5] += alpha6*(*v); 887 idx++; v++; 888 } 889 } 890 PetscCall(PetscLogFlops(12.0*a->nz)); 891 PetscCall(VecRestoreArrayRead(xx,&x)); 892 PetscCall(VecRestoreArray(yy,&y)); 893 PetscFunctionReturn(0); 894 } 895 896 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 897 { 898 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 899 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 900 const PetscScalar *x,*v; 901 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 902 PetscInt n,i,jrow,j; 903 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 904 905 PetscFunctionBegin; 906 if (yy != zz) PetscCall(VecCopy(yy,zz)); 907 PetscCall(VecGetArrayRead(xx,&x)); 908 PetscCall(VecGetArray(zz,&y)); 909 idx = a->j; 910 v = a->a; 911 ii = a->i; 912 913 for (i=0; i<m; i++) { 914 jrow = ii[i]; 915 n = ii[i+1] - jrow; 916 sum1 = 0.0; 917 sum2 = 0.0; 918 sum3 = 0.0; 919 sum4 = 0.0; 920 sum5 = 0.0; 921 sum6 = 0.0; 922 for (j=0; j<n; j++) { 923 sum1 += v[jrow]*x[6*idx[jrow]]; 924 sum2 += v[jrow]*x[6*idx[jrow]+1]; 925 sum3 += v[jrow]*x[6*idx[jrow]+2]; 926 sum4 += v[jrow]*x[6*idx[jrow]+3]; 927 sum5 += v[jrow]*x[6*idx[jrow]+4]; 928 sum6 += v[jrow]*x[6*idx[jrow]+5]; 929 jrow++; 930 } 931 y[6*i] += sum1; 932 y[6*i+1] += sum2; 933 y[6*i+2] += sum3; 934 y[6*i+3] += sum4; 935 y[6*i+4] += sum5; 936 y[6*i+5] += sum6; 937 } 938 939 PetscCall(PetscLogFlops(12.0*a->nz)); 940 PetscCall(VecRestoreArrayRead(xx,&x)); 941 PetscCall(VecRestoreArray(zz,&y)); 942 PetscFunctionReturn(0); 943 } 944 945 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 946 { 947 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 948 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 949 const PetscScalar *x,*v; 950 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 951 PetscInt n,i; 952 const PetscInt m = b->AIJ->rmap->n,*idx; 953 954 PetscFunctionBegin; 955 if (yy != zz) PetscCall(VecCopy(yy,zz)); 956 PetscCall(VecGetArrayRead(xx,&x)); 957 PetscCall(VecGetArray(zz,&y)); 958 959 for (i=0; i<m; i++) { 960 idx = a->j + a->i[i]; 961 v = a->a + a->i[i]; 962 n = a->i[i+1] - a->i[i]; 963 alpha1 = x[6*i]; 964 alpha2 = x[6*i+1]; 965 alpha3 = x[6*i+2]; 966 alpha4 = x[6*i+3]; 967 alpha5 = x[6*i+4]; 968 alpha6 = x[6*i+5]; 969 while (n-->0) { 970 y[6*(*idx)] += alpha1*(*v); 971 y[6*(*idx)+1] += alpha2*(*v); 972 y[6*(*idx)+2] += alpha3*(*v); 973 y[6*(*idx)+3] += alpha4*(*v); 974 y[6*(*idx)+4] += alpha5*(*v); 975 y[6*(*idx)+5] += alpha6*(*v); 976 idx++; v++; 977 } 978 } 979 PetscCall(PetscLogFlops(12.0*a->nz)); 980 PetscCall(VecRestoreArrayRead(xx,&x)); 981 PetscCall(VecRestoreArray(zz,&y)); 982 PetscFunctionReturn(0); 983 } 984 985 /* ------------------------------------------------------------------------------*/ 986 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 987 { 988 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 989 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 990 const PetscScalar *x,*v; 991 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 992 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 993 PetscInt nonzerorow=0,n,i,jrow,j; 994 995 PetscFunctionBegin; 996 PetscCall(VecGetArrayRead(xx,&x)); 997 PetscCall(VecGetArray(yy,&y)); 998 idx = a->j; 999 v = a->a; 1000 ii = a->i; 1001 1002 for (i=0; i<m; i++) { 1003 jrow = ii[i]; 1004 n = ii[i+1] - jrow; 1005 sum1 = 0.0; 1006 sum2 = 0.0; 1007 sum3 = 0.0; 1008 sum4 = 0.0; 1009 sum5 = 0.0; 1010 sum6 = 0.0; 1011 sum7 = 0.0; 1012 1013 nonzerorow += (n>0); 1014 for (j=0; j<n; j++) { 1015 sum1 += v[jrow]*x[7*idx[jrow]]; 1016 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1017 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1018 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1019 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1020 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1021 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1022 jrow++; 1023 } 1024 y[7*i] = sum1; 1025 y[7*i+1] = sum2; 1026 y[7*i+2] = sum3; 1027 y[7*i+3] = sum4; 1028 y[7*i+4] = sum5; 1029 y[7*i+5] = sum6; 1030 y[7*i+6] = sum7; 1031 } 1032 1033 PetscCall(PetscLogFlops(14.0*a->nz - 7.0*nonzerorow)); 1034 PetscCall(VecRestoreArrayRead(xx,&x)); 1035 PetscCall(VecRestoreArray(yy,&y)); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1040 { 1041 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1042 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1043 const PetscScalar *x,*v; 1044 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1045 const PetscInt m = b->AIJ->rmap->n,*idx; 1046 PetscInt n,i; 1047 1048 PetscFunctionBegin; 1049 PetscCall(VecSet(yy,0.0)); 1050 PetscCall(VecGetArrayRead(xx,&x)); 1051 PetscCall(VecGetArray(yy,&y)); 1052 1053 for (i=0; i<m; i++) { 1054 idx = a->j + a->i[i]; 1055 v = a->a + a->i[i]; 1056 n = a->i[i+1] - a->i[i]; 1057 alpha1 = x[7*i]; 1058 alpha2 = x[7*i+1]; 1059 alpha3 = x[7*i+2]; 1060 alpha4 = x[7*i+3]; 1061 alpha5 = x[7*i+4]; 1062 alpha6 = x[7*i+5]; 1063 alpha7 = x[7*i+6]; 1064 while (n-->0) { 1065 y[7*(*idx)] += alpha1*(*v); 1066 y[7*(*idx)+1] += alpha2*(*v); 1067 y[7*(*idx)+2] += alpha3*(*v); 1068 y[7*(*idx)+3] += alpha4*(*v); 1069 y[7*(*idx)+4] += alpha5*(*v); 1070 y[7*(*idx)+5] += alpha6*(*v); 1071 y[7*(*idx)+6] += alpha7*(*v); 1072 idx++; v++; 1073 } 1074 } 1075 PetscCall(PetscLogFlops(14.0*a->nz)); 1076 PetscCall(VecRestoreArrayRead(xx,&x)); 1077 PetscCall(VecRestoreArray(yy,&y)); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1082 { 1083 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1084 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1085 const PetscScalar *x,*v; 1086 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1087 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1088 PetscInt n,i,jrow,j; 1089 1090 PetscFunctionBegin; 1091 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1092 PetscCall(VecGetArrayRead(xx,&x)); 1093 PetscCall(VecGetArray(zz,&y)); 1094 idx = a->j; 1095 v = a->a; 1096 ii = a->i; 1097 1098 for (i=0; i<m; i++) { 1099 jrow = ii[i]; 1100 n = ii[i+1] - jrow; 1101 sum1 = 0.0; 1102 sum2 = 0.0; 1103 sum3 = 0.0; 1104 sum4 = 0.0; 1105 sum5 = 0.0; 1106 sum6 = 0.0; 1107 sum7 = 0.0; 1108 for (j=0; j<n; j++) { 1109 sum1 += v[jrow]*x[7*idx[jrow]]; 1110 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1111 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1112 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1113 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1114 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1115 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1116 jrow++; 1117 } 1118 y[7*i] += sum1; 1119 y[7*i+1] += sum2; 1120 y[7*i+2] += sum3; 1121 y[7*i+3] += sum4; 1122 y[7*i+4] += sum5; 1123 y[7*i+5] += sum6; 1124 y[7*i+6] += sum7; 1125 } 1126 1127 PetscCall(PetscLogFlops(14.0*a->nz)); 1128 PetscCall(VecRestoreArrayRead(xx,&x)); 1129 PetscCall(VecRestoreArray(zz,&y)); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1134 { 1135 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1136 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1137 const PetscScalar *x,*v; 1138 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1139 const PetscInt m = b->AIJ->rmap->n,*idx; 1140 PetscInt n,i; 1141 1142 PetscFunctionBegin; 1143 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1144 PetscCall(VecGetArrayRead(xx,&x)); 1145 PetscCall(VecGetArray(zz,&y)); 1146 for (i=0; i<m; i++) { 1147 idx = a->j + a->i[i]; 1148 v = a->a + a->i[i]; 1149 n = a->i[i+1] - a->i[i]; 1150 alpha1 = x[7*i]; 1151 alpha2 = x[7*i+1]; 1152 alpha3 = x[7*i+2]; 1153 alpha4 = x[7*i+3]; 1154 alpha5 = x[7*i+4]; 1155 alpha6 = x[7*i+5]; 1156 alpha7 = x[7*i+6]; 1157 while (n-->0) { 1158 y[7*(*idx)] += alpha1*(*v); 1159 y[7*(*idx)+1] += alpha2*(*v); 1160 y[7*(*idx)+2] += alpha3*(*v); 1161 y[7*(*idx)+3] += alpha4*(*v); 1162 y[7*(*idx)+4] += alpha5*(*v); 1163 y[7*(*idx)+5] += alpha6*(*v); 1164 y[7*(*idx)+6] += alpha7*(*v); 1165 idx++; v++; 1166 } 1167 } 1168 PetscCall(PetscLogFlops(14.0*a->nz)); 1169 PetscCall(VecRestoreArrayRead(xx,&x)); 1170 PetscCall(VecRestoreArray(zz,&y)); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1175 { 1176 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1177 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1178 const PetscScalar *x,*v; 1179 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1180 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1181 PetscInt nonzerorow=0,n,i,jrow,j; 1182 1183 PetscFunctionBegin; 1184 PetscCall(VecGetArrayRead(xx,&x)); 1185 PetscCall(VecGetArray(yy,&y)); 1186 idx = a->j; 1187 v = a->a; 1188 ii = a->i; 1189 1190 for (i=0; i<m; i++) { 1191 jrow = ii[i]; 1192 n = ii[i+1] - jrow; 1193 sum1 = 0.0; 1194 sum2 = 0.0; 1195 sum3 = 0.0; 1196 sum4 = 0.0; 1197 sum5 = 0.0; 1198 sum6 = 0.0; 1199 sum7 = 0.0; 1200 sum8 = 0.0; 1201 1202 nonzerorow += (n>0); 1203 for (j=0; j<n; j++) { 1204 sum1 += v[jrow]*x[8*idx[jrow]]; 1205 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1206 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1207 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1208 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1209 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1210 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1211 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1212 jrow++; 1213 } 1214 y[8*i] = sum1; 1215 y[8*i+1] = sum2; 1216 y[8*i+2] = sum3; 1217 y[8*i+3] = sum4; 1218 y[8*i+4] = sum5; 1219 y[8*i+5] = sum6; 1220 y[8*i+6] = sum7; 1221 y[8*i+7] = sum8; 1222 } 1223 1224 PetscCall(PetscLogFlops(16.0*a->nz - 8.0*nonzerorow)); 1225 PetscCall(VecRestoreArrayRead(xx,&x)); 1226 PetscCall(VecRestoreArray(yy,&y)); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1231 { 1232 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1233 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1234 const PetscScalar *x,*v; 1235 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1236 const PetscInt m = b->AIJ->rmap->n,*idx; 1237 PetscInt n,i; 1238 1239 PetscFunctionBegin; 1240 PetscCall(VecSet(yy,0.0)); 1241 PetscCall(VecGetArrayRead(xx,&x)); 1242 PetscCall(VecGetArray(yy,&y)); 1243 1244 for (i=0; i<m; i++) { 1245 idx = a->j + a->i[i]; 1246 v = a->a + a->i[i]; 1247 n = a->i[i+1] - a->i[i]; 1248 alpha1 = x[8*i]; 1249 alpha2 = x[8*i+1]; 1250 alpha3 = x[8*i+2]; 1251 alpha4 = x[8*i+3]; 1252 alpha5 = x[8*i+4]; 1253 alpha6 = x[8*i+5]; 1254 alpha7 = x[8*i+6]; 1255 alpha8 = x[8*i+7]; 1256 while (n-->0) { 1257 y[8*(*idx)] += alpha1*(*v); 1258 y[8*(*idx)+1] += alpha2*(*v); 1259 y[8*(*idx)+2] += alpha3*(*v); 1260 y[8*(*idx)+3] += alpha4*(*v); 1261 y[8*(*idx)+4] += alpha5*(*v); 1262 y[8*(*idx)+5] += alpha6*(*v); 1263 y[8*(*idx)+6] += alpha7*(*v); 1264 y[8*(*idx)+7] += alpha8*(*v); 1265 idx++; v++; 1266 } 1267 } 1268 PetscCall(PetscLogFlops(16.0*a->nz)); 1269 PetscCall(VecRestoreArrayRead(xx,&x)); 1270 PetscCall(VecRestoreArray(yy,&y)); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1275 { 1276 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1278 const PetscScalar *x,*v; 1279 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1280 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1281 PetscInt n,i,jrow,j; 1282 1283 PetscFunctionBegin; 1284 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1285 PetscCall(VecGetArrayRead(xx,&x)); 1286 PetscCall(VecGetArray(zz,&y)); 1287 idx = a->j; 1288 v = a->a; 1289 ii = a->i; 1290 1291 for (i=0; i<m; i++) { 1292 jrow = ii[i]; 1293 n = ii[i+1] - jrow; 1294 sum1 = 0.0; 1295 sum2 = 0.0; 1296 sum3 = 0.0; 1297 sum4 = 0.0; 1298 sum5 = 0.0; 1299 sum6 = 0.0; 1300 sum7 = 0.0; 1301 sum8 = 0.0; 1302 for (j=0; j<n; j++) { 1303 sum1 += v[jrow]*x[8*idx[jrow]]; 1304 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1305 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1306 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1307 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1308 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1309 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1310 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1311 jrow++; 1312 } 1313 y[8*i] += sum1; 1314 y[8*i+1] += sum2; 1315 y[8*i+2] += sum3; 1316 y[8*i+3] += sum4; 1317 y[8*i+4] += sum5; 1318 y[8*i+5] += sum6; 1319 y[8*i+6] += sum7; 1320 y[8*i+7] += sum8; 1321 } 1322 1323 PetscCall(PetscLogFlops(16.0*a->nz)); 1324 PetscCall(VecRestoreArrayRead(xx,&x)); 1325 PetscCall(VecRestoreArray(zz,&y)); 1326 PetscFunctionReturn(0); 1327 } 1328 1329 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1330 { 1331 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1332 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1333 const PetscScalar *x,*v; 1334 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1335 const PetscInt m = b->AIJ->rmap->n,*idx; 1336 PetscInt n,i; 1337 1338 PetscFunctionBegin; 1339 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1340 PetscCall(VecGetArrayRead(xx,&x)); 1341 PetscCall(VecGetArray(zz,&y)); 1342 for (i=0; i<m; i++) { 1343 idx = a->j + a->i[i]; 1344 v = a->a + a->i[i]; 1345 n = a->i[i+1] - a->i[i]; 1346 alpha1 = x[8*i]; 1347 alpha2 = x[8*i+1]; 1348 alpha3 = x[8*i+2]; 1349 alpha4 = x[8*i+3]; 1350 alpha5 = x[8*i+4]; 1351 alpha6 = x[8*i+5]; 1352 alpha7 = x[8*i+6]; 1353 alpha8 = x[8*i+7]; 1354 while (n-->0) { 1355 y[8*(*idx)] += alpha1*(*v); 1356 y[8*(*idx)+1] += alpha2*(*v); 1357 y[8*(*idx)+2] += alpha3*(*v); 1358 y[8*(*idx)+3] += alpha4*(*v); 1359 y[8*(*idx)+4] += alpha5*(*v); 1360 y[8*(*idx)+5] += alpha6*(*v); 1361 y[8*(*idx)+6] += alpha7*(*v); 1362 y[8*(*idx)+7] += alpha8*(*v); 1363 idx++; v++; 1364 } 1365 } 1366 PetscCall(PetscLogFlops(16.0*a->nz)); 1367 PetscCall(VecRestoreArrayRead(xx,&x)); 1368 PetscCall(VecRestoreArray(zz,&y)); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 /* ------------------------------------------------------------------------------*/ 1373 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1374 { 1375 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1376 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1377 const PetscScalar *x,*v; 1378 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1379 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1380 PetscInt nonzerorow=0,n,i,jrow,j; 1381 1382 PetscFunctionBegin; 1383 PetscCall(VecGetArrayRead(xx,&x)); 1384 PetscCall(VecGetArray(yy,&y)); 1385 idx = a->j; 1386 v = a->a; 1387 ii = a->i; 1388 1389 for (i=0; i<m; i++) { 1390 jrow = ii[i]; 1391 n = ii[i+1] - jrow; 1392 sum1 = 0.0; 1393 sum2 = 0.0; 1394 sum3 = 0.0; 1395 sum4 = 0.0; 1396 sum5 = 0.0; 1397 sum6 = 0.0; 1398 sum7 = 0.0; 1399 sum8 = 0.0; 1400 sum9 = 0.0; 1401 1402 nonzerorow += (n>0); 1403 for (j=0; j<n; j++) { 1404 sum1 += v[jrow]*x[9*idx[jrow]]; 1405 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1406 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1407 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1408 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1409 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1410 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1411 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1412 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1413 jrow++; 1414 } 1415 y[9*i] = sum1; 1416 y[9*i+1] = sum2; 1417 y[9*i+2] = sum3; 1418 y[9*i+3] = sum4; 1419 y[9*i+4] = sum5; 1420 y[9*i+5] = sum6; 1421 y[9*i+6] = sum7; 1422 y[9*i+7] = sum8; 1423 y[9*i+8] = sum9; 1424 } 1425 1426 PetscCall(PetscLogFlops(18.0*a->nz - 9*nonzerorow)); 1427 PetscCall(VecRestoreArrayRead(xx,&x)); 1428 PetscCall(VecRestoreArray(yy,&y)); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 /* ------------------------------------------------------------------------------*/ 1433 1434 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1435 { 1436 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1437 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1438 const PetscScalar *x,*v; 1439 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1440 const PetscInt m = b->AIJ->rmap->n,*idx; 1441 PetscInt n,i; 1442 1443 PetscFunctionBegin; 1444 PetscCall(VecSet(yy,0.0)); 1445 PetscCall(VecGetArrayRead(xx,&x)); 1446 PetscCall(VecGetArray(yy,&y)); 1447 1448 for (i=0; i<m; i++) { 1449 idx = a->j + a->i[i]; 1450 v = a->a + a->i[i]; 1451 n = a->i[i+1] - a->i[i]; 1452 alpha1 = x[9*i]; 1453 alpha2 = x[9*i+1]; 1454 alpha3 = x[9*i+2]; 1455 alpha4 = x[9*i+3]; 1456 alpha5 = x[9*i+4]; 1457 alpha6 = x[9*i+5]; 1458 alpha7 = x[9*i+6]; 1459 alpha8 = x[9*i+7]; 1460 alpha9 = x[9*i+8]; 1461 while (n-->0) { 1462 y[9*(*idx)] += alpha1*(*v); 1463 y[9*(*idx)+1] += alpha2*(*v); 1464 y[9*(*idx)+2] += alpha3*(*v); 1465 y[9*(*idx)+3] += alpha4*(*v); 1466 y[9*(*idx)+4] += alpha5*(*v); 1467 y[9*(*idx)+5] += alpha6*(*v); 1468 y[9*(*idx)+6] += alpha7*(*v); 1469 y[9*(*idx)+7] += alpha8*(*v); 1470 y[9*(*idx)+8] += alpha9*(*v); 1471 idx++; v++; 1472 } 1473 } 1474 PetscCall(PetscLogFlops(18.0*a->nz)); 1475 PetscCall(VecRestoreArrayRead(xx,&x)); 1476 PetscCall(VecRestoreArray(yy,&y)); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1481 { 1482 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1483 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1484 const PetscScalar *x,*v; 1485 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1486 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1487 PetscInt n,i,jrow,j; 1488 1489 PetscFunctionBegin; 1490 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1491 PetscCall(VecGetArrayRead(xx,&x)); 1492 PetscCall(VecGetArray(zz,&y)); 1493 idx = a->j; 1494 v = a->a; 1495 ii = a->i; 1496 1497 for (i=0; i<m; i++) { 1498 jrow = ii[i]; 1499 n = ii[i+1] - jrow; 1500 sum1 = 0.0; 1501 sum2 = 0.0; 1502 sum3 = 0.0; 1503 sum4 = 0.0; 1504 sum5 = 0.0; 1505 sum6 = 0.0; 1506 sum7 = 0.0; 1507 sum8 = 0.0; 1508 sum9 = 0.0; 1509 for (j=0; j<n; j++) { 1510 sum1 += v[jrow]*x[9*idx[jrow]]; 1511 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1512 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1513 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1514 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1515 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1516 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1517 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1518 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1519 jrow++; 1520 } 1521 y[9*i] += sum1; 1522 y[9*i+1] += sum2; 1523 y[9*i+2] += sum3; 1524 y[9*i+3] += sum4; 1525 y[9*i+4] += sum5; 1526 y[9*i+5] += sum6; 1527 y[9*i+6] += sum7; 1528 y[9*i+7] += sum8; 1529 y[9*i+8] += sum9; 1530 } 1531 1532 PetscCall(PetscLogFlops(18.0*a->nz)); 1533 PetscCall(VecRestoreArrayRead(xx,&x)); 1534 PetscCall(VecRestoreArray(zz,&y)); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1539 { 1540 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1542 const PetscScalar *x,*v; 1543 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1544 const PetscInt m = b->AIJ->rmap->n,*idx; 1545 PetscInt n,i; 1546 1547 PetscFunctionBegin; 1548 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1549 PetscCall(VecGetArrayRead(xx,&x)); 1550 PetscCall(VecGetArray(zz,&y)); 1551 for (i=0; i<m; i++) { 1552 idx = a->j + a->i[i]; 1553 v = a->a + a->i[i]; 1554 n = a->i[i+1] - a->i[i]; 1555 alpha1 = x[9*i]; 1556 alpha2 = x[9*i+1]; 1557 alpha3 = x[9*i+2]; 1558 alpha4 = x[9*i+3]; 1559 alpha5 = x[9*i+4]; 1560 alpha6 = x[9*i+5]; 1561 alpha7 = x[9*i+6]; 1562 alpha8 = x[9*i+7]; 1563 alpha9 = x[9*i+8]; 1564 while (n-->0) { 1565 y[9*(*idx)] += alpha1*(*v); 1566 y[9*(*idx)+1] += alpha2*(*v); 1567 y[9*(*idx)+2] += alpha3*(*v); 1568 y[9*(*idx)+3] += alpha4*(*v); 1569 y[9*(*idx)+4] += alpha5*(*v); 1570 y[9*(*idx)+5] += alpha6*(*v); 1571 y[9*(*idx)+6] += alpha7*(*v); 1572 y[9*(*idx)+7] += alpha8*(*v); 1573 y[9*(*idx)+8] += alpha9*(*v); 1574 idx++; v++; 1575 } 1576 } 1577 PetscCall(PetscLogFlops(18.0*a->nz)); 1578 PetscCall(VecRestoreArrayRead(xx,&x)); 1579 PetscCall(VecRestoreArray(zz,&y)); 1580 PetscFunctionReturn(0); 1581 } 1582 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1583 { 1584 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1585 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1586 const PetscScalar *x,*v; 1587 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1588 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1589 PetscInt nonzerorow=0,n,i,jrow,j; 1590 1591 PetscFunctionBegin; 1592 PetscCall(VecGetArrayRead(xx,&x)); 1593 PetscCall(VecGetArray(yy,&y)); 1594 idx = a->j; 1595 v = a->a; 1596 ii = a->i; 1597 1598 for (i=0; i<m; i++) { 1599 jrow = ii[i]; 1600 n = ii[i+1] - jrow; 1601 sum1 = 0.0; 1602 sum2 = 0.0; 1603 sum3 = 0.0; 1604 sum4 = 0.0; 1605 sum5 = 0.0; 1606 sum6 = 0.0; 1607 sum7 = 0.0; 1608 sum8 = 0.0; 1609 sum9 = 0.0; 1610 sum10 = 0.0; 1611 1612 nonzerorow += (n>0); 1613 for (j=0; j<n; j++) { 1614 sum1 += v[jrow]*x[10*idx[jrow]]; 1615 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1616 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1617 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1618 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1619 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1620 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1621 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1622 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1623 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1624 jrow++; 1625 } 1626 y[10*i] = sum1; 1627 y[10*i+1] = sum2; 1628 y[10*i+2] = sum3; 1629 y[10*i+3] = sum4; 1630 y[10*i+4] = sum5; 1631 y[10*i+5] = sum6; 1632 y[10*i+6] = sum7; 1633 y[10*i+7] = sum8; 1634 y[10*i+8] = sum9; 1635 y[10*i+9] = sum10; 1636 } 1637 1638 PetscCall(PetscLogFlops(20.0*a->nz - 10.0*nonzerorow)); 1639 PetscCall(VecRestoreArrayRead(xx,&x)); 1640 PetscCall(VecRestoreArray(yy,&y)); 1641 PetscFunctionReturn(0); 1642 } 1643 1644 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1645 { 1646 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1648 const PetscScalar *x,*v; 1649 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1650 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1651 PetscInt n,i,jrow,j; 1652 1653 PetscFunctionBegin; 1654 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1655 PetscCall(VecGetArrayRead(xx,&x)); 1656 PetscCall(VecGetArray(zz,&y)); 1657 idx = a->j; 1658 v = a->a; 1659 ii = a->i; 1660 1661 for (i=0; i<m; i++) { 1662 jrow = ii[i]; 1663 n = ii[i+1] - jrow; 1664 sum1 = 0.0; 1665 sum2 = 0.0; 1666 sum3 = 0.0; 1667 sum4 = 0.0; 1668 sum5 = 0.0; 1669 sum6 = 0.0; 1670 sum7 = 0.0; 1671 sum8 = 0.0; 1672 sum9 = 0.0; 1673 sum10 = 0.0; 1674 for (j=0; j<n; j++) { 1675 sum1 += v[jrow]*x[10*idx[jrow]]; 1676 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1677 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1678 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1679 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1680 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1681 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1682 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1683 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1684 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1685 jrow++; 1686 } 1687 y[10*i] += sum1; 1688 y[10*i+1] += sum2; 1689 y[10*i+2] += sum3; 1690 y[10*i+3] += sum4; 1691 y[10*i+4] += sum5; 1692 y[10*i+5] += sum6; 1693 y[10*i+6] += sum7; 1694 y[10*i+7] += sum8; 1695 y[10*i+8] += sum9; 1696 y[10*i+9] += sum10; 1697 } 1698 1699 PetscCall(PetscLogFlops(20.0*a->nz)); 1700 PetscCall(VecRestoreArrayRead(xx,&x)); 1701 PetscCall(VecRestoreArray(yy,&y)); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1706 { 1707 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1708 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1709 const PetscScalar *x,*v; 1710 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1711 const PetscInt m = b->AIJ->rmap->n,*idx; 1712 PetscInt n,i; 1713 1714 PetscFunctionBegin; 1715 PetscCall(VecSet(yy,0.0)); 1716 PetscCall(VecGetArrayRead(xx,&x)); 1717 PetscCall(VecGetArray(yy,&y)); 1718 1719 for (i=0; i<m; i++) { 1720 idx = a->j + a->i[i]; 1721 v = a->a + a->i[i]; 1722 n = a->i[i+1] - a->i[i]; 1723 alpha1 = x[10*i]; 1724 alpha2 = x[10*i+1]; 1725 alpha3 = x[10*i+2]; 1726 alpha4 = x[10*i+3]; 1727 alpha5 = x[10*i+4]; 1728 alpha6 = x[10*i+5]; 1729 alpha7 = x[10*i+6]; 1730 alpha8 = x[10*i+7]; 1731 alpha9 = x[10*i+8]; 1732 alpha10 = x[10*i+9]; 1733 while (n-->0) { 1734 y[10*(*idx)] += alpha1*(*v); 1735 y[10*(*idx)+1] += alpha2*(*v); 1736 y[10*(*idx)+2] += alpha3*(*v); 1737 y[10*(*idx)+3] += alpha4*(*v); 1738 y[10*(*idx)+4] += alpha5*(*v); 1739 y[10*(*idx)+5] += alpha6*(*v); 1740 y[10*(*idx)+6] += alpha7*(*v); 1741 y[10*(*idx)+7] += alpha8*(*v); 1742 y[10*(*idx)+8] += alpha9*(*v); 1743 y[10*(*idx)+9] += alpha10*(*v); 1744 idx++; v++; 1745 } 1746 } 1747 PetscCall(PetscLogFlops(20.0*a->nz)); 1748 PetscCall(VecRestoreArrayRead(xx,&x)); 1749 PetscCall(VecRestoreArray(yy,&y)); 1750 PetscFunctionReturn(0); 1751 } 1752 1753 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1754 { 1755 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1757 const PetscScalar *x,*v; 1758 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1759 const PetscInt m = b->AIJ->rmap->n,*idx; 1760 PetscInt n,i; 1761 1762 PetscFunctionBegin; 1763 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1764 PetscCall(VecGetArrayRead(xx,&x)); 1765 PetscCall(VecGetArray(zz,&y)); 1766 for (i=0; i<m; i++) { 1767 idx = a->j + a->i[i]; 1768 v = a->a + a->i[i]; 1769 n = a->i[i+1] - a->i[i]; 1770 alpha1 = x[10*i]; 1771 alpha2 = x[10*i+1]; 1772 alpha3 = x[10*i+2]; 1773 alpha4 = x[10*i+3]; 1774 alpha5 = x[10*i+4]; 1775 alpha6 = x[10*i+5]; 1776 alpha7 = x[10*i+6]; 1777 alpha8 = x[10*i+7]; 1778 alpha9 = x[10*i+8]; 1779 alpha10 = x[10*i+9]; 1780 while (n-->0) { 1781 y[10*(*idx)] += alpha1*(*v); 1782 y[10*(*idx)+1] += alpha2*(*v); 1783 y[10*(*idx)+2] += alpha3*(*v); 1784 y[10*(*idx)+3] += alpha4*(*v); 1785 y[10*(*idx)+4] += alpha5*(*v); 1786 y[10*(*idx)+5] += alpha6*(*v); 1787 y[10*(*idx)+6] += alpha7*(*v); 1788 y[10*(*idx)+7] += alpha8*(*v); 1789 y[10*(*idx)+8] += alpha9*(*v); 1790 y[10*(*idx)+9] += alpha10*(*v); 1791 idx++; v++; 1792 } 1793 } 1794 PetscCall(PetscLogFlops(20.0*a->nz)); 1795 PetscCall(VecRestoreArrayRead(xx,&x)); 1796 PetscCall(VecRestoreArray(zz,&y)); 1797 PetscFunctionReturn(0); 1798 } 1799 1800 /*--------------------------------------------------------------------------------------------*/ 1801 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1802 { 1803 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1805 const PetscScalar *x,*v; 1806 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1807 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1808 PetscInt nonzerorow=0,n,i,jrow,j; 1809 1810 PetscFunctionBegin; 1811 PetscCall(VecGetArrayRead(xx,&x)); 1812 PetscCall(VecGetArray(yy,&y)); 1813 idx = a->j; 1814 v = a->a; 1815 ii = a->i; 1816 1817 for (i=0; i<m; i++) { 1818 jrow = ii[i]; 1819 n = ii[i+1] - jrow; 1820 sum1 = 0.0; 1821 sum2 = 0.0; 1822 sum3 = 0.0; 1823 sum4 = 0.0; 1824 sum5 = 0.0; 1825 sum6 = 0.0; 1826 sum7 = 0.0; 1827 sum8 = 0.0; 1828 sum9 = 0.0; 1829 sum10 = 0.0; 1830 sum11 = 0.0; 1831 1832 nonzerorow += (n>0); 1833 for (j=0; j<n; j++) { 1834 sum1 += v[jrow]*x[11*idx[jrow]]; 1835 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1836 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1837 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1838 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1839 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1840 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1841 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1842 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1843 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1844 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1845 jrow++; 1846 } 1847 y[11*i] = sum1; 1848 y[11*i+1] = sum2; 1849 y[11*i+2] = sum3; 1850 y[11*i+3] = sum4; 1851 y[11*i+4] = sum5; 1852 y[11*i+5] = sum6; 1853 y[11*i+6] = sum7; 1854 y[11*i+7] = sum8; 1855 y[11*i+8] = sum9; 1856 y[11*i+9] = sum10; 1857 y[11*i+10] = sum11; 1858 } 1859 1860 PetscCall(PetscLogFlops(22.0*a->nz - 11*nonzerorow)); 1861 PetscCall(VecRestoreArrayRead(xx,&x)); 1862 PetscCall(VecRestoreArray(yy,&y)); 1863 PetscFunctionReturn(0); 1864 } 1865 1866 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1867 { 1868 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1869 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1870 const PetscScalar *x,*v; 1871 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1872 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1873 PetscInt n,i,jrow,j; 1874 1875 PetscFunctionBegin; 1876 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1877 PetscCall(VecGetArrayRead(xx,&x)); 1878 PetscCall(VecGetArray(zz,&y)); 1879 idx = a->j; 1880 v = a->a; 1881 ii = a->i; 1882 1883 for (i=0; i<m; i++) { 1884 jrow = ii[i]; 1885 n = ii[i+1] - jrow; 1886 sum1 = 0.0; 1887 sum2 = 0.0; 1888 sum3 = 0.0; 1889 sum4 = 0.0; 1890 sum5 = 0.0; 1891 sum6 = 0.0; 1892 sum7 = 0.0; 1893 sum8 = 0.0; 1894 sum9 = 0.0; 1895 sum10 = 0.0; 1896 sum11 = 0.0; 1897 for (j=0; j<n; j++) { 1898 sum1 += v[jrow]*x[11*idx[jrow]]; 1899 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1900 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1901 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1902 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1903 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1904 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1905 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1906 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1907 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1908 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1909 jrow++; 1910 } 1911 y[11*i] += sum1; 1912 y[11*i+1] += sum2; 1913 y[11*i+2] += sum3; 1914 y[11*i+3] += sum4; 1915 y[11*i+4] += sum5; 1916 y[11*i+5] += sum6; 1917 y[11*i+6] += sum7; 1918 y[11*i+7] += sum8; 1919 y[11*i+8] += sum9; 1920 y[11*i+9] += sum10; 1921 y[11*i+10] += sum11; 1922 } 1923 1924 PetscCall(PetscLogFlops(22.0*a->nz)); 1925 PetscCall(VecRestoreArrayRead(xx,&x)); 1926 PetscCall(VecRestoreArray(yy,&y)); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1931 { 1932 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1933 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1934 const PetscScalar *x,*v; 1935 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1936 const PetscInt m = b->AIJ->rmap->n,*idx; 1937 PetscInt n,i; 1938 1939 PetscFunctionBegin; 1940 PetscCall(VecSet(yy,0.0)); 1941 PetscCall(VecGetArrayRead(xx,&x)); 1942 PetscCall(VecGetArray(yy,&y)); 1943 1944 for (i=0; i<m; i++) { 1945 idx = a->j + a->i[i]; 1946 v = a->a + a->i[i]; 1947 n = a->i[i+1] - a->i[i]; 1948 alpha1 = x[11*i]; 1949 alpha2 = x[11*i+1]; 1950 alpha3 = x[11*i+2]; 1951 alpha4 = x[11*i+3]; 1952 alpha5 = x[11*i+4]; 1953 alpha6 = x[11*i+5]; 1954 alpha7 = x[11*i+6]; 1955 alpha8 = x[11*i+7]; 1956 alpha9 = x[11*i+8]; 1957 alpha10 = x[11*i+9]; 1958 alpha11 = x[11*i+10]; 1959 while (n-->0) { 1960 y[11*(*idx)] += alpha1*(*v); 1961 y[11*(*idx)+1] += alpha2*(*v); 1962 y[11*(*idx)+2] += alpha3*(*v); 1963 y[11*(*idx)+3] += alpha4*(*v); 1964 y[11*(*idx)+4] += alpha5*(*v); 1965 y[11*(*idx)+5] += alpha6*(*v); 1966 y[11*(*idx)+6] += alpha7*(*v); 1967 y[11*(*idx)+7] += alpha8*(*v); 1968 y[11*(*idx)+8] += alpha9*(*v); 1969 y[11*(*idx)+9] += alpha10*(*v); 1970 y[11*(*idx)+10] += alpha11*(*v); 1971 idx++; v++; 1972 } 1973 } 1974 PetscCall(PetscLogFlops(22.0*a->nz)); 1975 PetscCall(VecRestoreArrayRead(xx,&x)); 1976 PetscCall(VecRestoreArray(yy,&y)); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1981 { 1982 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1983 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1984 const PetscScalar *x,*v; 1985 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1986 const PetscInt m = b->AIJ->rmap->n,*idx; 1987 PetscInt n,i; 1988 1989 PetscFunctionBegin; 1990 if (yy != zz) PetscCall(VecCopy(yy,zz)); 1991 PetscCall(VecGetArrayRead(xx,&x)); 1992 PetscCall(VecGetArray(zz,&y)); 1993 for (i=0; i<m; i++) { 1994 idx = a->j + a->i[i]; 1995 v = a->a + a->i[i]; 1996 n = a->i[i+1] - a->i[i]; 1997 alpha1 = x[11*i]; 1998 alpha2 = x[11*i+1]; 1999 alpha3 = x[11*i+2]; 2000 alpha4 = x[11*i+3]; 2001 alpha5 = x[11*i+4]; 2002 alpha6 = x[11*i+5]; 2003 alpha7 = x[11*i+6]; 2004 alpha8 = x[11*i+7]; 2005 alpha9 = x[11*i+8]; 2006 alpha10 = x[11*i+9]; 2007 alpha11 = x[11*i+10]; 2008 while (n-->0) { 2009 y[11*(*idx)] += alpha1*(*v); 2010 y[11*(*idx)+1] += alpha2*(*v); 2011 y[11*(*idx)+2] += alpha3*(*v); 2012 y[11*(*idx)+3] += alpha4*(*v); 2013 y[11*(*idx)+4] += alpha5*(*v); 2014 y[11*(*idx)+5] += alpha6*(*v); 2015 y[11*(*idx)+6] += alpha7*(*v); 2016 y[11*(*idx)+7] += alpha8*(*v); 2017 y[11*(*idx)+8] += alpha9*(*v); 2018 y[11*(*idx)+9] += alpha10*(*v); 2019 y[11*(*idx)+10] += alpha11*(*v); 2020 idx++; v++; 2021 } 2022 } 2023 PetscCall(PetscLogFlops(22.0*a->nz)); 2024 PetscCall(VecRestoreArrayRead(xx,&x)); 2025 PetscCall(VecRestoreArray(zz,&y)); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 /*--------------------------------------------------------------------------------------------*/ 2030 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2031 { 2032 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2033 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2034 const PetscScalar *x,*v; 2035 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2036 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2037 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2038 PetscInt nonzerorow=0,n,i,jrow,j; 2039 2040 PetscFunctionBegin; 2041 PetscCall(VecGetArrayRead(xx,&x)); 2042 PetscCall(VecGetArray(yy,&y)); 2043 idx = a->j; 2044 v = a->a; 2045 ii = a->i; 2046 2047 for (i=0; i<m; i++) { 2048 jrow = ii[i]; 2049 n = ii[i+1] - jrow; 2050 sum1 = 0.0; 2051 sum2 = 0.0; 2052 sum3 = 0.0; 2053 sum4 = 0.0; 2054 sum5 = 0.0; 2055 sum6 = 0.0; 2056 sum7 = 0.0; 2057 sum8 = 0.0; 2058 sum9 = 0.0; 2059 sum10 = 0.0; 2060 sum11 = 0.0; 2061 sum12 = 0.0; 2062 sum13 = 0.0; 2063 sum14 = 0.0; 2064 sum15 = 0.0; 2065 sum16 = 0.0; 2066 2067 nonzerorow += (n>0); 2068 for (j=0; j<n; j++) { 2069 sum1 += v[jrow]*x[16*idx[jrow]]; 2070 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2071 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2072 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2073 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2074 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2075 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2076 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2077 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2078 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2079 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2080 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2081 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2082 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2083 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2084 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2085 jrow++; 2086 } 2087 y[16*i] = sum1; 2088 y[16*i+1] = sum2; 2089 y[16*i+2] = sum3; 2090 y[16*i+3] = sum4; 2091 y[16*i+4] = sum5; 2092 y[16*i+5] = sum6; 2093 y[16*i+6] = sum7; 2094 y[16*i+7] = sum8; 2095 y[16*i+8] = sum9; 2096 y[16*i+9] = sum10; 2097 y[16*i+10] = sum11; 2098 y[16*i+11] = sum12; 2099 y[16*i+12] = sum13; 2100 y[16*i+13] = sum14; 2101 y[16*i+14] = sum15; 2102 y[16*i+15] = sum16; 2103 } 2104 2105 PetscCall(PetscLogFlops(32.0*a->nz - 16.0*nonzerorow)); 2106 PetscCall(VecRestoreArrayRead(xx,&x)); 2107 PetscCall(VecRestoreArray(yy,&y)); 2108 PetscFunctionReturn(0); 2109 } 2110 2111 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2112 { 2113 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2114 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2115 const PetscScalar *x,*v; 2116 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2117 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2118 const PetscInt m = b->AIJ->rmap->n,*idx; 2119 PetscInt n,i; 2120 2121 PetscFunctionBegin; 2122 PetscCall(VecSet(yy,0.0)); 2123 PetscCall(VecGetArrayRead(xx,&x)); 2124 PetscCall(VecGetArray(yy,&y)); 2125 2126 for (i=0; i<m; i++) { 2127 idx = a->j + a->i[i]; 2128 v = a->a + a->i[i]; 2129 n = a->i[i+1] - a->i[i]; 2130 alpha1 = x[16*i]; 2131 alpha2 = x[16*i+1]; 2132 alpha3 = x[16*i+2]; 2133 alpha4 = x[16*i+3]; 2134 alpha5 = x[16*i+4]; 2135 alpha6 = x[16*i+5]; 2136 alpha7 = x[16*i+6]; 2137 alpha8 = x[16*i+7]; 2138 alpha9 = x[16*i+8]; 2139 alpha10 = x[16*i+9]; 2140 alpha11 = x[16*i+10]; 2141 alpha12 = x[16*i+11]; 2142 alpha13 = x[16*i+12]; 2143 alpha14 = x[16*i+13]; 2144 alpha15 = x[16*i+14]; 2145 alpha16 = x[16*i+15]; 2146 while (n-->0) { 2147 y[16*(*idx)] += alpha1*(*v); 2148 y[16*(*idx)+1] += alpha2*(*v); 2149 y[16*(*idx)+2] += alpha3*(*v); 2150 y[16*(*idx)+3] += alpha4*(*v); 2151 y[16*(*idx)+4] += alpha5*(*v); 2152 y[16*(*idx)+5] += alpha6*(*v); 2153 y[16*(*idx)+6] += alpha7*(*v); 2154 y[16*(*idx)+7] += alpha8*(*v); 2155 y[16*(*idx)+8] += alpha9*(*v); 2156 y[16*(*idx)+9] += alpha10*(*v); 2157 y[16*(*idx)+10] += alpha11*(*v); 2158 y[16*(*idx)+11] += alpha12*(*v); 2159 y[16*(*idx)+12] += alpha13*(*v); 2160 y[16*(*idx)+13] += alpha14*(*v); 2161 y[16*(*idx)+14] += alpha15*(*v); 2162 y[16*(*idx)+15] += alpha16*(*v); 2163 idx++; v++; 2164 } 2165 } 2166 PetscCall(PetscLogFlops(32.0*a->nz)); 2167 PetscCall(VecRestoreArrayRead(xx,&x)); 2168 PetscCall(VecRestoreArray(yy,&y)); 2169 PetscFunctionReturn(0); 2170 } 2171 2172 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2173 { 2174 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2175 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2176 const PetscScalar *x,*v; 2177 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2178 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2179 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2180 PetscInt n,i,jrow,j; 2181 2182 PetscFunctionBegin; 2183 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2184 PetscCall(VecGetArrayRead(xx,&x)); 2185 PetscCall(VecGetArray(zz,&y)); 2186 idx = a->j; 2187 v = a->a; 2188 ii = a->i; 2189 2190 for (i=0; i<m; i++) { 2191 jrow = ii[i]; 2192 n = ii[i+1] - jrow; 2193 sum1 = 0.0; 2194 sum2 = 0.0; 2195 sum3 = 0.0; 2196 sum4 = 0.0; 2197 sum5 = 0.0; 2198 sum6 = 0.0; 2199 sum7 = 0.0; 2200 sum8 = 0.0; 2201 sum9 = 0.0; 2202 sum10 = 0.0; 2203 sum11 = 0.0; 2204 sum12 = 0.0; 2205 sum13 = 0.0; 2206 sum14 = 0.0; 2207 sum15 = 0.0; 2208 sum16 = 0.0; 2209 for (j=0; j<n; j++) { 2210 sum1 += v[jrow]*x[16*idx[jrow]]; 2211 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2212 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2213 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2214 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2215 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2216 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2217 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2218 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2219 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2220 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2221 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2222 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2223 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2224 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2225 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2226 jrow++; 2227 } 2228 y[16*i] += sum1; 2229 y[16*i+1] += sum2; 2230 y[16*i+2] += sum3; 2231 y[16*i+3] += sum4; 2232 y[16*i+4] += sum5; 2233 y[16*i+5] += sum6; 2234 y[16*i+6] += sum7; 2235 y[16*i+7] += sum8; 2236 y[16*i+8] += sum9; 2237 y[16*i+9] += sum10; 2238 y[16*i+10] += sum11; 2239 y[16*i+11] += sum12; 2240 y[16*i+12] += sum13; 2241 y[16*i+13] += sum14; 2242 y[16*i+14] += sum15; 2243 y[16*i+15] += sum16; 2244 } 2245 2246 PetscCall(PetscLogFlops(32.0*a->nz)); 2247 PetscCall(VecRestoreArrayRead(xx,&x)); 2248 PetscCall(VecRestoreArray(zz,&y)); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2253 { 2254 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2255 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2256 const PetscScalar *x,*v; 2257 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2258 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2259 const PetscInt m = b->AIJ->rmap->n,*idx; 2260 PetscInt n,i; 2261 2262 PetscFunctionBegin; 2263 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2264 PetscCall(VecGetArrayRead(xx,&x)); 2265 PetscCall(VecGetArray(zz,&y)); 2266 for (i=0; i<m; i++) { 2267 idx = a->j + a->i[i]; 2268 v = a->a + a->i[i]; 2269 n = a->i[i+1] - a->i[i]; 2270 alpha1 = x[16*i]; 2271 alpha2 = x[16*i+1]; 2272 alpha3 = x[16*i+2]; 2273 alpha4 = x[16*i+3]; 2274 alpha5 = x[16*i+4]; 2275 alpha6 = x[16*i+5]; 2276 alpha7 = x[16*i+6]; 2277 alpha8 = x[16*i+7]; 2278 alpha9 = x[16*i+8]; 2279 alpha10 = x[16*i+9]; 2280 alpha11 = x[16*i+10]; 2281 alpha12 = x[16*i+11]; 2282 alpha13 = x[16*i+12]; 2283 alpha14 = x[16*i+13]; 2284 alpha15 = x[16*i+14]; 2285 alpha16 = x[16*i+15]; 2286 while (n-->0) { 2287 y[16*(*idx)] += alpha1*(*v); 2288 y[16*(*idx)+1] += alpha2*(*v); 2289 y[16*(*idx)+2] += alpha3*(*v); 2290 y[16*(*idx)+3] += alpha4*(*v); 2291 y[16*(*idx)+4] += alpha5*(*v); 2292 y[16*(*idx)+5] += alpha6*(*v); 2293 y[16*(*idx)+6] += alpha7*(*v); 2294 y[16*(*idx)+7] += alpha8*(*v); 2295 y[16*(*idx)+8] += alpha9*(*v); 2296 y[16*(*idx)+9] += alpha10*(*v); 2297 y[16*(*idx)+10] += alpha11*(*v); 2298 y[16*(*idx)+11] += alpha12*(*v); 2299 y[16*(*idx)+12] += alpha13*(*v); 2300 y[16*(*idx)+13] += alpha14*(*v); 2301 y[16*(*idx)+14] += alpha15*(*v); 2302 y[16*(*idx)+15] += alpha16*(*v); 2303 idx++; v++; 2304 } 2305 } 2306 PetscCall(PetscLogFlops(32.0*a->nz)); 2307 PetscCall(VecRestoreArrayRead(xx,&x)); 2308 PetscCall(VecRestoreArray(zz,&y)); 2309 PetscFunctionReturn(0); 2310 } 2311 2312 /*--------------------------------------------------------------------------------------------*/ 2313 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2314 { 2315 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2317 const PetscScalar *x,*v; 2318 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2319 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2320 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2321 PetscInt nonzerorow=0,n,i,jrow,j; 2322 2323 PetscFunctionBegin; 2324 PetscCall(VecGetArrayRead(xx,&x)); 2325 PetscCall(VecGetArray(yy,&y)); 2326 idx = a->j; 2327 v = a->a; 2328 ii = a->i; 2329 2330 for (i=0; i<m; i++) { 2331 jrow = ii[i]; 2332 n = ii[i+1] - jrow; 2333 sum1 = 0.0; 2334 sum2 = 0.0; 2335 sum3 = 0.0; 2336 sum4 = 0.0; 2337 sum5 = 0.0; 2338 sum6 = 0.0; 2339 sum7 = 0.0; 2340 sum8 = 0.0; 2341 sum9 = 0.0; 2342 sum10 = 0.0; 2343 sum11 = 0.0; 2344 sum12 = 0.0; 2345 sum13 = 0.0; 2346 sum14 = 0.0; 2347 sum15 = 0.0; 2348 sum16 = 0.0; 2349 sum17 = 0.0; 2350 sum18 = 0.0; 2351 2352 nonzerorow += (n>0); 2353 for (j=0; j<n; j++) { 2354 sum1 += v[jrow]*x[18*idx[jrow]]; 2355 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2356 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2357 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2358 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2359 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2360 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2361 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2362 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2363 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2364 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2365 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2366 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2367 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2368 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2369 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2370 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2371 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2372 jrow++; 2373 } 2374 y[18*i] = sum1; 2375 y[18*i+1] = sum2; 2376 y[18*i+2] = sum3; 2377 y[18*i+3] = sum4; 2378 y[18*i+4] = sum5; 2379 y[18*i+5] = sum6; 2380 y[18*i+6] = sum7; 2381 y[18*i+7] = sum8; 2382 y[18*i+8] = sum9; 2383 y[18*i+9] = sum10; 2384 y[18*i+10] = sum11; 2385 y[18*i+11] = sum12; 2386 y[18*i+12] = sum13; 2387 y[18*i+13] = sum14; 2388 y[18*i+14] = sum15; 2389 y[18*i+15] = sum16; 2390 y[18*i+16] = sum17; 2391 y[18*i+17] = sum18; 2392 } 2393 2394 PetscCall(PetscLogFlops(36.0*a->nz - 18.0*nonzerorow)); 2395 PetscCall(VecRestoreArrayRead(xx,&x)); 2396 PetscCall(VecRestoreArray(yy,&y)); 2397 PetscFunctionReturn(0); 2398 } 2399 2400 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2401 { 2402 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2403 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2404 const PetscScalar *x,*v; 2405 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2406 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2407 const PetscInt m = b->AIJ->rmap->n,*idx; 2408 PetscInt n,i; 2409 2410 PetscFunctionBegin; 2411 PetscCall(VecSet(yy,0.0)); 2412 PetscCall(VecGetArrayRead(xx,&x)); 2413 PetscCall(VecGetArray(yy,&y)); 2414 2415 for (i=0; i<m; i++) { 2416 idx = a->j + a->i[i]; 2417 v = a->a + a->i[i]; 2418 n = a->i[i+1] - a->i[i]; 2419 alpha1 = x[18*i]; 2420 alpha2 = x[18*i+1]; 2421 alpha3 = x[18*i+2]; 2422 alpha4 = x[18*i+3]; 2423 alpha5 = x[18*i+4]; 2424 alpha6 = x[18*i+5]; 2425 alpha7 = x[18*i+6]; 2426 alpha8 = x[18*i+7]; 2427 alpha9 = x[18*i+8]; 2428 alpha10 = x[18*i+9]; 2429 alpha11 = x[18*i+10]; 2430 alpha12 = x[18*i+11]; 2431 alpha13 = x[18*i+12]; 2432 alpha14 = x[18*i+13]; 2433 alpha15 = x[18*i+14]; 2434 alpha16 = x[18*i+15]; 2435 alpha17 = x[18*i+16]; 2436 alpha18 = x[18*i+17]; 2437 while (n-->0) { 2438 y[18*(*idx)] += alpha1*(*v); 2439 y[18*(*idx)+1] += alpha2*(*v); 2440 y[18*(*idx)+2] += alpha3*(*v); 2441 y[18*(*idx)+3] += alpha4*(*v); 2442 y[18*(*idx)+4] += alpha5*(*v); 2443 y[18*(*idx)+5] += alpha6*(*v); 2444 y[18*(*idx)+6] += alpha7*(*v); 2445 y[18*(*idx)+7] += alpha8*(*v); 2446 y[18*(*idx)+8] += alpha9*(*v); 2447 y[18*(*idx)+9] += alpha10*(*v); 2448 y[18*(*idx)+10] += alpha11*(*v); 2449 y[18*(*idx)+11] += alpha12*(*v); 2450 y[18*(*idx)+12] += alpha13*(*v); 2451 y[18*(*idx)+13] += alpha14*(*v); 2452 y[18*(*idx)+14] += alpha15*(*v); 2453 y[18*(*idx)+15] += alpha16*(*v); 2454 y[18*(*idx)+16] += alpha17*(*v); 2455 y[18*(*idx)+17] += alpha18*(*v); 2456 idx++; v++; 2457 } 2458 } 2459 PetscCall(PetscLogFlops(36.0*a->nz)); 2460 PetscCall(VecRestoreArrayRead(xx,&x)); 2461 PetscCall(VecRestoreArray(yy,&y)); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2466 { 2467 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2469 const PetscScalar *x,*v; 2470 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2471 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2472 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2473 PetscInt n,i,jrow,j; 2474 2475 PetscFunctionBegin; 2476 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2477 PetscCall(VecGetArrayRead(xx,&x)); 2478 PetscCall(VecGetArray(zz,&y)); 2479 idx = a->j; 2480 v = a->a; 2481 ii = a->i; 2482 2483 for (i=0; i<m; i++) { 2484 jrow = ii[i]; 2485 n = ii[i+1] - jrow; 2486 sum1 = 0.0; 2487 sum2 = 0.0; 2488 sum3 = 0.0; 2489 sum4 = 0.0; 2490 sum5 = 0.0; 2491 sum6 = 0.0; 2492 sum7 = 0.0; 2493 sum8 = 0.0; 2494 sum9 = 0.0; 2495 sum10 = 0.0; 2496 sum11 = 0.0; 2497 sum12 = 0.0; 2498 sum13 = 0.0; 2499 sum14 = 0.0; 2500 sum15 = 0.0; 2501 sum16 = 0.0; 2502 sum17 = 0.0; 2503 sum18 = 0.0; 2504 for (j=0; j<n; j++) { 2505 sum1 += v[jrow]*x[18*idx[jrow]]; 2506 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2507 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2508 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2509 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2510 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2511 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2512 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2513 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2514 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2515 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2516 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2517 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2518 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2519 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2520 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2521 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2522 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2523 jrow++; 2524 } 2525 y[18*i] += sum1; 2526 y[18*i+1] += sum2; 2527 y[18*i+2] += sum3; 2528 y[18*i+3] += sum4; 2529 y[18*i+4] += sum5; 2530 y[18*i+5] += sum6; 2531 y[18*i+6] += sum7; 2532 y[18*i+7] += sum8; 2533 y[18*i+8] += sum9; 2534 y[18*i+9] += sum10; 2535 y[18*i+10] += sum11; 2536 y[18*i+11] += sum12; 2537 y[18*i+12] += sum13; 2538 y[18*i+13] += sum14; 2539 y[18*i+14] += sum15; 2540 y[18*i+15] += sum16; 2541 y[18*i+16] += sum17; 2542 y[18*i+17] += sum18; 2543 } 2544 2545 PetscCall(PetscLogFlops(36.0*a->nz)); 2546 PetscCall(VecRestoreArrayRead(xx,&x)); 2547 PetscCall(VecRestoreArray(zz,&y)); 2548 PetscFunctionReturn(0); 2549 } 2550 2551 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2552 { 2553 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2554 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2555 const PetscScalar *x,*v; 2556 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2557 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2558 const PetscInt m = b->AIJ->rmap->n,*idx; 2559 PetscInt n,i; 2560 2561 PetscFunctionBegin; 2562 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2563 PetscCall(VecGetArrayRead(xx,&x)); 2564 PetscCall(VecGetArray(zz,&y)); 2565 for (i=0; i<m; i++) { 2566 idx = a->j + a->i[i]; 2567 v = a->a + a->i[i]; 2568 n = a->i[i+1] - a->i[i]; 2569 alpha1 = x[18*i]; 2570 alpha2 = x[18*i+1]; 2571 alpha3 = x[18*i+2]; 2572 alpha4 = x[18*i+3]; 2573 alpha5 = x[18*i+4]; 2574 alpha6 = x[18*i+5]; 2575 alpha7 = x[18*i+6]; 2576 alpha8 = x[18*i+7]; 2577 alpha9 = x[18*i+8]; 2578 alpha10 = x[18*i+9]; 2579 alpha11 = x[18*i+10]; 2580 alpha12 = x[18*i+11]; 2581 alpha13 = x[18*i+12]; 2582 alpha14 = x[18*i+13]; 2583 alpha15 = x[18*i+14]; 2584 alpha16 = x[18*i+15]; 2585 alpha17 = x[18*i+16]; 2586 alpha18 = x[18*i+17]; 2587 while (n-->0) { 2588 y[18*(*idx)] += alpha1*(*v); 2589 y[18*(*idx)+1] += alpha2*(*v); 2590 y[18*(*idx)+2] += alpha3*(*v); 2591 y[18*(*idx)+3] += alpha4*(*v); 2592 y[18*(*idx)+4] += alpha5*(*v); 2593 y[18*(*idx)+5] += alpha6*(*v); 2594 y[18*(*idx)+6] += alpha7*(*v); 2595 y[18*(*idx)+7] += alpha8*(*v); 2596 y[18*(*idx)+8] += alpha9*(*v); 2597 y[18*(*idx)+9] += alpha10*(*v); 2598 y[18*(*idx)+10] += alpha11*(*v); 2599 y[18*(*idx)+11] += alpha12*(*v); 2600 y[18*(*idx)+12] += alpha13*(*v); 2601 y[18*(*idx)+13] += alpha14*(*v); 2602 y[18*(*idx)+14] += alpha15*(*v); 2603 y[18*(*idx)+15] += alpha16*(*v); 2604 y[18*(*idx)+16] += alpha17*(*v); 2605 y[18*(*idx)+17] += alpha18*(*v); 2606 idx++; v++; 2607 } 2608 } 2609 PetscCall(PetscLogFlops(36.0*a->nz)); 2610 PetscCall(VecRestoreArrayRead(xx,&x)); 2611 PetscCall(VecRestoreArray(zz,&y)); 2612 PetscFunctionReturn(0); 2613 } 2614 2615 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2616 { 2617 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2618 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2619 const PetscScalar *x,*v; 2620 PetscScalar *y,*sums; 2621 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2622 PetscInt n,i,jrow,j,dof = b->dof,k; 2623 2624 PetscFunctionBegin; 2625 PetscCall(VecGetArrayRead(xx,&x)); 2626 PetscCall(VecSet(yy,0.0)); 2627 PetscCall(VecGetArray(yy,&y)); 2628 idx = a->j; 2629 v = a->a; 2630 ii = a->i; 2631 2632 for (i=0; i<m; i++) { 2633 jrow = ii[i]; 2634 n = ii[i+1] - jrow; 2635 sums = y + dof*i; 2636 for (j=0; j<n; j++) { 2637 for (k=0; k<dof; k++) { 2638 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2639 } 2640 jrow++; 2641 } 2642 } 2643 2644 PetscCall(PetscLogFlops(2.0*dof*a->nz)); 2645 PetscCall(VecRestoreArrayRead(xx,&x)); 2646 PetscCall(VecRestoreArray(yy,&y)); 2647 PetscFunctionReturn(0); 2648 } 2649 2650 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2651 { 2652 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2653 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2654 const PetscScalar *x,*v; 2655 PetscScalar *y,*sums; 2656 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2657 PetscInt n,i,jrow,j,dof = b->dof,k; 2658 2659 PetscFunctionBegin; 2660 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2661 PetscCall(VecGetArrayRead(xx,&x)); 2662 PetscCall(VecGetArray(zz,&y)); 2663 idx = a->j; 2664 v = a->a; 2665 ii = a->i; 2666 2667 for (i=0; i<m; i++) { 2668 jrow = ii[i]; 2669 n = ii[i+1] - jrow; 2670 sums = y + dof*i; 2671 for (j=0; j<n; j++) { 2672 for (k=0; k<dof; k++) { 2673 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2674 } 2675 jrow++; 2676 } 2677 } 2678 2679 PetscCall(PetscLogFlops(2.0*dof*a->nz)); 2680 PetscCall(VecRestoreArrayRead(xx,&x)); 2681 PetscCall(VecRestoreArray(zz,&y)); 2682 PetscFunctionReturn(0); 2683 } 2684 2685 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2686 { 2687 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2689 const PetscScalar *x,*v,*alpha; 2690 PetscScalar *y; 2691 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2692 PetscInt n,i,k; 2693 2694 PetscFunctionBegin; 2695 PetscCall(VecGetArrayRead(xx,&x)); 2696 PetscCall(VecSet(yy,0.0)); 2697 PetscCall(VecGetArray(yy,&y)); 2698 for (i=0; i<m; i++) { 2699 idx = a->j + a->i[i]; 2700 v = a->a + a->i[i]; 2701 n = a->i[i+1] - a->i[i]; 2702 alpha = x + dof*i; 2703 while (n-->0) { 2704 for (k=0; k<dof; k++) { 2705 y[dof*(*idx)+k] += alpha[k]*(*v); 2706 } 2707 idx++; v++; 2708 } 2709 } 2710 PetscCall(PetscLogFlops(2.0*dof*a->nz)); 2711 PetscCall(VecRestoreArrayRead(xx,&x)); 2712 PetscCall(VecRestoreArray(yy,&y)); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2717 { 2718 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2719 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2720 const PetscScalar *x,*v,*alpha; 2721 PetscScalar *y; 2722 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2723 PetscInt n,i,k; 2724 2725 PetscFunctionBegin; 2726 if (yy != zz) PetscCall(VecCopy(yy,zz)); 2727 PetscCall(VecGetArrayRead(xx,&x)); 2728 PetscCall(VecGetArray(zz,&y)); 2729 for (i=0; i<m; i++) { 2730 idx = a->j + a->i[i]; 2731 v = a->a + a->i[i]; 2732 n = a->i[i+1] - a->i[i]; 2733 alpha = x + dof*i; 2734 while (n-->0) { 2735 for (k=0; k<dof; k++) { 2736 y[dof*(*idx)+k] += alpha[k]*(*v); 2737 } 2738 idx++; v++; 2739 } 2740 } 2741 PetscCall(PetscLogFlops(2.0*dof*a->nz)); 2742 PetscCall(VecRestoreArrayRead(xx,&x)); 2743 PetscCall(VecRestoreArray(zz,&y)); 2744 PetscFunctionReturn(0); 2745 } 2746 2747 /*===================================================================================*/ 2748 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2749 { 2750 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2751 2752 PetscFunctionBegin; 2753 /* start the scatter */ 2754 PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 2755 PetscCall((*b->AIJ->ops->mult)(b->AIJ,xx,yy)); 2756 PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 2757 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy)); 2758 PetscFunctionReturn(0); 2759 } 2760 2761 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2762 { 2763 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2764 2765 PetscFunctionBegin; 2766 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w)); 2767 PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy)); 2768 PetscCall(VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE)); 2769 PetscCall(VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE)); 2770 PetscFunctionReturn(0); 2771 } 2772 2773 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2774 { 2775 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2776 2777 PetscFunctionBegin; 2778 /* start the scatter */ 2779 PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 2780 PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz)); 2781 PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 2782 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz)); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2787 { 2788 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2789 2790 PetscFunctionBegin; 2791 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w)); 2792 PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz)); 2793 PetscCall(VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE)); 2794 PetscCall(VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE)); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 /* ----------------------------------------------------------------*/ 2799 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 2800 { 2801 Mat_Product *product = C->product; 2802 2803 PetscFunctionBegin; 2804 if (product->type == MATPRODUCT_PtAP) { 2805 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 2806 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]); 2807 PetscFunctionReturn(0); 2808 } 2809 2810 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 2811 { 2812 Mat_Product *product = C->product; 2813 PetscBool flg = PETSC_FALSE; 2814 Mat A=product->A,P=product->B; 2815 PetscInt alg=1; /* set default algorithm */ 2816 #if !defined(PETSC_HAVE_HYPRE) 2817 const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"}; 2818 PetscInt nalg=4; 2819 #else 2820 const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"}; 2821 PetscInt nalg=5; 2822 #endif 2823 2824 PetscFunctionBegin; 2825 PetscCheck(product->type == MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]); 2826 2827 /* PtAP */ 2828 /* Check matrix local sizes */ 2829 PetscCheckFalse(A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); 2830 PetscCheckFalse(A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); 2831 2832 /* Set the default algorithm */ 2833 PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 2834 if (flg) { 2835 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2836 } 2837 2838 /* Get runtime option */ 2839 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 2840 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg)); 2841 if (flg) { 2842 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2843 } 2844 PetscOptionsEnd(); 2845 2846 PetscCall(PetscStrcmp(C->product->alg,"allatonce",&flg)); 2847 if (flg) { 2848 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2849 PetscFunctionReturn(0); 2850 } 2851 2852 PetscCall(PetscStrcmp(C->product->alg,"allatonce_merged",&flg)); 2853 if (flg) { 2854 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2855 PetscFunctionReturn(0); 2856 } 2857 2858 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 2859 PetscCall(PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 2860 PetscCall(MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P)); 2861 PetscCall(MatProductSetFromOptions(C)); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 /* ----------------------------------------------------------------*/ 2866 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C) 2867 { 2868 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2869 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2870 Mat P =pp->AIJ; 2871 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2872 PetscInt *pti,*ptj,*ptJ; 2873 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2874 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2875 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2876 MatScalar *ca; 2877 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2878 2879 PetscFunctionBegin; 2880 /* Get ij structure of P^T */ 2881 PetscCall(MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 2882 2883 cn = pn*ppdof; 2884 /* Allocate ci array, arrays for fill computation and */ 2885 /* free space for accumulating nonzero column info */ 2886 PetscCall(PetscMalloc1(cn+1,&ci)); 2887 ci[0] = 0; 2888 2889 /* Work arrays for rows of P^T*A */ 2890 PetscCall(PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow)); 2891 PetscCall(PetscArrayzero(ptadenserow,an)); 2892 PetscCall(PetscArrayzero(denserow,cn)); 2893 2894 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2895 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2896 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2897 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space)); 2898 current_space = free_space; 2899 2900 /* Determine symbolic info for each row of C: */ 2901 for (i=0; i<pn; i++) { 2902 ptnzi = pti[i+1] - pti[i]; 2903 ptJ = ptj + pti[i]; 2904 for (dof=0; dof<ppdof; dof++) { 2905 ptanzi = 0; 2906 /* Determine symbolic row of PtA: */ 2907 for (j=0; j<ptnzi; j++) { 2908 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2909 arow = ptJ[j]*ppdof + dof; 2910 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2911 anzj = ai[arow+1] - ai[arow]; 2912 ajj = aj + ai[arow]; 2913 for (k=0; k<anzj; k++) { 2914 if (!ptadenserow[ajj[k]]) { 2915 ptadenserow[ajj[k]] = -1; 2916 ptasparserow[ptanzi++] = ajj[k]; 2917 } 2918 } 2919 } 2920 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2921 ptaj = ptasparserow; 2922 cnzi = 0; 2923 for (j=0; j<ptanzi; j++) { 2924 /* Get offset within block of P */ 2925 pshift = *ptaj%ppdof; 2926 /* Get block row of P */ 2927 prow = (*ptaj++)/ppdof; /* integer division */ 2928 /* P has same number of nonzeros per row as the compressed form */ 2929 pnzj = pi[prow+1] - pi[prow]; 2930 pjj = pj + pi[prow]; 2931 for (k=0;k<pnzj;k++) { 2932 /* Locations in C are shifted by the offset within the block */ 2933 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2934 if (!denserow[pjj[k]*ppdof+pshift]) { 2935 denserow[pjj[k]*ppdof+pshift] = -1; 2936 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2937 } 2938 } 2939 } 2940 2941 /* sort sparserow */ 2942 PetscCall(PetscSortInt(cnzi,sparserow)); 2943 2944 /* If free space is not available, make more free space */ 2945 /* Double the amount of total space in the list */ 2946 if (current_space->local_remaining<cnzi) { 2947 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 2948 } 2949 2950 /* Copy data into free space, and zero out denserows */ 2951 PetscCall(PetscArraycpy(current_space->array,sparserow,cnzi)); 2952 2953 current_space->array += cnzi; 2954 current_space->local_used += cnzi; 2955 current_space->local_remaining -= cnzi; 2956 2957 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2958 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 2959 2960 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2961 /* For now, we will recompute what is needed. */ 2962 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2963 } 2964 } 2965 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2966 /* Allocate space for cj, initialize cj, and */ 2967 /* destroy list of free space and other temporary array(s) */ 2968 PetscCall(PetscMalloc1(ci[cn]+1,&cj)); 2969 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 2970 PetscCall(PetscFree4(ptadenserow,ptasparserow,denserow,sparserow)); 2971 2972 /* Allocate space for ca */ 2973 PetscCall(PetscCalloc1(ci[cn]+1,&ca)); 2974 2975 /* put together the new matrix */ 2976 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C)); 2977 PetscCall(MatSetBlockSize(C,pp->dof)); 2978 2979 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2980 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2981 c = (Mat_SeqAIJ*)(C->data); 2982 c->free_a = PETSC_TRUE; 2983 c->free_ij = PETSC_TRUE; 2984 c->nonew = 0; 2985 2986 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2987 C->ops->productnumeric = MatProductNumeric_PtAP; 2988 2989 /* Clean up. */ 2990 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj)); 2991 PetscFunctionReturn(0); 2992 } 2993 2994 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2995 { 2996 /* This routine requires testing -- first draft only */ 2997 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2998 Mat P =pp->AIJ; 2999 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3000 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3001 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3002 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3003 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3004 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3005 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3006 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3007 MatScalar *ca=c->a,*caj,*apa; 3008 3009 PetscFunctionBegin; 3010 /* Allocate temporary array for storage of one row of A*P */ 3011 PetscCall(PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense)); 3012 3013 /* Clear old values in C */ 3014 PetscCall(PetscArrayzero(ca,ci[cm])); 3015 3016 for (i=0; i<am; i++) { 3017 /* Form sparse row of A*P */ 3018 anzi = ai[i+1] - ai[i]; 3019 apnzj = 0; 3020 for (j=0; j<anzi; j++) { 3021 /* Get offset within block of P */ 3022 pshift = *aj%ppdof; 3023 /* Get block row of P */ 3024 prow = *aj++/ppdof; /* integer division */ 3025 pnzj = pi[prow+1] - pi[prow]; 3026 pjj = pj + pi[prow]; 3027 paj = pa + pi[prow]; 3028 for (k=0; k<pnzj; k++) { 3029 poffset = pjj[k]*ppdof+pshift; 3030 if (!apjdense[poffset]) { 3031 apjdense[poffset] = -1; 3032 apj[apnzj++] = poffset; 3033 } 3034 apa[poffset] += (*aa)*paj[k]; 3035 } 3036 PetscCall(PetscLogFlops(2.0*pnzj)); 3037 aa++; 3038 } 3039 3040 /* Sort the j index array for quick sparse axpy. */ 3041 /* Note: a array does not need sorting as it is in dense storage locations. */ 3042 PetscCall(PetscSortInt(apnzj,apj)); 3043 3044 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3045 prow = i/ppdof; /* integer division */ 3046 pshift = i%ppdof; 3047 poffset = pi[prow]; 3048 pnzi = pi[prow+1] - poffset; 3049 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3050 pJ = pj+poffset; 3051 pA = pa+poffset; 3052 for (j=0; j<pnzi; j++) { 3053 crow = (*pJ)*ppdof+pshift; 3054 cjj = cj + ci[crow]; 3055 caj = ca + ci[crow]; 3056 pJ++; 3057 /* Perform sparse axpy operation. Note cjj includes apj. */ 3058 for (k=0,nextap=0; nextap<apnzj; k++) { 3059 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3060 } 3061 PetscCall(PetscLogFlops(2.0*apnzj)); 3062 pA++; 3063 } 3064 3065 /* Zero the current row info for A*P */ 3066 for (j=0; j<apnzj; j++) { 3067 apa[apj[j]] = 0.; 3068 apjdense[apj[j]] = 0; 3069 } 3070 } 3071 3072 /* Assemble the final matrix and clean up */ 3073 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 3074 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 3075 PetscCall(PetscFree3(apa,apj,apjdense)); 3076 PetscFunctionReturn(0); 3077 } 3078 3079 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 3080 { 3081 Mat_Product *product = C->product; 3082 Mat A=product->A,P=product->B; 3083 3084 PetscFunctionBegin; 3085 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C)); 3086 PetscFunctionReturn(0); 3087 } 3088 3089 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3090 { 3091 PetscFunctionBegin; 3092 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 3093 } 3094 3095 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3096 { 3097 PetscFunctionBegin; 3098 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3099 } 3100 3101 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat); 3102 3103 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C) 3104 { 3105 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3106 3107 PetscFunctionBegin; 3108 3109 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C)); 3110 PetscFunctionReturn(0); 3111 } 3112 3113 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat); 3114 3115 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C) 3116 { 3117 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3118 3119 PetscFunctionBegin; 3120 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C)); 3121 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3122 PetscFunctionReturn(0); 3123 } 3124 3125 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat); 3126 3127 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C) 3128 { 3129 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3130 3131 PetscFunctionBegin; 3132 3133 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C)); 3134 PetscFunctionReturn(0); 3135 } 3136 3137 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat); 3138 3139 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C) 3140 { 3141 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3142 3143 PetscFunctionBegin; 3144 3145 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C)); 3146 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3147 PetscFunctionReturn(0); 3148 } 3149 3150 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3151 { 3152 Mat_Product *product = C->product; 3153 Mat A=product->A,P=product->B; 3154 PetscBool flg; 3155 3156 PetscFunctionBegin; 3157 PetscCall(PetscStrcmp(product->alg,"allatonce",&flg)); 3158 if (flg) { 3159 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C)); 3160 C->ops->productnumeric = MatProductNumeric_PtAP; 3161 PetscFunctionReturn(0); 3162 } 3163 3164 PetscCall(PetscStrcmp(product->alg,"allatonce_merged",&flg)); 3165 if (flg) { 3166 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C)); 3167 C->ops->productnumeric = MatProductNumeric_PtAP; 3168 PetscFunctionReturn(0); 3169 } 3170 3171 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 3172 } 3173 3174 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3175 { 3176 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3177 Mat a = b->AIJ,B; 3178 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3179 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3180 PetscInt *cols; 3181 PetscScalar *vals; 3182 3183 PetscFunctionBegin; 3184 PetscCall(MatGetSize(a,&m,&n)); 3185 PetscCall(PetscMalloc1(dof*m,&ilen)); 3186 for (i=0; i<m; i++) { 3187 nmax = PetscMax(nmax,aij->ilen[i]); 3188 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3189 } 3190 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 3191 PetscCall(MatSetSizes(B,dof*m,dof*n,dof*m,dof*n)); 3192 PetscCall(MatSetType(B,newtype)); 3193 PetscCall(MatSeqAIJSetPreallocation(B,0,ilen)); 3194 PetscCall(PetscFree(ilen)); 3195 PetscCall(PetscMalloc1(nmax,&icols)); 3196 ii = 0; 3197 for (i=0; i<m; i++) { 3198 PetscCall(MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals)); 3199 for (j=0; j<dof; j++) { 3200 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3201 PetscCall(MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES)); 3202 ii++; 3203 } 3204 PetscCall(MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals)); 3205 } 3206 PetscCall(PetscFree(icols)); 3207 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3208 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3209 3210 if (reuse == MAT_INPLACE_MATRIX) { 3211 PetscCall(MatHeaderReplace(A,&B)); 3212 } else { 3213 *newmat = B; 3214 } 3215 PetscFunctionReturn(0); 3216 } 3217 3218 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3219 3220 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3221 { 3222 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3223 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3224 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3225 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3226 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3227 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3228 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3229 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3230 PetscInt rstart,cstart,*garray,ii,k; 3231 PetscScalar *vals,*ovals; 3232 3233 PetscFunctionBegin; 3234 PetscCall(PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz)); 3235 for (i=0; i<A->rmap->n/dof; i++) { 3236 nmax = PetscMax(nmax,AIJ->ilen[i]); 3237 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3238 for (j=0; j<dof; j++) { 3239 dnz[dof*i+j] = AIJ->ilen[i]; 3240 onz[dof*i+j] = OAIJ->ilen[i]; 3241 } 3242 } 3243 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3244 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3245 PetscCall(MatSetType(B,newtype)); 3246 PetscCall(MatMPIAIJSetPreallocation(B,0,dnz,0,onz)); 3247 PetscCall(MatSetBlockSize(B,dof)); 3248 PetscCall(PetscFree2(dnz,onz)); 3249 3250 PetscCall(PetscMalloc2(nmax,&icols,onmax,&oicols)); 3251 rstart = dof*maij->A->rmap->rstart; 3252 cstart = dof*maij->A->cmap->rstart; 3253 garray = mpiaij->garray; 3254 3255 ii = rstart; 3256 for (i=0; i<A->rmap->n/dof; i++) { 3257 PetscCall(MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals)); 3258 PetscCall(MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals)); 3259 for (j=0; j<dof; j++) { 3260 for (k=0; k<ncols; k++) { 3261 icols[k] = cstart + dof*cols[k]+j; 3262 } 3263 for (k=0; k<oncols; k++) { 3264 oicols[k] = dof*garray[ocols[k]]+j; 3265 } 3266 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES)); 3267 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES)); 3268 ii++; 3269 } 3270 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals)); 3271 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals)); 3272 } 3273 PetscCall(PetscFree2(icols,oicols)); 3274 3275 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3276 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3277 3278 if (reuse == MAT_INPLACE_MATRIX) { 3279 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3280 ((PetscObject)A)->refct = 1; 3281 3282 PetscCall(MatHeaderReplace(A,&B)); 3283 3284 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3285 } else { 3286 *newmat = B; 3287 } 3288 PetscFunctionReturn(0); 3289 } 3290 3291 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3292 { 3293 Mat A; 3294 3295 PetscFunctionBegin; 3296 PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 3297 PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat)); 3298 PetscCall(MatDestroy(&A)); 3299 PetscFunctionReturn(0); 3300 } 3301 3302 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3303 { 3304 Mat A; 3305 3306 PetscFunctionBegin; 3307 PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 3308 PetscCall(MatCreateSubMatrices(A,n,irow,icol,scall,submat)); 3309 PetscCall(MatDestroy(&A)); 3310 PetscFunctionReturn(0); 3311 } 3312 3313 /* ---------------------------------------------------------------------------------- */ 3314 /*@ 3315 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3316 operations for multicomponent problems. It interpolates each component the same 3317 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3318 and MATMPIAIJ for distributed matrices. 3319 3320 Collective 3321 3322 Input Parameters: 3323 + A - the AIJ matrix describing the action on blocks 3324 - dof - the block size (number of components per node) 3325 3326 Output Parameter: 3327 . maij - the new MAIJ matrix 3328 3329 Operations provided: 3330 .vb 3331 MatMult 3332 MatMultTranspose 3333 MatMultAdd 3334 MatMultTransposeAdd 3335 MatView 3336 .ve 3337 3338 Level: advanced 3339 3340 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3341 @*/ 3342 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3343 { 3344 PetscMPIInt size; 3345 PetscInt n; 3346 Mat B; 3347 #if defined(PETSC_HAVE_CUDA) 3348 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3349 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3350 #endif 3351 3352 PetscFunctionBegin; 3353 dof = PetscAbs(dof); 3354 PetscCall(PetscObjectReference((PetscObject)A)); 3355 3356 if (dof == 1) *maij = A; 3357 else { 3358 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3359 /* propagate vec type */ 3360 PetscCall(MatSetVecType(B,A->defaultvectype)); 3361 PetscCall(MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N)); 3362 PetscCall(PetscLayoutSetBlockSize(B->rmap,dof)); 3363 PetscCall(PetscLayoutSetBlockSize(B->cmap,dof)); 3364 PetscCall(PetscLayoutSetUp(B->rmap)); 3365 PetscCall(PetscLayoutSetUp(B->cmap)); 3366 3367 B->assembled = PETSC_TRUE; 3368 3369 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3370 if (size == 1) { 3371 Mat_SeqMAIJ *b; 3372 3373 PetscCall(MatSetType(B,MATSEQMAIJ)); 3374 3375 B->ops->setup = NULL; 3376 B->ops->destroy = MatDestroy_SeqMAIJ; 3377 B->ops->view = MatView_SeqMAIJ; 3378 3379 b = (Mat_SeqMAIJ*)B->data; 3380 b->dof = dof; 3381 b->AIJ = A; 3382 3383 if (dof == 2) { 3384 B->ops->mult = MatMult_SeqMAIJ_2; 3385 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3386 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3387 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3388 } else if (dof == 3) { 3389 B->ops->mult = MatMult_SeqMAIJ_3; 3390 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3391 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3392 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3393 } else if (dof == 4) { 3394 B->ops->mult = MatMult_SeqMAIJ_4; 3395 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3396 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3397 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3398 } else if (dof == 5) { 3399 B->ops->mult = MatMult_SeqMAIJ_5; 3400 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3401 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3402 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3403 } else if (dof == 6) { 3404 B->ops->mult = MatMult_SeqMAIJ_6; 3405 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3406 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3407 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3408 } else if (dof == 7) { 3409 B->ops->mult = MatMult_SeqMAIJ_7; 3410 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3411 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3412 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3413 } else if (dof == 8) { 3414 B->ops->mult = MatMult_SeqMAIJ_8; 3415 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3416 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3417 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3418 } else if (dof == 9) { 3419 B->ops->mult = MatMult_SeqMAIJ_9; 3420 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3421 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3422 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3423 } else if (dof == 10) { 3424 B->ops->mult = MatMult_SeqMAIJ_10; 3425 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3426 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3427 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3428 } else if (dof == 11) { 3429 B->ops->mult = MatMult_SeqMAIJ_11; 3430 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3431 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3432 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3433 } else if (dof == 16) { 3434 B->ops->mult = MatMult_SeqMAIJ_16; 3435 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3436 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3437 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3438 } else if (dof == 18) { 3439 B->ops->mult = MatMult_SeqMAIJ_18; 3440 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3441 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3442 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3443 } else { 3444 B->ops->mult = MatMult_SeqMAIJ_N; 3445 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3446 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3447 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3448 } 3449 #if defined(PETSC_HAVE_CUDA) 3450 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ)); 3451 #endif 3452 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ)); 3453 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3454 } else { 3455 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3456 Mat_MPIMAIJ *b; 3457 IS from,to; 3458 Vec gvec; 3459 3460 PetscCall(MatSetType(B,MATMPIMAIJ)); 3461 3462 B->ops->setup = NULL; 3463 B->ops->destroy = MatDestroy_MPIMAIJ; 3464 B->ops->view = MatView_MPIMAIJ; 3465 3466 b = (Mat_MPIMAIJ*)B->data; 3467 b->dof = dof; 3468 b->A = A; 3469 3470 PetscCall(MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ)); 3471 PetscCall(MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ)); 3472 3473 PetscCall(VecGetSize(mpiaij->lvec,&n)); 3474 PetscCall(VecCreate(PETSC_COMM_SELF,&b->w)); 3475 PetscCall(VecSetSizes(b->w,n*dof,n*dof)); 3476 PetscCall(VecSetBlockSize(b->w,dof)); 3477 PetscCall(VecSetType(b->w,VECSEQ)); 3478 3479 /* create two temporary Index sets for build scatter gather */ 3480 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from)); 3481 PetscCall(ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to)); 3482 3483 /* create temporary global vector to generate scatter context */ 3484 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec)); 3485 3486 /* generate the scatter context */ 3487 PetscCall(VecScatterCreate(gvec,from,b->w,to,&b->ctx)); 3488 3489 PetscCall(ISDestroy(&from)); 3490 PetscCall(ISDestroy(&to)); 3491 PetscCall(VecDestroy(&gvec)); 3492 3493 B->ops->mult = MatMult_MPIMAIJ_dof; 3494 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3495 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3496 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3497 3498 #if defined(PETSC_HAVE_CUDA) 3499 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ)); 3500 #endif 3501 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ)); 3502 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3503 } 3504 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3505 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 3506 PetscCall(MatSetUp(B)); 3507 #if defined(PETSC_HAVE_CUDA) 3508 /* temporary until we have CUDA implementation of MAIJ */ 3509 { 3510 PetscBool flg; 3511 if (convert) { 3512 PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"")); 3513 if (flg) { 3514 PetscCall(MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B)); 3515 } 3516 } 3517 } 3518 #endif 3519 *maij = B; 3520 } 3521 PetscFunctionReturn(0); 3522 } 3523