1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 36 37 .seealso: MatCreateMAIJ() 38 @*/ 39 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 40 { 41 PetscErrorCode ierr; 42 PetscBool ismpimaij,isseqmaij; 43 44 PetscFunctionBegin; 45 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 46 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 47 if (ismpimaij) { 48 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 49 50 *B = b->A; 51 } else if (isseqmaij) { 52 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 53 54 *B = b->AIJ; 55 } else { 56 *B = A; 57 } 58 PetscFunctionReturn(0); 59 } 60 61 /*@ 62 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 63 64 Logically Collective 65 66 Input Parameter: 67 + A - the MAIJ matrix 68 - dof - the block size for the new matrix 69 70 Output Parameter: 71 . B - the new MAIJ matrix 72 73 Level: advanced 74 75 .seealso: MatCreateMAIJ() 76 @*/ 77 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 78 { 79 PetscErrorCode ierr; 80 Mat Aij = NULL; 81 82 PetscFunctionBegin; 83 PetscValidLogicalCollectiveInt(A,dof,2); 84 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 85 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 90 { 91 PetscErrorCode ierr; 92 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 93 94 PetscFunctionBegin; 95 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 96 ierr = PetscFree(A->data);CHKERRQ(ierr); 97 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 98 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 99 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr); 100 PetscFunctionReturn(0); 101 } 102 103 PetscErrorCode MatSetUp_MAIJ(Mat A) 104 { 105 PetscFunctionBegin; 106 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 107 PetscFunctionReturn(0); 108 } 109 110 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 111 { 112 PetscErrorCode ierr; 113 Mat B; 114 115 PetscFunctionBegin; 116 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 117 ierr = MatView(B,viewer);CHKERRQ(ierr); 118 ierr = MatDestroy(&B);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 123 { 124 PetscErrorCode ierr; 125 Mat B; 126 127 PetscFunctionBegin; 128 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 129 ierr = MatView(B,viewer);CHKERRQ(ierr); 130 ierr = MatDestroy(&B);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 135 { 136 PetscErrorCode ierr; 137 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 138 139 PetscFunctionBegin; 140 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 141 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 142 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 143 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 144 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 145 ierr = PetscFree(A->data);CHKERRQ(ierr); 146 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 147 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 148 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 /*MC 153 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 154 multicomponent problems, interpolating or restricting each component the same way independently. 155 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 156 157 Operations provided: 158 . MatMult 159 . MatMultTranspose 160 . MatMultAdd 161 . MatMultTransposeAdd 162 163 Level: advanced 164 165 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 166 M*/ 167 168 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 169 { 170 PetscErrorCode ierr; 171 Mat_MPIMAIJ *b; 172 PetscMPIInt size; 173 174 PetscFunctionBegin; 175 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 176 A->data = (void*)b; 177 178 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 179 180 A->ops->setup = MatSetUp_MAIJ; 181 182 b->AIJ = 0; 183 b->dof = 0; 184 b->OAIJ = 0; 185 b->ctx = 0; 186 b->w = 0; 187 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 188 if (size == 1) { 189 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 190 } else { 191 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 192 } 193 A->preallocated = PETSC_TRUE; 194 A->assembled = PETSC_TRUE; 195 PetscFunctionReturn(0); 196 } 197 198 /* --------------------------------------------------------------------------------------*/ 199 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 200 { 201 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 202 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 203 const PetscScalar *x,*v; 204 PetscScalar *y, sum1, sum2; 205 PetscErrorCode ierr; 206 PetscInt nonzerorow=0,n,i,jrow,j; 207 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 208 209 PetscFunctionBegin; 210 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 211 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 212 idx = a->j; 213 v = a->a; 214 ii = a->i; 215 216 for (i=0; i<m; i++) { 217 jrow = ii[i]; 218 n = ii[i+1] - jrow; 219 sum1 = 0.0; 220 sum2 = 0.0; 221 222 nonzerorow += (n>0); 223 for (j=0; j<n; j++) { 224 sum1 += v[jrow]*x[2*idx[jrow]]; 225 sum2 += v[jrow]*x[2*idx[jrow]+1]; 226 jrow++; 227 } 228 y[2*i] = sum1; 229 y[2*i+1] = sum2; 230 } 231 232 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 233 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 234 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 235 PetscFunctionReturn(0); 236 } 237 238 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 239 { 240 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 241 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 242 const PetscScalar *x,*v; 243 PetscScalar *y,alpha1,alpha2; 244 PetscErrorCode ierr; 245 PetscInt n,i; 246 const PetscInt m = b->AIJ->rmap->n,*idx; 247 248 PetscFunctionBegin; 249 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 250 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 251 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 252 253 for (i=0; i<m; i++) { 254 idx = a->j + a->i[i]; 255 v = a->a + a->i[i]; 256 n = a->i[i+1] - a->i[i]; 257 alpha1 = x[2*i]; 258 alpha2 = x[2*i+1]; 259 while (n-->0) { 260 y[2*(*idx)] += alpha1*(*v); 261 y[2*(*idx)+1] += alpha2*(*v); 262 idx++; v++; 263 } 264 } 265 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 266 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 267 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 272 { 273 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 274 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 275 const PetscScalar *x,*v; 276 PetscScalar *y,sum1, sum2; 277 PetscErrorCode ierr; 278 PetscInt n,i,jrow,j; 279 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 280 281 PetscFunctionBegin; 282 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 283 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 284 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 285 idx = a->j; 286 v = a->a; 287 ii = a->i; 288 289 for (i=0; i<m; i++) { 290 jrow = ii[i]; 291 n = ii[i+1] - jrow; 292 sum1 = 0.0; 293 sum2 = 0.0; 294 for (j=0; j<n; j++) { 295 sum1 += v[jrow]*x[2*idx[jrow]]; 296 sum2 += v[jrow]*x[2*idx[jrow]+1]; 297 jrow++; 298 } 299 y[2*i] += sum1; 300 y[2*i+1] += sum2; 301 } 302 303 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 304 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 305 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 309 { 310 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 311 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 312 const PetscScalar *x,*v; 313 PetscScalar *y,alpha1,alpha2; 314 PetscErrorCode ierr; 315 PetscInt n,i; 316 const PetscInt m = b->AIJ->rmap->n,*idx; 317 318 PetscFunctionBegin; 319 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 320 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 321 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 322 323 for (i=0; i<m; i++) { 324 idx = a->j + a->i[i]; 325 v = a->a + a->i[i]; 326 n = a->i[i+1] - a->i[i]; 327 alpha1 = x[2*i]; 328 alpha2 = x[2*i+1]; 329 while (n-->0) { 330 y[2*(*idx)] += alpha1*(*v); 331 y[2*(*idx)+1] += alpha2*(*v); 332 idx++; v++; 333 } 334 } 335 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 337 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 338 PetscFunctionReturn(0); 339 } 340 /* --------------------------------------------------------------------------------------*/ 341 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 342 { 343 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 345 const PetscScalar *x,*v; 346 PetscScalar *y,sum1, sum2, sum3; 347 PetscErrorCode ierr; 348 PetscInt nonzerorow=0,n,i,jrow,j; 349 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 350 351 PetscFunctionBegin; 352 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 353 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 354 idx = a->j; 355 v = a->a; 356 ii = a->i; 357 358 for (i=0; i<m; i++) { 359 jrow = ii[i]; 360 n = ii[i+1] - jrow; 361 sum1 = 0.0; 362 sum2 = 0.0; 363 sum3 = 0.0; 364 365 nonzerorow += (n>0); 366 for (j=0; j<n; j++) { 367 sum1 += v[jrow]*x[3*idx[jrow]]; 368 sum2 += v[jrow]*x[3*idx[jrow]+1]; 369 sum3 += v[jrow]*x[3*idx[jrow]+2]; 370 jrow++; 371 } 372 y[3*i] = sum1; 373 y[3*i+1] = sum2; 374 y[3*i+2] = sum3; 375 } 376 377 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 378 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 379 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 380 PetscFunctionReturn(0); 381 } 382 383 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 384 { 385 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 386 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 387 const PetscScalar *x,*v; 388 PetscScalar *y,alpha1,alpha2,alpha3; 389 PetscErrorCode ierr; 390 PetscInt n,i; 391 const PetscInt m = b->AIJ->rmap->n,*idx; 392 393 PetscFunctionBegin; 394 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 395 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 396 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 397 398 for (i=0; i<m; i++) { 399 idx = a->j + a->i[i]; 400 v = a->a + a->i[i]; 401 n = a->i[i+1] - a->i[i]; 402 alpha1 = x[3*i]; 403 alpha2 = x[3*i+1]; 404 alpha3 = x[3*i+2]; 405 while (n-->0) { 406 y[3*(*idx)] += alpha1*(*v); 407 y[3*(*idx)+1] += alpha2*(*v); 408 y[3*(*idx)+2] += alpha3*(*v); 409 idx++; v++; 410 } 411 } 412 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 414 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 415 PetscFunctionReturn(0); 416 } 417 418 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 419 { 420 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 422 const PetscScalar *x,*v; 423 PetscScalar *y,sum1, sum2, sum3; 424 PetscErrorCode ierr; 425 PetscInt n,i,jrow,j; 426 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 427 428 PetscFunctionBegin; 429 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 430 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 431 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 432 idx = a->j; 433 v = a->a; 434 ii = a->i; 435 436 for (i=0; i<m; i++) { 437 jrow = ii[i]; 438 n = ii[i+1] - jrow; 439 sum1 = 0.0; 440 sum2 = 0.0; 441 sum3 = 0.0; 442 for (j=0; j<n; j++) { 443 sum1 += v[jrow]*x[3*idx[jrow]]; 444 sum2 += v[jrow]*x[3*idx[jrow]+1]; 445 sum3 += v[jrow]*x[3*idx[jrow]+2]; 446 jrow++; 447 } 448 y[3*i] += sum1; 449 y[3*i+1] += sum2; 450 y[3*i+2] += sum3; 451 } 452 453 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 454 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 455 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 459 { 460 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 461 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 462 const PetscScalar *x,*v; 463 PetscScalar *y,alpha1,alpha2,alpha3; 464 PetscErrorCode ierr; 465 PetscInt n,i; 466 const PetscInt m = b->AIJ->rmap->n,*idx; 467 468 PetscFunctionBegin; 469 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 470 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 471 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 472 for (i=0; i<m; i++) { 473 idx = a->j + a->i[i]; 474 v = a->a + a->i[i]; 475 n = a->i[i+1] - a->i[i]; 476 alpha1 = x[3*i]; 477 alpha2 = x[3*i+1]; 478 alpha3 = x[3*i+2]; 479 while (n-->0) { 480 y[3*(*idx)] += alpha1*(*v); 481 y[3*(*idx)+1] += alpha2*(*v); 482 y[3*(*idx)+2] += alpha3*(*v); 483 idx++; v++; 484 } 485 } 486 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 488 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 /* ------------------------------------------------------------------------------*/ 493 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 494 { 495 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 496 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 497 const PetscScalar *x,*v; 498 PetscScalar *y,sum1, sum2, sum3, sum4; 499 PetscErrorCode ierr; 500 PetscInt nonzerorow=0,n,i,jrow,j; 501 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 502 503 PetscFunctionBegin; 504 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 505 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 506 idx = a->j; 507 v = a->a; 508 ii = a->i; 509 510 for (i=0; i<m; i++) { 511 jrow = ii[i]; 512 n = ii[i+1] - jrow; 513 sum1 = 0.0; 514 sum2 = 0.0; 515 sum3 = 0.0; 516 sum4 = 0.0; 517 nonzerorow += (n>0); 518 for (j=0; j<n; j++) { 519 sum1 += v[jrow]*x[4*idx[jrow]]; 520 sum2 += v[jrow]*x[4*idx[jrow]+1]; 521 sum3 += v[jrow]*x[4*idx[jrow]+2]; 522 sum4 += v[jrow]*x[4*idx[jrow]+3]; 523 jrow++; 524 } 525 y[4*i] = sum1; 526 y[4*i+1] = sum2; 527 y[4*i+2] = sum3; 528 y[4*i+3] = sum4; 529 } 530 531 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 532 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 533 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 534 PetscFunctionReturn(0); 535 } 536 537 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 538 { 539 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 541 const PetscScalar *x,*v; 542 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 543 PetscErrorCode ierr; 544 PetscInt n,i; 545 const PetscInt m = b->AIJ->rmap->n,*idx; 546 547 PetscFunctionBegin; 548 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 549 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 550 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 551 for (i=0; i<m; i++) { 552 idx = a->j + a->i[i]; 553 v = a->a + a->i[i]; 554 n = a->i[i+1] - a->i[i]; 555 alpha1 = x[4*i]; 556 alpha2 = x[4*i+1]; 557 alpha3 = x[4*i+2]; 558 alpha4 = x[4*i+3]; 559 while (n-->0) { 560 y[4*(*idx)] += alpha1*(*v); 561 y[4*(*idx)+1] += alpha2*(*v); 562 y[4*(*idx)+2] += alpha3*(*v); 563 y[4*(*idx)+3] += alpha4*(*v); 564 idx++; v++; 565 } 566 } 567 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 568 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 569 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 570 PetscFunctionReturn(0); 571 } 572 573 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 574 { 575 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 576 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 577 const PetscScalar *x,*v; 578 PetscScalar *y,sum1, sum2, sum3, sum4; 579 PetscErrorCode ierr; 580 PetscInt n,i,jrow,j; 581 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 582 583 PetscFunctionBegin; 584 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 585 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 586 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 587 idx = a->j; 588 v = a->a; 589 ii = a->i; 590 591 for (i=0; i<m; i++) { 592 jrow = ii[i]; 593 n = ii[i+1] - jrow; 594 sum1 = 0.0; 595 sum2 = 0.0; 596 sum3 = 0.0; 597 sum4 = 0.0; 598 for (j=0; j<n; j++) { 599 sum1 += v[jrow]*x[4*idx[jrow]]; 600 sum2 += v[jrow]*x[4*idx[jrow]+1]; 601 sum3 += v[jrow]*x[4*idx[jrow]+2]; 602 sum4 += v[jrow]*x[4*idx[jrow]+3]; 603 jrow++; 604 } 605 y[4*i] += sum1; 606 y[4*i+1] += sum2; 607 y[4*i+2] += sum3; 608 y[4*i+3] += sum4; 609 } 610 611 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 612 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 613 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 617 { 618 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 619 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 620 const PetscScalar *x,*v; 621 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 622 PetscErrorCode ierr; 623 PetscInt n,i; 624 const PetscInt m = b->AIJ->rmap->n,*idx; 625 626 PetscFunctionBegin; 627 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 628 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 629 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 630 631 for (i=0; i<m; i++) { 632 idx = a->j + a->i[i]; 633 v = a->a + a->i[i]; 634 n = a->i[i+1] - a->i[i]; 635 alpha1 = x[4*i]; 636 alpha2 = x[4*i+1]; 637 alpha3 = x[4*i+2]; 638 alpha4 = x[4*i+3]; 639 while (n-->0) { 640 y[4*(*idx)] += alpha1*(*v); 641 y[4*(*idx)+1] += alpha2*(*v); 642 y[4*(*idx)+2] += alpha3*(*v); 643 y[4*(*idx)+3] += alpha4*(*v); 644 idx++; v++; 645 } 646 } 647 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 648 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 649 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 /* ------------------------------------------------------------------------------*/ 653 654 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 655 { 656 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 657 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 658 const PetscScalar *x,*v; 659 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 660 PetscErrorCode ierr; 661 PetscInt nonzerorow=0,n,i,jrow,j; 662 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 663 664 PetscFunctionBegin; 665 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 666 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 667 idx = a->j; 668 v = a->a; 669 ii = a->i; 670 671 for (i=0; i<m; i++) { 672 jrow = ii[i]; 673 n = ii[i+1] - jrow; 674 sum1 = 0.0; 675 sum2 = 0.0; 676 sum3 = 0.0; 677 sum4 = 0.0; 678 sum5 = 0.0; 679 680 nonzerorow += (n>0); 681 for (j=0; j<n; j++) { 682 sum1 += v[jrow]*x[5*idx[jrow]]; 683 sum2 += v[jrow]*x[5*idx[jrow]+1]; 684 sum3 += v[jrow]*x[5*idx[jrow]+2]; 685 sum4 += v[jrow]*x[5*idx[jrow]+3]; 686 sum5 += v[jrow]*x[5*idx[jrow]+4]; 687 jrow++; 688 } 689 y[5*i] = sum1; 690 y[5*i+1] = sum2; 691 y[5*i+2] = sum3; 692 y[5*i+3] = sum4; 693 y[5*i+4] = sum5; 694 } 695 696 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 697 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 698 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 703 { 704 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 705 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 706 const PetscScalar *x,*v; 707 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 708 PetscErrorCode ierr; 709 PetscInt n,i; 710 const PetscInt m = b->AIJ->rmap->n,*idx; 711 712 PetscFunctionBegin; 713 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 714 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 715 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 716 717 for (i=0; i<m; i++) { 718 idx = a->j + a->i[i]; 719 v = a->a + a->i[i]; 720 n = a->i[i+1] - a->i[i]; 721 alpha1 = x[5*i]; 722 alpha2 = x[5*i+1]; 723 alpha3 = x[5*i+2]; 724 alpha4 = x[5*i+3]; 725 alpha5 = x[5*i+4]; 726 while (n-->0) { 727 y[5*(*idx)] += alpha1*(*v); 728 y[5*(*idx)+1] += alpha2*(*v); 729 y[5*(*idx)+2] += alpha3*(*v); 730 y[5*(*idx)+3] += alpha4*(*v); 731 y[5*(*idx)+4] += alpha5*(*v); 732 idx++; v++; 733 } 734 } 735 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 736 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 737 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 738 PetscFunctionReturn(0); 739 } 740 741 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 742 { 743 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 744 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 745 const PetscScalar *x,*v; 746 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 747 PetscErrorCode ierr; 748 PetscInt n,i,jrow,j; 749 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 750 751 PetscFunctionBegin; 752 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 753 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 754 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 755 idx = a->j; 756 v = a->a; 757 ii = a->i; 758 759 for (i=0; i<m; i++) { 760 jrow = ii[i]; 761 n = ii[i+1] - jrow; 762 sum1 = 0.0; 763 sum2 = 0.0; 764 sum3 = 0.0; 765 sum4 = 0.0; 766 sum5 = 0.0; 767 for (j=0; j<n; j++) { 768 sum1 += v[jrow]*x[5*idx[jrow]]; 769 sum2 += v[jrow]*x[5*idx[jrow]+1]; 770 sum3 += v[jrow]*x[5*idx[jrow]+2]; 771 sum4 += v[jrow]*x[5*idx[jrow]+3]; 772 sum5 += v[jrow]*x[5*idx[jrow]+4]; 773 jrow++; 774 } 775 y[5*i] += sum1; 776 y[5*i+1] += sum2; 777 y[5*i+2] += sum3; 778 y[5*i+3] += sum4; 779 y[5*i+4] += sum5; 780 } 781 782 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 783 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 784 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 789 { 790 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 791 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 792 const PetscScalar *x,*v; 793 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 794 PetscErrorCode ierr; 795 PetscInt n,i; 796 const PetscInt m = b->AIJ->rmap->n,*idx; 797 798 PetscFunctionBegin; 799 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 800 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 801 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 802 803 for (i=0; i<m; i++) { 804 idx = a->j + a->i[i]; 805 v = a->a + a->i[i]; 806 n = a->i[i+1] - a->i[i]; 807 alpha1 = x[5*i]; 808 alpha2 = x[5*i+1]; 809 alpha3 = x[5*i+2]; 810 alpha4 = x[5*i+3]; 811 alpha5 = x[5*i+4]; 812 while (n-->0) { 813 y[5*(*idx)] += alpha1*(*v); 814 y[5*(*idx)+1] += alpha2*(*v); 815 y[5*(*idx)+2] += alpha3*(*v); 816 y[5*(*idx)+3] += alpha4*(*v); 817 y[5*(*idx)+4] += alpha5*(*v); 818 idx++; v++; 819 } 820 } 821 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 822 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 823 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 /* ------------------------------------------------------------------------------*/ 828 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 829 { 830 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 831 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 832 const PetscScalar *x,*v; 833 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 834 PetscErrorCode ierr; 835 PetscInt nonzerorow=0,n,i,jrow,j; 836 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 837 838 PetscFunctionBegin; 839 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 840 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 841 idx = a->j; 842 v = a->a; 843 ii = a->i; 844 845 for (i=0; i<m; i++) { 846 jrow = ii[i]; 847 n = ii[i+1] - jrow; 848 sum1 = 0.0; 849 sum2 = 0.0; 850 sum3 = 0.0; 851 sum4 = 0.0; 852 sum5 = 0.0; 853 sum6 = 0.0; 854 855 nonzerorow += (n>0); 856 for (j=0; j<n; j++) { 857 sum1 += v[jrow]*x[6*idx[jrow]]; 858 sum2 += v[jrow]*x[6*idx[jrow]+1]; 859 sum3 += v[jrow]*x[6*idx[jrow]+2]; 860 sum4 += v[jrow]*x[6*idx[jrow]+3]; 861 sum5 += v[jrow]*x[6*idx[jrow]+4]; 862 sum6 += v[jrow]*x[6*idx[jrow]+5]; 863 jrow++; 864 } 865 y[6*i] = sum1; 866 y[6*i+1] = sum2; 867 y[6*i+2] = sum3; 868 y[6*i+3] = sum4; 869 y[6*i+4] = sum5; 870 y[6*i+5] = sum6; 871 } 872 873 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 874 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 875 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 880 { 881 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 882 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 883 const PetscScalar *x,*v; 884 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 885 PetscErrorCode ierr; 886 PetscInt n,i; 887 const PetscInt m = b->AIJ->rmap->n,*idx; 888 889 PetscFunctionBegin; 890 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 891 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 892 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 893 894 for (i=0; i<m; i++) { 895 idx = a->j + a->i[i]; 896 v = a->a + a->i[i]; 897 n = a->i[i+1] - a->i[i]; 898 alpha1 = x[6*i]; 899 alpha2 = x[6*i+1]; 900 alpha3 = x[6*i+2]; 901 alpha4 = x[6*i+3]; 902 alpha5 = x[6*i+4]; 903 alpha6 = x[6*i+5]; 904 while (n-->0) { 905 y[6*(*idx)] += alpha1*(*v); 906 y[6*(*idx)+1] += alpha2*(*v); 907 y[6*(*idx)+2] += alpha3*(*v); 908 y[6*(*idx)+3] += alpha4*(*v); 909 y[6*(*idx)+4] += alpha5*(*v); 910 y[6*(*idx)+5] += alpha6*(*v); 911 idx++; v++; 912 } 913 } 914 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 915 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 916 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 920 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 921 { 922 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 923 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 924 const PetscScalar *x,*v; 925 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 926 PetscErrorCode ierr; 927 PetscInt n,i,jrow,j; 928 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 929 930 PetscFunctionBegin; 931 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 932 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 933 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 934 idx = a->j; 935 v = a->a; 936 ii = a->i; 937 938 for (i=0; i<m; i++) { 939 jrow = ii[i]; 940 n = ii[i+1] - jrow; 941 sum1 = 0.0; 942 sum2 = 0.0; 943 sum3 = 0.0; 944 sum4 = 0.0; 945 sum5 = 0.0; 946 sum6 = 0.0; 947 for (j=0; j<n; j++) { 948 sum1 += v[jrow]*x[6*idx[jrow]]; 949 sum2 += v[jrow]*x[6*idx[jrow]+1]; 950 sum3 += v[jrow]*x[6*idx[jrow]+2]; 951 sum4 += v[jrow]*x[6*idx[jrow]+3]; 952 sum5 += v[jrow]*x[6*idx[jrow]+4]; 953 sum6 += v[jrow]*x[6*idx[jrow]+5]; 954 jrow++; 955 } 956 y[6*i] += sum1; 957 y[6*i+1] += sum2; 958 y[6*i+2] += sum3; 959 y[6*i+3] += sum4; 960 y[6*i+4] += sum5; 961 y[6*i+5] += sum6; 962 } 963 964 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 965 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 966 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 971 { 972 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 973 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 974 const PetscScalar *x,*v; 975 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 976 PetscErrorCode ierr; 977 PetscInt n,i; 978 const PetscInt m = b->AIJ->rmap->n,*idx; 979 980 PetscFunctionBegin; 981 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 982 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 983 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 984 985 for (i=0; i<m; i++) { 986 idx = a->j + a->i[i]; 987 v = a->a + a->i[i]; 988 n = a->i[i+1] - a->i[i]; 989 alpha1 = x[6*i]; 990 alpha2 = x[6*i+1]; 991 alpha3 = x[6*i+2]; 992 alpha4 = x[6*i+3]; 993 alpha5 = x[6*i+4]; 994 alpha6 = x[6*i+5]; 995 while (n-->0) { 996 y[6*(*idx)] += alpha1*(*v); 997 y[6*(*idx)+1] += alpha2*(*v); 998 y[6*(*idx)+2] += alpha3*(*v); 999 y[6*(*idx)+3] += alpha4*(*v); 1000 y[6*(*idx)+4] += alpha5*(*v); 1001 y[6*(*idx)+5] += alpha6*(*v); 1002 idx++; v++; 1003 } 1004 } 1005 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1006 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1007 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 /* ------------------------------------------------------------------------------*/ 1012 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1013 { 1014 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1015 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1016 const PetscScalar *x,*v; 1017 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1018 PetscErrorCode ierr; 1019 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1020 PetscInt nonzerorow=0,n,i,jrow,j; 1021 1022 PetscFunctionBegin; 1023 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1024 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1025 idx = a->j; 1026 v = a->a; 1027 ii = a->i; 1028 1029 for (i=0; i<m; i++) { 1030 jrow = ii[i]; 1031 n = ii[i+1] - jrow; 1032 sum1 = 0.0; 1033 sum2 = 0.0; 1034 sum3 = 0.0; 1035 sum4 = 0.0; 1036 sum5 = 0.0; 1037 sum6 = 0.0; 1038 sum7 = 0.0; 1039 1040 nonzerorow += (n>0); 1041 for (j=0; j<n; j++) { 1042 sum1 += v[jrow]*x[7*idx[jrow]]; 1043 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1044 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1045 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1046 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1047 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1048 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1049 jrow++; 1050 } 1051 y[7*i] = sum1; 1052 y[7*i+1] = sum2; 1053 y[7*i+2] = sum3; 1054 y[7*i+3] = sum4; 1055 y[7*i+4] = sum5; 1056 y[7*i+5] = sum6; 1057 y[7*i+6] = sum7; 1058 } 1059 1060 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1061 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1062 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1067 { 1068 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1069 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1070 const PetscScalar *x,*v; 1071 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1072 PetscErrorCode ierr; 1073 const PetscInt m = b->AIJ->rmap->n,*idx; 1074 PetscInt n,i; 1075 1076 PetscFunctionBegin; 1077 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1078 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1079 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1080 1081 for (i=0; i<m; i++) { 1082 idx = a->j + a->i[i]; 1083 v = a->a + a->i[i]; 1084 n = a->i[i+1] - a->i[i]; 1085 alpha1 = x[7*i]; 1086 alpha2 = x[7*i+1]; 1087 alpha3 = x[7*i+2]; 1088 alpha4 = x[7*i+3]; 1089 alpha5 = x[7*i+4]; 1090 alpha6 = x[7*i+5]; 1091 alpha7 = x[7*i+6]; 1092 while (n-->0) { 1093 y[7*(*idx)] += alpha1*(*v); 1094 y[7*(*idx)+1] += alpha2*(*v); 1095 y[7*(*idx)+2] += alpha3*(*v); 1096 y[7*(*idx)+3] += alpha4*(*v); 1097 y[7*(*idx)+4] += alpha5*(*v); 1098 y[7*(*idx)+5] += alpha6*(*v); 1099 y[7*(*idx)+6] += alpha7*(*v); 1100 idx++; v++; 1101 } 1102 } 1103 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1104 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1105 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1110 { 1111 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1112 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1113 const PetscScalar *x,*v; 1114 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1115 PetscErrorCode ierr; 1116 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1117 PetscInt n,i,jrow,j; 1118 1119 PetscFunctionBegin; 1120 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1121 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1122 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1123 idx = a->j; 1124 v = a->a; 1125 ii = a->i; 1126 1127 for (i=0; i<m; i++) { 1128 jrow = ii[i]; 1129 n = ii[i+1] - jrow; 1130 sum1 = 0.0; 1131 sum2 = 0.0; 1132 sum3 = 0.0; 1133 sum4 = 0.0; 1134 sum5 = 0.0; 1135 sum6 = 0.0; 1136 sum7 = 0.0; 1137 for (j=0; j<n; j++) { 1138 sum1 += v[jrow]*x[7*idx[jrow]]; 1139 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1140 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1141 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1142 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1143 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1144 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1145 jrow++; 1146 } 1147 y[7*i] += sum1; 1148 y[7*i+1] += sum2; 1149 y[7*i+2] += sum3; 1150 y[7*i+3] += sum4; 1151 y[7*i+4] += sum5; 1152 y[7*i+5] += sum6; 1153 y[7*i+6] += sum7; 1154 } 1155 1156 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1157 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1158 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1163 { 1164 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1165 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1166 const PetscScalar *x,*v; 1167 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1168 PetscErrorCode ierr; 1169 const PetscInt m = b->AIJ->rmap->n,*idx; 1170 PetscInt n,i; 1171 1172 PetscFunctionBegin; 1173 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1174 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1175 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1176 for (i=0; i<m; i++) { 1177 idx = a->j + a->i[i]; 1178 v = a->a + a->i[i]; 1179 n = a->i[i+1] - a->i[i]; 1180 alpha1 = x[7*i]; 1181 alpha2 = x[7*i+1]; 1182 alpha3 = x[7*i+2]; 1183 alpha4 = x[7*i+3]; 1184 alpha5 = x[7*i+4]; 1185 alpha6 = x[7*i+5]; 1186 alpha7 = x[7*i+6]; 1187 while (n-->0) { 1188 y[7*(*idx)] += alpha1*(*v); 1189 y[7*(*idx)+1] += alpha2*(*v); 1190 y[7*(*idx)+2] += alpha3*(*v); 1191 y[7*(*idx)+3] += alpha4*(*v); 1192 y[7*(*idx)+4] += alpha5*(*v); 1193 y[7*(*idx)+5] += alpha6*(*v); 1194 y[7*(*idx)+6] += alpha7*(*v); 1195 idx++; v++; 1196 } 1197 } 1198 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1199 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1200 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1205 { 1206 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1207 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1208 const PetscScalar *x,*v; 1209 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1210 PetscErrorCode ierr; 1211 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1212 PetscInt nonzerorow=0,n,i,jrow,j; 1213 1214 PetscFunctionBegin; 1215 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1216 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1217 idx = a->j; 1218 v = a->a; 1219 ii = a->i; 1220 1221 for (i=0; i<m; i++) { 1222 jrow = ii[i]; 1223 n = ii[i+1] - jrow; 1224 sum1 = 0.0; 1225 sum2 = 0.0; 1226 sum3 = 0.0; 1227 sum4 = 0.0; 1228 sum5 = 0.0; 1229 sum6 = 0.0; 1230 sum7 = 0.0; 1231 sum8 = 0.0; 1232 1233 nonzerorow += (n>0); 1234 for (j=0; j<n; j++) { 1235 sum1 += v[jrow]*x[8*idx[jrow]]; 1236 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1237 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1238 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1239 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1240 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1241 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1242 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1243 jrow++; 1244 } 1245 y[8*i] = sum1; 1246 y[8*i+1] = sum2; 1247 y[8*i+2] = sum3; 1248 y[8*i+3] = sum4; 1249 y[8*i+4] = sum5; 1250 y[8*i+5] = sum6; 1251 y[8*i+6] = sum7; 1252 y[8*i+7] = sum8; 1253 } 1254 1255 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1256 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1257 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1262 { 1263 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1264 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1265 const PetscScalar *x,*v; 1266 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1267 PetscErrorCode ierr; 1268 const PetscInt m = b->AIJ->rmap->n,*idx; 1269 PetscInt n,i; 1270 1271 PetscFunctionBegin; 1272 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1273 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1274 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1275 1276 for (i=0; i<m; i++) { 1277 idx = a->j + a->i[i]; 1278 v = a->a + a->i[i]; 1279 n = a->i[i+1] - a->i[i]; 1280 alpha1 = x[8*i]; 1281 alpha2 = x[8*i+1]; 1282 alpha3 = x[8*i+2]; 1283 alpha4 = x[8*i+3]; 1284 alpha5 = x[8*i+4]; 1285 alpha6 = x[8*i+5]; 1286 alpha7 = x[8*i+6]; 1287 alpha8 = x[8*i+7]; 1288 while (n-->0) { 1289 y[8*(*idx)] += alpha1*(*v); 1290 y[8*(*idx)+1] += alpha2*(*v); 1291 y[8*(*idx)+2] += alpha3*(*v); 1292 y[8*(*idx)+3] += alpha4*(*v); 1293 y[8*(*idx)+4] += alpha5*(*v); 1294 y[8*(*idx)+5] += alpha6*(*v); 1295 y[8*(*idx)+6] += alpha7*(*v); 1296 y[8*(*idx)+7] += alpha8*(*v); 1297 idx++; v++; 1298 } 1299 } 1300 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1301 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1302 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1307 { 1308 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1309 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1310 const PetscScalar *x,*v; 1311 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1312 PetscErrorCode ierr; 1313 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1314 PetscInt n,i,jrow,j; 1315 1316 PetscFunctionBegin; 1317 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1318 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1319 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1320 idx = a->j; 1321 v = a->a; 1322 ii = a->i; 1323 1324 for (i=0; i<m; i++) { 1325 jrow = ii[i]; 1326 n = ii[i+1] - jrow; 1327 sum1 = 0.0; 1328 sum2 = 0.0; 1329 sum3 = 0.0; 1330 sum4 = 0.0; 1331 sum5 = 0.0; 1332 sum6 = 0.0; 1333 sum7 = 0.0; 1334 sum8 = 0.0; 1335 for (j=0; j<n; j++) { 1336 sum1 += v[jrow]*x[8*idx[jrow]]; 1337 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1338 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1339 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1340 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1341 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1342 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1343 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1344 jrow++; 1345 } 1346 y[8*i] += sum1; 1347 y[8*i+1] += sum2; 1348 y[8*i+2] += sum3; 1349 y[8*i+3] += sum4; 1350 y[8*i+4] += sum5; 1351 y[8*i+5] += sum6; 1352 y[8*i+6] += sum7; 1353 y[8*i+7] += sum8; 1354 } 1355 1356 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1357 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1358 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1363 { 1364 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1365 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1366 const PetscScalar *x,*v; 1367 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1368 PetscErrorCode ierr; 1369 const PetscInt m = b->AIJ->rmap->n,*idx; 1370 PetscInt n,i; 1371 1372 PetscFunctionBegin; 1373 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1374 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1375 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1376 for (i=0; i<m; i++) { 1377 idx = a->j + a->i[i]; 1378 v = a->a + a->i[i]; 1379 n = a->i[i+1] - a->i[i]; 1380 alpha1 = x[8*i]; 1381 alpha2 = x[8*i+1]; 1382 alpha3 = x[8*i+2]; 1383 alpha4 = x[8*i+3]; 1384 alpha5 = x[8*i+4]; 1385 alpha6 = x[8*i+5]; 1386 alpha7 = x[8*i+6]; 1387 alpha8 = x[8*i+7]; 1388 while (n-->0) { 1389 y[8*(*idx)] += alpha1*(*v); 1390 y[8*(*idx)+1] += alpha2*(*v); 1391 y[8*(*idx)+2] += alpha3*(*v); 1392 y[8*(*idx)+3] += alpha4*(*v); 1393 y[8*(*idx)+4] += alpha5*(*v); 1394 y[8*(*idx)+5] += alpha6*(*v); 1395 y[8*(*idx)+6] += alpha7*(*v); 1396 y[8*(*idx)+7] += alpha8*(*v); 1397 idx++; v++; 1398 } 1399 } 1400 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1401 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1402 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /* ------------------------------------------------------------------------------*/ 1407 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1408 { 1409 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1410 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1411 const PetscScalar *x,*v; 1412 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1413 PetscErrorCode ierr; 1414 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1415 PetscInt nonzerorow=0,n,i,jrow,j; 1416 1417 PetscFunctionBegin; 1418 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1419 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1420 idx = a->j; 1421 v = a->a; 1422 ii = a->i; 1423 1424 for (i=0; i<m; i++) { 1425 jrow = ii[i]; 1426 n = ii[i+1] - jrow; 1427 sum1 = 0.0; 1428 sum2 = 0.0; 1429 sum3 = 0.0; 1430 sum4 = 0.0; 1431 sum5 = 0.0; 1432 sum6 = 0.0; 1433 sum7 = 0.0; 1434 sum8 = 0.0; 1435 sum9 = 0.0; 1436 1437 nonzerorow += (n>0); 1438 for (j=0; j<n; j++) { 1439 sum1 += v[jrow]*x[9*idx[jrow]]; 1440 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1441 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1442 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1443 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1444 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1445 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1446 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1447 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1448 jrow++; 1449 } 1450 y[9*i] = sum1; 1451 y[9*i+1] = sum2; 1452 y[9*i+2] = sum3; 1453 y[9*i+3] = sum4; 1454 y[9*i+4] = sum5; 1455 y[9*i+5] = sum6; 1456 y[9*i+6] = sum7; 1457 y[9*i+7] = sum8; 1458 y[9*i+8] = sum9; 1459 } 1460 1461 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1462 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1463 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 /* ------------------------------------------------------------------------------*/ 1468 1469 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1470 { 1471 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1472 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1473 const PetscScalar *x,*v; 1474 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1475 PetscErrorCode ierr; 1476 const PetscInt m = b->AIJ->rmap->n,*idx; 1477 PetscInt n,i; 1478 1479 PetscFunctionBegin; 1480 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1481 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1482 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1483 1484 for (i=0; i<m; i++) { 1485 idx = a->j + a->i[i]; 1486 v = a->a + a->i[i]; 1487 n = a->i[i+1] - a->i[i]; 1488 alpha1 = x[9*i]; 1489 alpha2 = x[9*i+1]; 1490 alpha3 = x[9*i+2]; 1491 alpha4 = x[9*i+3]; 1492 alpha5 = x[9*i+4]; 1493 alpha6 = x[9*i+5]; 1494 alpha7 = x[9*i+6]; 1495 alpha8 = x[9*i+7]; 1496 alpha9 = x[9*i+8]; 1497 while (n-->0) { 1498 y[9*(*idx)] += alpha1*(*v); 1499 y[9*(*idx)+1] += alpha2*(*v); 1500 y[9*(*idx)+2] += alpha3*(*v); 1501 y[9*(*idx)+3] += alpha4*(*v); 1502 y[9*(*idx)+4] += alpha5*(*v); 1503 y[9*(*idx)+5] += alpha6*(*v); 1504 y[9*(*idx)+6] += alpha7*(*v); 1505 y[9*(*idx)+7] += alpha8*(*v); 1506 y[9*(*idx)+8] += alpha9*(*v); 1507 idx++; v++; 1508 } 1509 } 1510 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1511 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1512 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1517 { 1518 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1519 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1520 const PetscScalar *x,*v; 1521 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1522 PetscErrorCode ierr; 1523 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1524 PetscInt n,i,jrow,j; 1525 1526 PetscFunctionBegin; 1527 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1528 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1529 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1530 idx = a->j; 1531 v = a->a; 1532 ii = a->i; 1533 1534 for (i=0; i<m; i++) { 1535 jrow = ii[i]; 1536 n = ii[i+1] - jrow; 1537 sum1 = 0.0; 1538 sum2 = 0.0; 1539 sum3 = 0.0; 1540 sum4 = 0.0; 1541 sum5 = 0.0; 1542 sum6 = 0.0; 1543 sum7 = 0.0; 1544 sum8 = 0.0; 1545 sum9 = 0.0; 1546 for (j=0; j<n; j++) { 1547 sum1 += v[jrow]*x[9*idx[jrow]]; 1548 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1549 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1550 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1551 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1552 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1553 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1554 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1555 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1556 jrow++; 1557 } 1558 y[9*i] += sum1; 1559 y[9*i+1] += sum2; 1560 y[9*i+2] += sum3; 1561 y[9*i+3] += sum4; 1562 y[9*i+4] += sum5; 1563 y[9*i+5] += sum6; 1564 y[9*i+6] += sum7; 1565 y[9*i+7] += sum8; 1566 y[9*i+8] += sum9; 1567 } 1568 1569 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1570 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1571 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1576 { 1577 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1578 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1579 const PetscScalar *x,*v; 1580 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1581 PetscErrorCode ierr; 1582 const PetscInt m = b->AIJ->rmap->n,*idx; 1583 PetscInt n,i; 1584 1585 PetscFunctionBegin; 1586 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1587 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1588 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1589 for (i=0; i<m; i++) { 1590 idx = a->j + a->i[i]; 1591 v = a->a + a->i[i]; 1592 n = a->i[i+1] - a->i[i]; 1593 alpha1 = x[9*i]; 1594 alpha2 = x[9*i+1]; 1595 alpha3 = x[9*i+2]; 1596 alpha4 = x[9*i+3]; 1597 alpha5 = x[9*i+4]; 1598 alpha6 = x[9*i+5]; 1599 alpha7 = x[9*i+6]; 1600 alpha8 = x[9*i+7]; 1601 alpha9 = x[9*i+8]; 1602 while (n-->0) { 1603 y[9*(*idx)] += alpha1*(*v); 1604 y[9*(*idx)+1] += alpha2*(*v); 1605 y[9*(*idx)+2] += alpha3*(*v); 1606 y[9*(*idx)+3] += alpha4*(*v); 1607 y[9*(*idx)+4] += alpha5*(*v); 1608 y[9*(*idx)+5] += alpha6*(*v); 1609 y[9*(*idx)+6] += alpha7*(*v); 1610 y[9*(*idx)+7] += alpha8*(*v); 1611 y[9*(*idx)+8] += alpha9*(*v); 1612 idx++; v++; 1613 } 1614 } 1615 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1616 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1617 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1618 PetscFunctionReturn(0); 1619 } 1620 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1621 { 1622 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1623 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1624 const PetscScalar *x,*v; 1625 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1626 PetscErrorCode ierr; 1627 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1628 PetscInt nonzerorow=0,n,i,jrow,j; 1629 1630 PetscFunctionBegin; 1631 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1632 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1633 idx = a->j; 1634 v = a->a; 1635 ii = a->i; 1636 1637 for (i=0; i<m; i++) { 1638 jrow = ii[i]; 1639 n = ii[i+1] - jrow; 1640 sum1 = 0.0; 1641 sum2 = 0.0; 1642 sum3 = 0.0; 1643 sum4 = 0.0; 1644 sum5 = 0.0; 1645 sum6 = 0.0; 1646 sum7 = 0.0; 1647 sum8 = 0.0; 1648 sum9 = 0.0; 1649 sum10 = 0.0; 1650 1651 nonzerorow += (n>0); 1652 for (j=0; j<n; j++) { 1653 sum1 += v[jrow]*x[10*idx[jrow]]; 1654 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1655 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1656 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1657 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1658 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1659 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1660 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1661 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1662 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1663 jrow++; 1664 } 1665 y[10*i] = sum1; 1666 y[10*i+1] = sum2; 1667 y[10*i+2] = sum3; 1668 y[10*i+3] = sum4; 1669 y[10*i+4] = sum5; 1670 y[10*i+5] = sum6; 1671 y[10*i+6] = sum7; 1672 y[10*i+7] = sum8; 1673 y[10*i+8] = sum9; 1674 y[10*i+9] = sum10; 1675 } 1676 1677 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1678 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1679 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 1683 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1684 { 1685 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1687 const PetscScalar *x,*v; 1688 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1689 PetscErrorCode ierr; 1690 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1691 PetscInt n,i,jrow,j; 1692 1693 PetscFunctionBegin; 1694 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1695 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1696 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1697 idx = a->j; 1698 v = a->a; 1699 ii = a->i; 1700 1701 for (i=0; i<m; i++) { 1702 jrow = ii[i]; 1703 n = ii[i+1] - jrow; 1704 sum1 = 0.0; 1705 sum2 = 0.0; 1706 sum3 = 0.0; 1707 sum4 = 0.0; 1708 sum5 = 0.0; 1709 sum6 = 0.0; 1710 sum7 = 0.0; 1711 sum8 = 0.0; 1712 sum9 = 0.0; 1713 sum10 = 0.0; 1714 for (j=0; j<n; j++) { 1715 sum1 += v[jrow]*x[10*idx[jrow]]; 1716 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1717 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1718 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1719 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1720 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1721 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1722 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1723 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1724 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1725 jrow++; 1726 } 1727 y[10*i] += sum1; 1728 y[10*i+1] += sum2; 1729 y[10*i+2] += sum3; 1730 y[10*i+3] += sum4; 1731 y[10*i+4] += sum5; 1732 y[10*i+5] += sum6; 1733 y[10*i+6] += sum7; 1734 y[10*i+7] += sum8; 1735 y[10*i+8] += sum9; 1736 y[10*i+9] += sum10; 1737 } 1738 1739 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1740 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1741 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1746 { 1747 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1749 const PetscScalar *x,*v; 1750 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1751 PetscErrorCode ierr; 1752 const PetscInt m = b->AIJ->rmap->n,*idx; 1753 PetscInt n,i; 1754 1755 PetscFunctionBegin; 1756 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1757 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1758 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1759 1760 for (i=0; i<m; i++) { 1761 idx = a->j + a->i[i]; 1762 v = a->a + a->i[i]; 1763 n = a->i[i+1] - a->i[i]; 1764 alpha1 = x[10*i]; 1765 alpha2 = x[10*i+1]; 1766 alpha3 = x[10*i+2]; 1767 alpha4 = x[10*i+3]; 1768 alpha5 = x[10*i+4]; 1769 alpha6 = x[10*i+5]; 1770 alpha7 = x[10*i+6]; 1771 alpha8 = x[10*i+7]; 1772 alpha9 = x[10*i+8]; 1773 alpha10 = x[10*i+9]; 1774 while (n-->0) { 1775 y[10*(*idx)] += alpha1*(*v); 1776 y[10*(*idx)+1] += alpha2*(*v); 1777 y[10*(*idx)+2] += alpha3*(*v); 1778 y[10*(*idx)+3] += alpha4*(*v); 1779 y[10*(*idx)+4] += alpha5*(*v); 1780 y[10*(*idx)+5] += alpha6*(*v); 1781 y[10*(*idx)+6] += alpha7*(*v); 1782 y[10*(*idx)+7] += alpha8*(*v); 1783 y[10*(*idx)+8] += alpha9*(*v); 1784 y[10*(*idx)+9] += alpha10*(*v); 1785 idx++; v++; 1786 } 1787 } 1788 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1789 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1790 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1795 { 1796 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1798 const PetscScalar *x,*v; 1799 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1800 PetscErrorCode ierr; 1801 const PetscInt m = b->AIJ->rmap->n,*idx; 1802 PetscInt n,i; 1803 1804 PetscFunctionBegin; 1805 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1806 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1807 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1808 for (i=0; i<m; i++) { 1809 idx = a->j + a->i[i]; 1810 v = a->a + a->i[i]; 1811 n = a->i[i+1] - a->i[i]; 1812 alpha1 = x[10*i]; 1813 alpha2 = x[10*i+1]; 1814 alpha3 = x[10*i+2]; 1815 alpha4 = x[10*i+3]; 1816 alpha5 = x[10*i+4]; 1817 alpha6 = x[10*i+5]; 1818 alpha7 = x[10*i+6]; 1819 alpha8 = x[10*i+7]; 1820 alpha9 = x[10*i+8]; 1821 alpha10 = x[10*i+9]; 1822 while (n-->0) { 1823 y[10*(*idx)] += alpha1*(*v); 1824 y[10*(*idx)+1] += alpha2*(*v); 1825 y[10*(*idx)+2] += alpha3*(*v); 1826 y[10*(*idx)+3] += alpha4*(*v); 1827 y[10*(*idx)+4] += alpha5*(*v); 1828 y[10*(*idx)+5] += alpha6*(*v); 1829 y[10*(*idx)+6] += alpha7*(*v); 1830 y[10*(*idx)+7] += alpha8*(*v); 1831 y[10*(*idx)+8] += alpha9*(*v); 1832 y[10*(*idx)+9] += alpha10*(*v); 1833 idx++; v++; 1834 } 1835 } 1836 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1837 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1838 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 1843 /*--------------------------------------------------------------------------------------------*/ 1844 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1845 { 1846 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1847 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1848 const PetscScalar *x,*v; 1849 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1850 PetscErrorCode ierr; 1851 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1852 PetscInt nonzerorow=0,n,i,jrow,j; 1853 1854 PetscFunctionBegin; 1855 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1856 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1857 idx = a->j; 1858 v = a->a; 1859 ii = a->i; 1860 1861 for (i=0; i<m; i++) { 1862 jrow = ii[i]; 1863 n = ii[i+1] - jrow; 1864 sum1 = 0.0; 1865 sum2 = 0.0; 1866 sum3 = 0.0; 1867 sum4 = 0.0; 1868 sum5 = 0.0; 1869 sum6 = 0.0; 1870 sum7 = 0.0; 1871 sum8 = 0.0; 1872 sum9 = 0.0; 1873 sum10 = 0.0; 1874 sum11 = 0.0; 1875 1876 nonzerorow += (n>0); 1877 for (j=0; j<n; j++) { 1878 sum1 += v[jrow]*x[11*idx[jrow]]; 1879 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1880 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1881 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1882 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1883 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1884 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1885 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1886 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1887 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1888 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1889 jrow++; 1890 } 1891 y[11*i] = sum1; 1892 y[11*i+1] = sum2; 1893 y[11*i+2] = sum3; 1894 y[11*i+3] = sum4; 1895 y[11*i+4] = sum5; 1896 y[11*i+5] = sum6; 1897 y[11*i+6] = sum7; 1898 y[11*i+7] = sum8; 1899 y[11*i+8] = sum9; 1900 y[11*i+9] = sum10; 1901 y[11*i+10] = sum11; 1902 } 1903 1904 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1905 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1906 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1911 { 1912 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1913 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1914 const PetscScalar *x,*v; 1915 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1916 PetscErrorCode ierr; 1917 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1918 PetscInt n,i,jrow,j; 1919 1920 PetscFunctionBegin; 1921 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1922 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1923 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1924 idx = a->j; 1925 v = a->a; 1926 ii = a->i; 1927 1928 for (i=0; i<m; i++) { 1929 jrow = ii[i]; 1930 n = ii[i+1] - jrow; 1931 sum1 = 0.0; 1932 sum2 = 0.0; 1933 sum3 = 0.0; 1934 sum4 = 0.0; 1935 sum5 = 0.0; 1936 sum6 = 0.0; 1937 sum7 = 0.0; 1938 sum8 = 0.0; 1939 sum9 = 0.0; 1940 sum10 = 0.0; 1941 sum11 = 0.0; 1942 for (j=0; j<n; j++) { 1943 sum1 += v[jrow]*x[11*idx[jrow]]; 1944 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1945 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1946 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1947 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1948 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1949 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1950 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1951 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1952 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1953 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1954 jrow++; 1955 } 1956 y[11*i] += sum1; 1957 y[11*i+1] += sum2; 1958 y[11*i+2] += sum3; 1959 y[11*i+3] += sum4; 1960 y[11*i+4] += sum5; 1961 y[11*i+5] += sum6; 1962 y[11*i+6] += sum7; 1963 y[11*i+7] += sum8; 1964 y[11*i+8] += sum9; 1965 y[11*i+9] += sum10; 1966 y[11*i+10] += sum11; 1967 } 1968 1969 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 1970 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1971 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1972 PetscFunctionReturn(0); 1973 } 1974 1975 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1976 { 1977 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1978 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1979 const PetscScalar *x,*v; 1980 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1981 PetscErrorCode ierr; 1982 const PetscInt m = b->AIJ->rmap->n,*idx; 1983 PetscInt n,i; 1984 1985 PetscFunctionBegin; 1986 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1987 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1988 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1989 1990 for (i=0; i<m; i++) { 1991 idx = a->j + a->i[i]; 1992 v = a->a + a->i[i]; 1993 n = a->i[i+1] - a->i[i]; 1994 alpha1 = x[11*i]; 1995 alpha2 = x[11*i+1]; 1996 alpha3 = x[11*i+2]; 1997 alpha4 = x[11*i+3]; 1998 alpha5 = x[11*i+4]; 1999 alpha6 = x[11*i+5]; 2000 alpha7 = x[11*i+6]; 2001 alpha8 = x[11*i+7]; 2002 alpha9 = x[11*i+8]; 2003 alpha10 = x[11*i+9]; 2004 alpha11 = x[11*i+10]; 2005 while (n-->0) { 2006 y[11*(*idx)] += alpha1*(*v); 2007 y[11*(*idx)+1] += alpha2*(*v); 2008 y[11*(*idx)+2] += alpha3*(*v); 2009 y[11*(*idx)+3] += alpha4*(*v); 2010 y[11*(*idx)+4] += alpha5*(*v); 2011 y[11*(*idx)+5] += alpha6*(*v); 2012 y[11*(*idx)+6] += alpha7*(*v); 2013 y[11*(*idx)+7] += alpha8*(*v); 2014 y[11*(*idx)+8] += alpha9*(*v); 2015 y[11*(*idx)+9] += alpha10*(*v); 2016 y[11*(*idx)+10] += alpha11*(*v); 2017 idx++; v++; 2018 } 2019 } 2020 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2021 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2022 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2027 { 2028 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2029 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2030 const PetscScalar *x,*v; 2031 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2032 PetscErrorCode ierr; 2033 const PetscInt m = b->AIJ->rmap->n,*idx; 2034 PetscInt n,i; 2035 2036 PetscFunctionBegin; 2037 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2038 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2039 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2040 for (i=0; i<m; i++) { 2041 idx = a->j + a->i[i]; 2042 v = a->a + a->i[i]; 2043 n = a->i[i+1] - a->i[i]; 2044 alpha1 = x[11*i]; 2045 alpha2 = x[11*i+1]; 2046 alpha3 = x[11*i+2]; 2047 alpha4 = x[11*i+3]; 2048 alpha5 = x[11*i+4]; 2049 alpha6 = x[11*i+5]; 2050 alpha7 = x[11*i+6]; 2051 alpha8 = x[11*i+7]; 2052 alpha9 = x[11*i+8]; 2053 alpha10 = x[11*i+9]; 2054 alpha11 = x[11*i+10]; 2055 while (n-->0) { 2056 y[11*(*idx)] += alpha1*(*v); 2057 y[11*(*idx)+1] += alpha2*(*v); 2058 y[11*(*idx)+2] += alpha3*(*v); 2059 y[11*(*idx)+3] += alpha4*(*v); 2060 y[11*(*idx)+4] += alpha5*(*v); 2061 y[11*(*idx)+5] += alpha6*(*v); 2062 y[11*(*idx)+6] += alpha7*(*v); 2063 y[11*(*idx)+7] += alpha8*(*v); 2064 y[11*(*idx)+8] += alpha9*(*v); 2065 y[11*(*idx)+9] += alpha10*(*v); 2066 y[11*(*idx)+10] += alpha11*(*v); 2067 idx++; v++; 2068 } 2069 } 2070 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2071 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2072 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 2077 /*--------------------------------------------------------------------------------------------*/ 2078 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2079 { 2080 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2081 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2082 const PetscScalar *x,*v; 2083 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2084 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2085 PetscErrorCode ierr; 2086 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2087 PetscInt nonzerorow=0,n,i,jrow,j; 2088 2089 PetscFunctionBegin; 2090 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2091 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2092 idx = a->j; 2093 v = a->a; 2094 ii = a->i; 2095 2096 for (i=0; i<m; i++) { 2097 jrow = ii[i]; 2098 n = ii[i+1] - jrow; 2099 sum1 = 0.0; 2100 sum2 = 0.0; 2101 sum3 = 0.0; 2102 sum4 = 0.0; 2103 sum5 = 0.0; 2104 sum6 = 0.0; 2105 sum7 = 0.0; 2106 sum8 = 0.0; 2107 sum9 = 0.0; 2108 sum10 = 0.0; 2109 sum11 = 0.0; 2110 sum12 = 0.0; 2111 sum13 = 0.0; 2112 sum14 = 0.0; 2113 sum15 = 0.0; 2114 sum16 = 0.0; 2115 2116 nonzerorow += (n>0); 2117 for (j=0; j<n; j++) { 2118 sum1 += v[jrow]*x[16*idx[jrow]]; 2119 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2120 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2121 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2122 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2123 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2124 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2125 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2126 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2127 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2128 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2129 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2130 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2131 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2132 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2133 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2134 jrow++; 2135 } 2136 y[16*i] = sum1; 2137 y[16*i+1] = sum2; 2138 y[16*i+2] = sum3; 2139 y[16*i+3] = sum4; 2140 y[16*i+4] = sum5; 2141 y[16*i+5] = sum6; 2142 y[16*i+6] = sum7; 2143 y[16*i+7] = sum8; 2144 y[16*i+8] = sum9; 2145 y[16*i+9] = sum10; 2146 y[16*i+10] = sum11; 2147 y[16*i+11] = sum12; 2148 y[16*i+12] = sum13; 2149 y[16*i+13] = sum14; 2150 y[16*i+14] = sum15; 2151 y[16*i+15] = sum16; 2152 } 2153 2154 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2155 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2156 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2161 { 2162 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2163 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2164 const PetscScalar *x,*v; 2165 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2166 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2167 PetscErrorCode ierr; 2168 const PetscInt m = b->AIJ->rmap->n,*idx; 2169 PetscInt n,i; 2170 2171 PetscFunctionBegin; 2172 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2173 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2174 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2175 2176 for (i=0; i<m; i++) { 2177 idx = a->j + a->i[i]; 2178 v = a->a + a->i[i]; 2179 n = a->i[i+1] - a->i[i]; 2180 alpha1 = x[16*i]; 2181 alpha2 = x[16*i+1]; 2182 alpha3 = x[16*i+2]; 2183 alpha4 = x[16*i+3]; 2184 alpha5 = x[16*i+4]; 2185 alpha6 = x[16*i+5]; 2186 alpha7 = x[16*i+6]; 2187 alpha8 = x[16*i+7]; 2188 alpha9 = x[16*i+8]; 2189 alpha10 = x[16*i+9]; 2190 alpha11 = x[16*i+10]; 2191 alpha12 = x[16*i+11]; 2192 alpha13 = x[16*i+12]; 2193 alpha14 = x[16*i+13]; 2194 alpha15 = x[16*i+14]; 2195 alpha16 = x[16*i+15]; 2196 while (n-->0) { 2197 y[16*(*idx)] += alpha1*(*v); 2198 y[16*(*idx)+1] += alpha2*(*v); 2199 y[16*(*idx)+2] += alpha3*(*v); 2200 y[16*(*idx)+3] += alpha4*(*v); 2201 y[16*(*idx)+4] += alpha5*(*v); 2202 y[16*(*idx)+5] += alpha6*(*v); 2203 y[16*(*idx)+6] += alpha7*(*v); 2204 y[16*(*idx)+7] += alpha8*(*v); 2205 y[16*(*idx)+8] += alpha9*(*v); 2206 y[16*(*idx)+9] += alpha10*(*v); 2207 y[16*(*idx)+10] += alpha11*(*v); 2208 y[16*(*idx)+11] += alpha12*(*v); 2209 y[16*(*idx)+12] += alpha13*(*v); 2210 y[16*(*idx)+13] += alpha14*(*v); 2211 y[16*(*idx)+14] += alpha15*(*v); 2212 y[16*(*idx)+15] += alpha16*(*v); 2213 idx++; v++; 2214 } 2215 } 2216 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2217 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2218 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2223 { 2224 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2225 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2226 const PetscScalar *x,*v; 2227 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2228 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2229 PetscErrorCode ierr; 2230 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2231 PetscInt n,i,jrow,j; 2232 2233 PetscFunctionBegin; 2234 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2235 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2236 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2237 idx = a->j; 2238 v = a->a; 2239 ii = a->i; 2240 2241 for (i=0; i<m; i++) { 2242 jrow = ii[i]; 2243 n = ii[i+1] - jrow; 2244 sum1 = 0.0; 2245 sum2 = 0.0; 2246 sum3 = 0.0; 2247 sum4 = 0.0; 2248 sum5 = 0.0; 2249 sum6 = 0.0; 2250 sum7 = 0.0; 2251 sum8 = 0.0; 2252 sum9 = 0.0; 2253 sum10 = 0.0; 2254 sum11 = 0.0; 2255 sum12 = 0.0; 2256 sum13 = 0.0; 2257 sum14 = 0.0; 2258 sum15 = 0.0; 2259 sum16 = 0.0; 2260 for (j=0; j<n; j++) { 2261 sum1 += v[jrow]*x[16*idx[jrow]]; 2262 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2263 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2264 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2265 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2266 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2267 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2268 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2269 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2270 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2271 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2272 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2273 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2274 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2275 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2276 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2277 jrow++; 2278 } 2279 y[16*i] += sum1; 2280 y[16*i+1] += sum2; 2281 y[16*i+2] += sum3; 2282 y[16*i+3] += sum4; 2283 y[16*i+4] += sum5; 2284 y[16*i+5] += sum6; 2285 y[16*i+6] += sum7; 2286 y[16*i+7] += sum8; 2287 y[16*i+8] += sum9; 2288 y[16*i+9] += sum10; 2289 y[16*i+10] += sum11; 2290 y[16*i+11] += sum12; 2291 y[16*i+12] += sum13; 2292 y[16*i+13] += sum14; 2293 y[16*i+14] += sum15; 2294 y[16*i+15] += sum16; 2295 } 2296 2297 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2298 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2299 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2304 { 2305 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2306 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2307 const PetscScalar *x,*v; 2308 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2309 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2310 PetscErrorCode ierr; 2311 const PetscInt m = b->AIJ->rmap->n,*idx; 2312 PetscInt n,i; 2313 2314 PetscFunctionBegin; 2315 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2316 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2317 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2318 for (i=0; i<m; i++) { 2319 idx = a->j + a->i[i]; 2320 v = a->a + a->i[i]; 2321 n = a->i[i+1] - a->i[i]; 2322 alpha1 = x[16*i]; 2323 alpha2 = x[16*i+1]; 2324 alpha3 = x[16*i+2]; 2325 alpha4 = x[16*i+3]; 2326 alpha5 = x[16*i+4]; 2327 alpha6 = x[16*i+5]; 2328 alpha7 = x[16*i+6]; 2329 alpha8 = x[16*i+7]; 2330 alpha9 = x[16*i+8]; 2331 alpha10 = x[16*i+9]; 2332 alpha11 = x[16*i+10]; 2333 alpha12 = x[16*i+11]; 2334 alpha13 = x[16*i+12]; 2335 alpha14 = x[16*i+13]; 2336 alpha15 = x[16*i+14]; 2337 alpha16 = x[16*i+15]; 2338 while (n-->0) { 2339 y[16*(*idx)] += alpha1*(*v); 2340 y[16*(*idx)+1] += alpha2*(*v); 2341 y[16*(*idx)+2] += alpha3*(*v); 2342 y[16*(*idx)+3] += alpha4*(*v); 2343 y[16*(*idx)+4] += alpha5*(*v); 2344 y[16*(*idx)+5] += alpha6*(*v); 2345 y[16*(*idx)+6] += alpha7*(*v); 2346 y[16*(*idx)+7] += alpha8*(*v); 2347 y[16*(*idx)+8] += alpha9*(*v); 2348 y[16*(*idx)+9] += alpha10*(*v); 2349 y[16*(*idx)+10] += alpha11*(*v); 2350 y[16*(*idx)+11] += alpha12*(*v); 2351 y[16*(*idx)+12] += alpha13*(*v); 2352 y[16*(*idx)+13] += alpha14*(*v); 2353 y[16*(*idx)+14] += alpha15*(*v); 2354 y[16*(*idx)+15] += alpha16*(*v); 2355 idx++; v++; 2356 } 2357 } 2358 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2359 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2360 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2361 PetscFunctionReturn(0); 2362 } 2363 2364 /*--------------------------------------------------------------------------------------------*/ 2365 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2366 { 2367 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2368 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2369 const PetscScalar *x,*v; 2370 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2371 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2372 PetscErrorCode ierr; 2373 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2374 PetscInt nonzerorow=0,n,i,jrow,j; 2375 2376 PetscFunctionBegin; 2377 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2378 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2379 idx = a->j; 2380 v = a->a; 2381 ii = a->i; 2382 2383 for (i=0; i<m; i++) { 2384 jrow = ii[i]; 2385 n = ii[i+1] - jrow; 2386 sum1 = 0.0; 2387 sum2 = 0.0; 2388 sum3 = 0.0; 2389 sum4 = 0.0; 2390 sum5 = 0.0; 2391 sum6 = 0.0; 2392 sum7 = 0.0; 2393 sum8 = 0.0; 2394 sum9 = 0.0; 2395 sum10 = 0.0; 2396 sum11 = 0.0; 2397 sum12 = 0.0; 2398 sum13 = 0.0; 2399 sum14 = 0.0; 2400 sum15 = 0.0; 2401 sum16 = 0.0; 2402 sum17 = 0.0; 2403 sum18 = 0.0; 2404 2405 nonzerorow += (n>0); 2406 for (j=0; j<n; j++) { 2407 sum1 += v[jrow]*x[18*idx[jrow]]; 2408 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2409 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2410 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2411 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2412 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2413 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2414 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2415 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2416 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2417 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2418 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2419 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2420 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2421 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2422 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2423 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2424 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2425 jrow++; 2426 } 2427 y[18*i] = sum1; 2428 y[18*i+1] = sum2; 2429 y[18*i+2] = sum3; 2430 y[18*i+3] = sum4; 2431 y[18*i+4] = sum5; 2432 y[18*i+5] = sum6; 2433 y[18*i+6] = sum7; 2434 y[18*i+7] = sum8; 2435 y[18*i+8] = sum9; 2436 y[18*i+9] = sum10; 2437 y[18*i+10] = sum11; 2438 y[18*i+11] = sum12; 2439 y[18*i+12] = sum13; 2440 y[18*i+13] = sum14; 2441 y[18*i+14] = sum15; 2442 y[18*i+15] = sum16; 2443 y[18*i+16] = sum17; 2444 y[18*i+17] = sum18; 2445 } 2446 2447 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2448 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2449 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2454 { 2455 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2457 const PetscScalar *x,*v; 2458 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2459 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2460 PetscErrorCode ierr; 2461 const PetscInt m = b->AIJ->rmap->n,*idx; 2462 PetscInt n,i; 2463 2464 PetscFunctionBegin; 2465 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2466 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2467 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2468 2469 for (i=0; i<m; i++) { 2470 idx = a->j + a->i[i]; 2471 v = a->a + a->i[i]; 2472 n = a->i[i+1] - a->i[i]; 2473 alpha1 = x[18*i]; 2474 alpha2 = x[18*i+1]; 2475 alpha3 = x[18*i+2]; 2476 alpha4 = x[18*i+3]; 2477 alpha5 = x[18*i+4]; 2478 alpha6 = x[18*i+5]; 2479 alpha7 = x[18*i+6]; 2480 alpha8 = x[18*i+7]; 2481 alpha9 = x[18*i+8]; 2482 alpha10 = x[18*i+9]; 2483 alpha11 = x[18*i+10]; 2484 alpha12 = x[18*i+11]; 2485 alpha13 = x[18*i+12]; 2486 alpha14 = x[18*i+13]; 2487 alpha15 = x[18*i+14]; 2488 alpha16 = x[18*i+15]; 2489 alpha17 = x[18*i+16]; 2490 alpha18 = x[18*i+17]; 2491 while (n-->0) { 2492 y[18*(*idx)] += alpha1*(*v); 2493 y[18*(*idx)+1] += alpha2*(*v); 2494 y[18*(*idx)+2] += alpha3*(*v); 2495 y[18*(*idx)+3] += alpha4*(*v); 2496 y[18*(*idx)+4] += alpha5*(*v); 2497 y[18*(*idx)+5] += alpha6*(*v); 2498 y[18*(*idx)+6] += alpha7*(*v); 2499 y[18*(*idx)+7] += alpha8*(*v); 2500 y[18*(*idx)+8] += alpha9*(*v); 2501 y[18*(*idx)+9] += alpha10*(*v); 2502 y[18*(*idx)+10] += alpha11*(*v); 2503 y[18*(*idx)+11] += alpha12*(*v); 2504 y[18*(*idx)+12] += alpha13*(*v); 2505 y[18*(*idx)+13] += alpha14*(*v); 2506 y[18*(*idx)+14] += alpha15*(*v); 2507 y[18*(*idx)+15] += alpha16*(*v); 2508 y[18*(*idx)+16] += alpha17*(*v); 2509 y[18*(*idx)+17] += alpha18*(*v); 2510 idx++; v++; 2511 } 2512 } 2513 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2514 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2515 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2520 { 2521 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2523 const PetscScalar *x,*v; 2524 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2525 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2526 PetscErrorCode ierr; 2527 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2528 PetscInt n,i,jrow,j; 2529 2530 PetscFunctionBegin; 2531 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2532 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2533 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2534 idx = a->j; 2535 v = a->a; 2536 ii = a->i; 2537 2538 for (i=0; i<m; i++) { 2539 jrow = ii[i]; 2540 n = ii[i+1] - jrow; 2541 sum1 = 0.0; 2542 sum2 = 0.0; 2543 sum3 = 0.0; 2544 sum4 = 0.0; 2545 sum5 = 0.0; 2546 sum6 = 0.0; 2547 sum7 = 0.0; 2548 sum8 = 0.0; 2549 sum9 = 0.0; 2550 sum10 = 0.0; 2551 sum11 = 0.0; 2552 sum12 = 0.0; 2553 sum13 = 0.0; 2554 sum14 = 0.0; 2555 sum15 = 0.0; 2556 sum16 = 0.0; 2557 sum17 = 0.0; 2558 sum18 = 0.0; 2559 for (j=0; j<n; j++) { 2560 sum1 += v[jrow]*x[18*idx[jrow]]; 2561 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2562 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2563 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2564 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2565 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2566 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2567 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2568 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2569 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2570 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2571 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2572 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2573 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2574 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2575 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2576 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2577 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2578 jrow++; 2579 } 2580 y[18*i] += sum1; 2581 y[18*i+1] += sum2; 2582 y[18*i+2] += sum3; 2583 y[18*i+3] += sum4; 2584 y[18*i+4] += sum5; 2585 y[18*i+5] += sum6; 2586 y[18*i+6] += sum7; 2587 y[18*i+7] += sum8; 2588 y[18*i+8] += sum9; 2589 y[18*i+9] += sum10; 2590 y[18*i+10] += sum11; 2591 y[18*i+11] += sum12; 2592 y[18*i+12] += sum13; 2593 y[18*i+13] += sum14; 2594 y[18*i+14] += sum15; 2595 y[18*i+15] += sum16; 2596 y[18*i+16] += sum17; 2597 y[18*i+17] += sum18; 2598 } 2599 2600 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2601 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2602 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2603 PetscFunctionReturn(0); 2604 } 2605 2606 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2607 { 2608 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2609 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2610 const PetscScalar *x,*v; 2611 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2612 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2613 PetscErrorCode ierr; 2614 const PetscInt m = b->AIJ->rmap->n,*idx; 2615 PetscInt n,i; 2616 2617 PetscFunctionBegin; 2618 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2619 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2620 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2621 for (i=0; i<m; i++) { 2622 idx = a->j + a->i[i]; 2623 v = a->a + a->i[i]; 2624 n = a->i[i+1] - a->i[i]; 2625 alpha1 = x[18*i]; 2626 alpha2 = x[18*i+1]; 2627 alpha3 = x[18*i+2]; 2628 alpha4 = x[18*i+3]; 2629 alpha5 = x[18*i+4]; 2630 alpha6 = x[18*i+5]; 2631 alpha7 = x[18*i+6]; 2632 alpha8 = x[18*i+7]; 2633 alpha9 = x[18*i+8]; 2634 alpha10 = x[18*i+9]; 2635 alpha11 = x[18*i+10]; 2636 alpha12 = x[18*i+11]; 2637 alpha13 = x[18*i+12]; 2638 alpha14 = x[18*i+13]; 2639 alpha15 = x[18*i+14]; 2640 alpha16 = x[18*i+15]; 2641 alpha17 = x[18*i+16]; 2642 alpha18 = x[18*i+17]; 2643 while (n-->0) { 2644 y[18*(*idx)] += alpha1*(*v); 2645 y[18*(*idx)+1] += alpha2*(*v); 2646 y[18*(*idx)+2] += alpha3*(*v); 2647 y[18*(*idx)+3] += alpha4*(*v); 2648 y[18*(*idx)+4] += alpha5*(*v); 2649 y[18*(*idx)+5] += alpha6*(*v); 2650 y[18*(*idx)+6] += alpha7*(*v); 2651 y[18*(*idx)+7] += alpha8*(*v); 2652 y[18*(*idx)+8] += alpha9*(*v); 2653 y[18*(*idx)+9] += alpha10*(*v); 2654 y[18*(*idx)+10] += alpha11*(*v); 2655 y[18*(*idx)+11] += alpha12*(*v); 2656 y[18*(*idx)+12] += alpha13*(*v); 2657 y[18*(*idx)+13] += alpha14*(*v); 2658 y[18*(*idx)+14] += alpha15*(*v); 2659 y[18*(*idx)+15] += alpha16*(*v); 2660 y[18*(*idx)+16] += alpha17*(*v); 2661 y[18*(*idx)+17] += alpha18*(*v); 2662 idx++; v++; 2663 } 2664 } 2665 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2666 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2667 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2668 PetscFunctionReturn(0); 2669 } 2670 2671 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2672 { 2673 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2674 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2675 const PetscScalar *x,*v; 2676 PetscScalar *y,*sums; 2677 PetscErrorCode ierr; 2678 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2679 PetscInt n,i,jrow,j,dof = b->dof,k; 2680 2681 PetscFunctionBegin; 2682 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2683 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2684 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2685 idx = a->j; 2686 v = a->a; 2687 ii = a->i; 2688 2689 for (i=0; i<m; i++) { 2690 jrow = ii[i]; 2691 n = ii[i+1] - jrow; 2692 sums = y + dof*i; 2693 for (j=0; j<n; j++) { 2694 for (k=0; k<dof; k++) { 2695 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2696 } 2697 jrow++; 2698 } 2699 } 2700 2701 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2702 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2703 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2704 PetscFunctionReturn(0); 2705 } 2706 2707 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2708 { 2709 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2710 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2711 const PetscScalar *x,*v; 2712 PetscScalar *y,*sums; 2713 PetscErrorCode ierr; 2714 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2715 PetscInt n,i,jrow,j,dof = b->dof,k; 2716 2717 PetscFunctionBegin; 2718 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2719 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2720 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2721 idx = a->j; 2722 v = a->a; 2723 ii = a->i; 2724 2725 for (i=0; i<m; i++) { 2726 jrow = ii[i]; 2727 n = ii[i+1] - jrow; 2728 sums = y + dof*i; 2729 for (j=0; j<n; j++) { 2730 for (k=0; k<dof; k++) { 2731 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2732 } 2733 jrow++; 2734 } 2735 } 2736 2737 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2738 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2739 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2744 { 2745 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2747 const PetscScalar *x,*v,*alpha; 2748 PetscScalar *y; 2749 PetscErrorCode ierr; 2750 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2751 PetscInt n,i,k; 2752 2753 PetscFunctionBegin; 2754 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2755 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2756 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2757 for (i=0; i<m; i++) { 2758 idx = a->j + a->i[i]; 2759 v = a->a + a->i[i]; 2760 n = a->i[i+1] - a->i[i]; 2761 alpha = x + dof*i; 2762 while (n-->0) { 2763 for (k=0; k<dof; k++) { 2764 y[dof*(*idx)+k] += alpha[k]*(*v); 2765 } 2766 idx++; v++; 2767 } 2768 } 2769 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2770 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2771 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2772 PetscFunctionReturn(0); 2773 } 2774 2775 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2776 { 2777 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2778 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2779 const PetscScalar *x,*v,*alpha; 2780 PetscScalar *y; 2781 PetscErrorCode ierr; 2782 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2783 PetscInt n,i,k; 2784 2785 PetscFunctionBegin; 2786 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2787 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2788 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2789 for (i=0; i<m; i++) { 2790 idx = a->j + a->i[i]; 2791 v = a->a + a->i[i]; 2792 n = a->i[i+1] - a->i[i]; 2793 alpha = x + dof*i; 2794 while (n-->0) { 2795 for (k=0; k<dof; k++) { 2796 y[dof*(*idx)+k] += alpha[k]*(*v); 2797 } 2798 idx++; v++; 2799 } 2800 } 2801 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2802 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2803 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2804 PetscFunctionReturn(0); 2805 } 2806 2807 /*===================================================================================*/ 2808 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2809 { 2810 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2811 PetscErrorCode ierr; 2812 2813 PetscFunctionBegin; 2814 /* start the scatter */ 2815 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2816 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2817 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2818 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2819 PetscFunctionReturn(0); 2820 } 2821 2822 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2823 { 2824 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2825 PetscErrorCode ierr; 2826 2827 PetscFunctionBegin; 2828 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2829 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2830 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2831 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2836 { 2837 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2838 PetscErrorCode ierr; 2839 2840 PetscFunctionBegin; 2841 /* start the scatter */ 2842 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2843 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2844 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2845 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2846 PetscFunctionReturn(0); 2847 } 2848 2849 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2850 { 2851 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2852 PetscErrorCode ierr; 2853 2854 PetscFunctionBegin; 2855 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2856 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2857 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2858 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 2862 /* ----------------------------------------------------------------*/ 2863 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2864 { 2865 PetscErrorCode ierr; 2866 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2867 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2868 Mat P =pp->AIJ; 2869 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2870 PetscInt *pti,*ptj,*ptJ; 2871 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2872 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2873 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2874 MatScalar *ca; 2875 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2876 2877 PetscFunctionBegin; 2878 /* Get ij structure of P^T */ 2879 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2880 2881 cn = pn*ppdof; 2882 /* Allocate ci array, arrays for fill computation and */ 2883 /* free space for accumulating nonzero column info */ 2884 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 2885 ci[0] = 0; 2886 2887 /* Work arrays for rows of P^T*A */ 2888 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2889 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2890 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 2891 2892 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2893 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2894 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2895 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 2896 current_space = free_space; 2897 2898 /* Determine symbolic info for each row of C: */ 2899 for (i=0; i<pn; i++) { 2900 ptnzi = pti[i+1] - pti[i]; 2901 ptJ = ptj + pti[i]; 2902 for (dof=0; dof<ppdof; dof++) { 2903 ptanzi = 0; 2904 /* Determine symbolic row of PtA: */ 2905 for (j=0; j<ptnzi; j++) { 2906 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2907 arow = ptJ[j]*ppdof + dof; 2908 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2909 anzj = ai[arow+1] - ai[arow]; 2910 ajj = aj + ai[arow]; 2911 for (k=0; k<anzj; k++) { 2912 if (!ptadenserow[ajj[k]]) { 2913 ptadenserow[ajj[k]] = -1; 2914 ptasparserow[ptanzi++] = ajj[k]; 2915 } 2916 } 2917 } 2918 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2919 ptaj = ptasparserow; 2920 cnzi = 0; 2921 for (j=0; j<ptanzi; j++) { 2922 /* Get offset within block of P */ 2923 pshift = *ptaj%ppdof; 2924 /* Get block row of P */ 2925 prow = (*ptaj++)/ppdof; /* integer division */ 2926 /* P has same number of nonzeros per row as the compressed form */ 2927 pnzj = pi[prow+1] - pi[prow]; 2928 pjj = pj + pi[prow]; 2929 for (k=0;k<pnzj;k++) { 2930 /* Locations in C are shifted by the offset within the block */ 2931 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2932 if (!denserow[pjj[k]*ppdof+pshift]) { 2933 denserow[pjj[k]*ppdof+pshift] = -1; 2934 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2935 } 2936 } 2937 } 2938 2939 /* sort sparserow */ 2940 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2941 2942 /* If free space is not available, make more free space */ 2943 /* Double the amount of total space in the list */ 2944 if (current_space->local_remaining<cnzi) { 2945 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 2946 } 2947 2948 /* Copy data into free space, and zero out denserows */ 2949 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2950 2951 current_space->array += cnzi; 2952 current_space->local_used += cnzi; 2953 current_space->local_remaining -= cnzi; 2954 2955 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2956 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 2957 2958 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2959 /* For now, we will recompute what is needed. */ 2960 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2961 } 2962 } 2963 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2964 /* Allocate space for cj, initialize cj, and */ 2965 /* destroy list of free space and other temporary array(s) */ 2966 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 2967 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2968 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 2969 2970 /* Allocate space for ca */ 2971 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 2972 2973 /* put together the new matrix */ 2974 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2975 ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 2976 2977 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2978 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2979 c = (Mat_SeqAIJ*)((*C)->data); 2980 c->free_a = PETSC_TRUE; 2981 c->free_ij = PETSC_TRUE; 2982 c->nonew = 0; 2983 2984 (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2985 2986 /* Clean up. */ 2987 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2988 PetscFunctionReturn(0); 2989 } 2990 2991 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2992 { 2993 /* This routine requires testing -- first draft only */ 2994 PetscErrorCode ierr; 2995 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2996 Mat P =pp->AIJ; 2997 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 2998 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 2999 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3000 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3001 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3002 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3003 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3004 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3005 MatScalar *ca=c->a,*caj,*apa; 3006 3007 PetscFunctionBegin; 3008 /* Allocate temporary array for storage of one row of A*P */ 3009 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3010 3011 /* Clear old values in C */ 3012 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3013 3014 for (i=0; i<am; i++) { 3015 /* Form sparse row of A*P */ 3016 anzi = ai[i+1] - ai[i]; 3017 apnzj = 0; 3018 for (j=0; j<anzi; j++) { 3019 /* Get offset within block of P */ 3020 pshift = *aj%ppdof; 3021 /* Get block row of P */ 3022 prow = *aj++/ppdof; /* integer division */ 3023 pnzj = pi[prow+1] - pi[prow]; 3024 pjj = pj + pi[prow]; 3025 paj = pa + pi[prow]; 3026 for (k=0; k<pnzj; k++) { 3027 poffset = pjj[k]*ppdof+pshift; 3028 if (!apjdense[poffset]) { 3029 apjdense[poffset] = -1; 3030 apj[apnzj++] = poffset; 3031 } 3032 apa[poffset] += (*aa)*paj[k]; 3033 } 3034 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3035 aa++; 3036 } 3037 3038 /* Sort the j index array for quick sparse axpy. */ 3039 /* Note: a array does not need sorting as it is in dense storage locations. */ 3040 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3041 3042 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3043 prow = i/ppdof; /* integer division */ 3044 pshift = i%ppdof; 3045 poffset = pi[prow]; 3046 pnzi = pi[prow+1] - poffset; 3047 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3048 pJ = pj+poffset; 3049 pA = pa+poffset; 3050 for (j=0; j<pnzi; j++) { 3051 crow = (*pJ)*ppdof+pshift; 3052 cjj = cj + ci[crow]; 3053 caj = ca + ci[crow]; 3054 pJ++; 3055 /* Perform sparse axpy operation. Note cjj includes apj. */ 3056 for (k=0,nextap=0; nextap<apnzj; k++) { 3057 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3058 } 3059 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3060 pA++; 3061 } 3062 3063 /* Zero the current row info for A*P */ 3064 for (j=0; j<apnzj; j++) { 3065 apa[apj[j]] = 0.; 3066 apjdense[apj[j]] = 0; 3067 } 3068 } 3069 3070 /* Assemble the final matrix and clean up */ 3071 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3072 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3073 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3074 PetscFunctionReturn(0); 3075 } 3076 3077 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3078 { 3079 PetscErrorCode ierr; 3080 3081 PetscFunctionBegin; 3082 if (scall == MAT_INITIAL_MATRIX) { 3083 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3084 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3085 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3086 } 3087 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3088 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3089 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3090 PetscFunctionReturn(0); 3091 } 3092 3093 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3094 { 3095 PetscErrorCode ierr; 3096 3097 PetscFunctionBegin; 3098 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3099 ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr); 3100 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3101 PetscFunctionReturn(0); 3102 } 3103 3104 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3105 { 3106 PetscFunctionBegin; 3107 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3108 PetscFunctionReturn(0); 3109 } 3110 3111 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3112 { 3113 PetscErrorCode ierr; 3114 3115 PetscFunctionBegin; 3116 if (scall == MAT_INITIAL_MATRIX) { 3117 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3118 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 3119 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3120 } 3121 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3122 ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 3123 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3124 PetscFunctionReturn(0); 3125 } 3126 3127 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3128 { 3129 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3130 Mat a = b->AIJ,B; 3131 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3132 PetscErrorCode ierr; 3133 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3134 PetscInt *cols; 3135 PetscScalar *vals; 3136 3137 PetscFunctionBegin; 3138 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3139 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3140 for (i=0; i<m; i++) { 3141 nmax = PetscMax(nmax,aij->ilen[i]); 3142 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3143 } 3144 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3145 ierr = PetscFree(ilen);CHKERRQ(ierr); 3146 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3147 ii = 0; 3148 for (i=0; i<m; i++) { 3149 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3150 for (j=0; j<dof; j++) { 3151 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3152 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3153 ii++; 3154 } 3155 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3156 } 3157 ierr = PetscFree(icols);CHKERRQ(ierr); 3158 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3159 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3160 3161 if (reuse == MAT_INPLACE_MATRIX) { 3162 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3163 } else { 3164 *newmat = B; 3165 } 3166 PetscFunctionReturn(0); 3167 } 3168 3169 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3170 3171 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3172 { 3173 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3174 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3175 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3176 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3177 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3178 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3179 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3180 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3181 PetscInt rstart,cstart,*garray,ii,k; 3182 PetscErrorCode ierr; 3183 PetscScalar *vals,*ovals; 3184 3185 PetscFunctionBegin; 3186 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3187 for (i=0; i<A->rmap->n/dof; i++) { 3188 nmax = PetscMax(nmax,AIJ->ilen[i]); 3189 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3190 for (j=0; j<dof; j++) { 3191 dnz[dof*i+j] = AIJ->ilen[i]; 3192 onz[dof*i+j] = OAIJ->ilen[i]; 3193 } 3194 } 3195 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3196 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3197 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3198 3199 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3200 rstart = dof*maij->A->rmap->rstart; 3201 cstart = dof*maij->A->cmap->rstart; 3202 garray = mpiaij->garray; 3203 3204 ii = rstart; 3205 for (i=0; i<A->rmap->n/dof; i++) { 3206 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3207 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3208 for (j=0; j<dof; j++) { 3209 for (k=0; k<ncols; k++) { 3210 icols[k] = cstart + dof*cols[k]+j; 3211 } 3212 for (k=0; k<oncols; k++) { 3213 oicols[k] = dof*garray[ocols[k]]+j; 3214 } 3215 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3216 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3217 ii++; 3218 } 3219 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3220 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3221 } 3222 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3223 3224 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3225 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3226 3227 if (reuse == MAT_INPLACE_MATRIX) { 3228 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3229 ((PetscObject)A)->refct = 1; 3230 3231 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3232 3233 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3234 } else { 3235 *newmat = B; 3236 } 3237 PetscFunctionReturn(0); 3238 } 3239 3240 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3241 { 3242 PetscErrorCode ierr; 3243 Mat A; 3244 3245 PetscFunctionBegin; 3246 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3247 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3248 ierr = MatDestroy(&A);CHKERRQ(ierr); 3249 PetscFunctionReturn(0); 3250 } 3251 3252 /* ---------------------------------------------------------------------------------- */ 3253 /*@ 3254 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3255 operations for multicomponent problems. It interpolates each component the same 3256 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3257 and MATMPIAIJ for distributed matrices. 3258 3259 Collective 3260 3261 Input Parameters: 3262 + A - the AIJ matrix describing the action on blocks 3263 - dof - the block size (number of components per node) 3264 3265 Output Parameter: 3266 . maij - the new MAIJ matrix 3267 3268 Operations provided: 3269 + MatMult 3270 . MatMultTranspose 3271 . MatMultAdd 3272 . MatMultTransposeAdd 3273 - MatView 3274 3275 Level: advanced 3276 3277 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3278 @*/ 3279 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3280 { 3281 PetscErrorCode ierr; 3282 PetscMPIInt size; 3283 PetscInt n; 3284 Mat B; 3285 3286 PetscFunctionBegin; 3287 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3288 3289 if (dof == 1) *maij = A; 3290 else { 3291 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3292 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3293 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3294 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3295 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3296 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3297 3298 B->assembled = PETSC_TRUE; 3299 3300 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3301 if (size == 1) { 3302 Mat_SeqMAIJ *b; 3303 3304 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3305 3306 B->ops->setup = NULL; 3307 B->ops->destroy = MatDestroy_SeqMAIJ; 3308 B->ops->view = MatView_SeqMAIJ; 3309 b = (Mat_SeqMAIJ*)B->data; 3310 b->dof = dof; 3311 b->AIJ = A; 3312 3313 if (dof == 2) { 3314 B->ops->mult = MatMult_SeqMAIJ_2; 3315 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3316 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3317 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3318 } else if (dof == 3) { 3319 B->ops->mult = MatMult_SeqMAIJ_3; 3320 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3321 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3322 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3323 } else if (dof == 4) { 3324 B->ops->mult = MatMult_SeqMAIJ_4; 3325 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3326 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3327 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3328 } else if (dof == 5) { 3329 B->ops->mult = MatMult_SeqMAIJ_5; 3330 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3331 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3332 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3333 } else if (dof == 6) { 3334 B->ops->mult = MatMult_SeqMAIJ_6; 3335 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3336 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3337 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3338 } else if (dof == 7) { 3339 B->ops->mult = MatMult_SeqMAIJ_7; 3340 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3341 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3342 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3343 } else if (dof == 8) { 3344 B->ops->mult = MatMult_SeqMAIJ_8; 3345 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3346 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3347 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3348 } else if (dof == 9) { 3349 B->ops->mult = MatMult_SeqMAIJ_9; 3350 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3351 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3352 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3353 } else if (dof == 10) { 3354 B->ops->mult = MatMult_SeqMAIJ_10; 3355 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3356 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3357 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3358 } else if (dof == 11) { 3359 B->ops->mult = MatMult_SeqMAIJ_11; 3360 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3361 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3362 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3363 } else if (dof == 16) { 3364 B->ops->mult = MatMult_SeqMAIJ_16; 3365 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3366 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3367 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3368 } else if (dof == 18) { 3369 B->ops->mult = MatMult_SeqMAIJ_18; 3370 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3371 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3372 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3373 } else { 3374 B->ops->mult = MatMult_SeqMAIJ_N; 3375 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3376 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3377 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3378 } 3379 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3380 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3381 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3382 } else { 3383 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3384 Mat_MPIMAIJ *b; 3385 IS from,to; 3386 Vec gvec; 3387 3388 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3389 3390 B->ops->setup = NULL; 3391 B->ops->destroy = MatDestroy_MPIMAIJ; 3392 B->ops->view = MatView_MPIMAIJ; 3393 3394 b = (Mat_MPIMAIJ*)B->data; 3395 b->dof = dof; 3396 b->A = A; 3397 3398 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3399 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3400 3401 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3402 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3403 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3404 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3405 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3406 3407 /* create two temporary Index sets for build scatter gather */ 3408 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3409 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3410 3411 /* create temporary global vector to generate scatter context */ 3412 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3413 3414 /* generate the scatter context */ 3415 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3416 3417 ierr = ISDestroy(&from);CHKERRQ(ierr); 3418 ierr = ISDestroy(&to);CHKERRQ(ierr); 3419 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3420 3421 B->ops->mult = MatMult_MPIMAIJ_dof; 3422 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3423 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3424 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3425 3426 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3427 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3428 } 3429 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3430 ierr = MatSetUp(B);CHKERRQ(ierr); 3431 *maij = B; 3432 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3433 } 3434 PetscFunctionReturn(0); 3435 } 3436