1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: 36 The reference count on the AIJ matrix is not increased so you should not destroy it. 37 38 .seealso: MatCreateMAIJ() 39 @*/ 40 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41 { 42 PetscErrorCode ierr; 43 PetscBool ismpimaij,isseqmaij; 44 45 PetscFunctionBegin; 46 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 47 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 48 if (ismpimaij) { 49 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 50 51 *B = b->A; 52 } else if (isseqmaij) { 53 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 54 55 *B = b->AIJ; 56 } else { 57 *B = A; 58 } 59 PetscFunctionReturn(0); 60 } 61 62 /*@ 63 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 64 65 Logically Collective 66 67 Input Parameter: 68 + A - the MAIJ matrix 69 - dof - the block size for the new matrix 70 71 Output Parameter: 72 . B - the new MAIJ matrix 73 74 Level: advanced 75 76 .seealso: MatCreateMAIJ() 77 @*/ 78 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 79 { 80 PetscErrorCode ierr; 81 Mat Aij = NULL; 82 83 PetscFunctionBegin; 84 PetscValidLogicalCollectiveInt(A,dof,2); 85 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 86 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 94 95 PetscFunctionBegin; 96 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 97 ierr = PetscFree(A->data);CHKERRQ(ierr); 98 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 99 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 100 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);CHKERRQ(ierr); 101 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 PetscErrorCode MatSetUp_MAIJ(Mat A) 106 { 107 PetscFunctionBegin; 108 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 109 PetscFunctionReturn(0); 110 } 111 112 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 113 { 114 PetscErrorCode ierr; 115 Mat B; 116 117 PetscFunctionBegin; 118 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 119 ierr = MatView(B,viewer);CHKERRQ(ierr); 120 ierr = MatDestroy(&B);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 125 { 126 PetscErrorCode ierr; 127 Mat B; 128 129 PetscFunctionBegin; 130 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 131 ierr = MatView(B,viewer);CHKERRQ(ierr); 132 ierr = MatDestroy(&B);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 137 { 138 PetscErrorCode ierr; 139 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 140 141 PetscFunctionBegin; 142 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 143 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 144 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 145 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 146 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 147 ierr = PetscFree(A->data);CHKERRQ(ierr); 148 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 149 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 150 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 151 PetscFunctionReturn(0); 152 } 153 154 /*MC 155 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 156 multicomponent problems, interpolating or restricting each component the same way independently. 157 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 158 159 Operations provided: 160 . MatMult 161 . MatMultTranspose 162 . MatMultAdd 163 . MatMultTransposeAdd 164 165 Level: advanced 166 167 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 168 M*/ 169 170 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 171 { 172 PetscErrorCode ierr; 173 Mat_MPIMAIJ *b; 174 PetscMPIInt size; 175 176 PetscFunctionBegin; 177 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 178 A->data = (void*)b; 179 180 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 181 182 A->ops->setup = MatSetUp_MAIJ; 183 184 b->AIJ = 0; 185 b->dof = 0; 186 b->OAIJ = 0; 187 b->ctx = 0; 188 b->w = 0; 189 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 190 if (size == 1) { 191 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 192 } else { 193 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 194 } 195 A->preallocated = PETSC_TRUE; 196 A->assembled = PETSC_TRUE; 197 PetscFunctionReturn(0); 198 } 199 200 /* --------------------------------------------------------------------------------------*/ 201 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 202 { 203 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 205 const PetscScalar *x,*v; 206 PetscScalar *y, sum1, sum2; 207 PetscErrorCode ierr; 208 PetscInt nonzerorow=0,n,i,jrow,j; 209 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 210 211 PetscFunctionBegin; 212 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 213 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 214 idx = a->j; 215 v = a->a; 216 ii = a->i; 217 218 for (i=0; i<m; i++) { 219 jrow = ii[i]; 220 n = ii[i+1] - jrow; 221 sum1 = 0.0; 222 sum2 = 0.0; 223 224 nonzerorow += (n>0); 225 for (j=0; j<n; j++) { 226 sum1 += v[jrow]*x[2*idx[jrow]]; 227 sum2 += v[jrow]*x[2*idx[jrow]+1]; 228 jrow++; 229 } 230 y[2*i] = sum1; 231 y[2*i+1] = sum2; 232 } 233 234 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 235 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 236 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 237 PetscFunctionReturn(0); 238 } 239 240 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 241 { 242 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 243 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 244 const PetscScalar *x,*v; 245 PetscScalar *y,alpha1,alpha2; 246 PetscErrorCode ierr; 247 PetscInt n,i; 248 const PetscInt m = b->AIJ->rmap->n,*idx; 249 250 PetscFunctionBegin; 251 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 252 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 253 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 254 255 for (i=0; i<m; i++) { 256 idx = a->j + a->i[i]; 257 v = a->a + a->i[i]; 258 n = a->i[i+1] - a->i[i]; 259 alpha1 = x[2*i]; 260 alpha2 = x[2*i+1]; 261 while (n-->0) { 262 y[2*(*idx)] += alpha1*(*v); 263 y[2*(*idx)+1] += alpha2*(*v); 264 idx++; v++; 265 } 266 } 267 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 268 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 269 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 274 { 275 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 276 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 277 const PetscScalar *x,*v; 278 PetscScalar *y,sum1, sum2; 279 PetscErrorCode ierr; 280 PetscInt n,i,jrow,j; 281 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 282 283 PetscFunctionBegin; 284 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 285 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 286 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 287 idx = a->j; 288 v = a->a; 289 ii = a->i; 290 291 for (i=0; i<m; i++) { 292 jrow = ii[i]; 293 n = ii[i+1] - jrow; 294 sum1 = 0.0; 295 sum2 = 0.0; 296 for (j=0; j<n; j++) { 297 sum1 += v[jrow]*x[2*idx[jrow]]; 298 sum2 += v[jrow]*x[2*idx[jrow]+1]; 299 jrow++; 300 } 301 y[2*i] += sum1; 302 y[2*i+1] += sum2; 303 } 304 305 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 306 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 307 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 308 PetscFunctionReturn(0); 309 } 310 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 311 { 312 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 313 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 314 const PetscScalar *x,*v; 315 PetscScalar *y,alpha1,alpha2; 316 PetscErrorCode ierr; 317 PetscInt n,i; 318 const PetscInt m = b->AIJ->rmap->n,*idx; 319 320 PetscFunctionBegin; 321 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 322 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 323 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 324 325 for (i=0; i<m; i++) { 326 idx = a->j + a->i[i]; 327 v = a->a + a->i[i]; 328 n = a->i[i+1] - a->i[i]; 329 alpha1 = x[2*i]; 330 alpha2 = x[2*i+1]; 331 while (n-->0) { 332 y[2*(*idx)] += alpha1*(*v); 333 y[2*(*idx)+1] += alpha2*(*v); 334 idx++; v++; 335 } 336 } 337 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 338 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 /* --------------------------------------------------------------------------------------*/ 343 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 344 { 345 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 346 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 347 const PetscScalar *x,*v; 348 PetscScalar *y,sum1, sum2, sum3; 349 PetscErrorCode ierr; 350 PetscInt nonzerorow=0,n,i,jrow,j; 351 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 352 353 PetscFunctionBegin; 354 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 355 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 356 idx = a->j; 357 v = a->a; 358 ii = a->i; 359 360 for (i=0; i<m; i++) { 361 jrow = ii[i]; 362 n = ii[i+1] - jrow; 363 sum1 = 0.0; 364 sum2 = 0.0; 365 sum3 = 0.0; 366 367 nonzerorow += (n>0); 368 for (j=0; j<n; j++) { 369 sum1 += v[jrow]*x[3*idx[jrow]]; 370 sum2 += v[jrow]*x[3*idx[jrow]+1]; 371 sum3 += v[jrow]*x[3*idx[jrow]+2]; 372 jrow++; 373 } 374 y[3*i] = sum1; 375 y[3*i+1] = sum2; 376 y[3*i+2] = sum3; 377 } 378 379 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 380 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 381 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 386 { 387 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 388 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 389 const PetscScalar *x,*v; 390 PetscScalar *y,alpha1,alpha2,alpha3; 391 PetscErrorCode ierr; 392 PetscInt n,i; 393 const PetscInt m = b->AIJ->rmap->n,*idx; 394 395 PetscFunctionBegin; 396 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 398 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 399 400 for (i=0; i<m; i++) { 401 idx = a->j + a->i[i]; 402 v = a->a + a->i[i]; 403 n = a->i[i+1] - a->i[i]; 404 alpha1 = x[3*i]; 405 alpha2 = x[3*i+1]; 406 alpha3 = x[3*i+2]; 407 while (n-->0) { 408 y[3*(*idx)] += alpha1*(*v); 409 y[3*(*idx)+1] += alpha2*(*v); 410 y[3*(*idx)+2] += alpha3*(*v); 411 idx++; v++; 412 } 413 } 414 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 416 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 421 { 422 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 423 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 424 const PetscScalar *x,*v; 425 PetscScalar *y,sum1, sum2, sum3; 426 PetscErrorCode ierr; 427 PetscInt n,i,jrow,j; 428 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 429 430 PetscFunctionBegin; 431 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 432 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 434 idx = a->j; 435 v = a->a; 436 ii = a->i; 437 438 for (i=0; i<m; i++) { 439 jrow = ii[i]; 440 n = ii[i+1] - jrow; 441 sum1 = 0.0; 442 sum2 = 0.0; 443 sum3 = 0.0; 444 for (j=0; j<n; j++) { 445 sum1 += v[jrow]*x[3*idx[jrow]]; 446 sum2 += v[jrow]*x[3*idx[jrow]+1]; 447 sum3 += v[jrow]*x[3*idx[jrow]+2]; 448 jrow++; 449 } 450 y[3*i] += sum1; 451 y[3*i+1] += sum2; 452 y[3*i+2] += sum3; 453 } 454 455 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 456 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 457 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 461 { 462 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 464 const PetscScalar *x,*v; 465 PetscScalar *y,alpha1,alpha2,alpha3; 466 PetscErrorCode ierr; 467 PetscInt n,i; 468 const PetscInt m = b->AIJ->rmap->n,*idx; 469 470 PetscFunctionBegin; 471 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 472 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 473 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 474 for (i=0; i<m; i++) { 475 idx = a->j + a->i[i]; 476 v = a->a + a->i[i]; 477 n = a->i[i+1] - a->i[i]; 478 alpha1 = x[3*i]; 479 alpha2 = x[3*i+1]; 480 alpha3 = x[3*i+2]; 481 while (n-->0) { 482 y[3*(*idx)] += alpha1*(*v); 483 y[3*(*idx)+1] += alpha2*(*v); 484 y[3*(*idx)+2] += alpha3*(*v); 485 idx++; v++; 486 } 487 } 488 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 490 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 491 PetscFunctionReturn(0); 492 } 493 494 /* ------------------------------------------------------------------------------*/ 495 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 496 { 497 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 498 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 499 const PetscScalar *x,*v; 500 PetscScalar *y,sum1, sum2, sum3, sum4; 501 PetscErrorCode ierr; 502 PetscInt nonzerorow=0,n,i,jrow,j; 503 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 504 505 PetscFunctionBegin; 506 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 507 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 508 idx = a->j; 509 v = a->a; 510 ii = a->i; 511 512 for (i=0; i<m; i++) { 513 jrow = ii[i]; 514 n = ii[i+1] - jrow; 515 sum1 = 0.0; 516 sum2 = 0.0; 517 sum3 = 0.0; 518 sum4 = 0.0; 519 nonzerorow += (n>0); 520 for (j=0; j<n; j++) { 521 sum1 += v[jrow]*x[4*idx[jrow]]; 522 sum2 += v[jrow]*x[4*idx[jrow]+1]; 523 sum3 += v[jrow]*x[4*idx[jrow]+2]; 524 sum4 += v[jrow]*x[4*idx[jrow]+3]; 525 jrow++; 526 } 527 y[4*i] = sum1; 528 y[4*i+1] = sum2; 529 y[4*i+2] = sum3; 530 y[4*i+3] = sum4; 531 } 532 533 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 534 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 535 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 540 { 541 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 542 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 543 const PetscScalar *x,*v; 544 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 545 PetscErrorCode ierr; 546 PetscInt n,i; 547 const PetscInt m = b->AIJ->rmap->n,*idx; 548 549 PetscFunctionBegin; 550 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 551 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 552 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 553 for (i=0; i<m; i++) { 554 idx = a->j + a->i[i]; 555 v = a->a + a->i[i]; 556 n = a->i[i+1] - a->i[i]; 557 alpha1 = x[4*i]; 558 alpha2 = x[4*i+1]; 559 alpha3 = x[4*i+2]; 560 alpha4 = x[4*i+3]; 561 while (n-->0) { 562 y[4*(*idx)] += alpha1*(*v); 563 y[4*(*idx)+1] += alpha2*(*v); 564 y[4*(*idx)+2] += alpha3*(*v); 565 y[4*(*idx)+3] += alpha4*(*v); 566 idx++; v++; 567 } 568 } 569 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 570 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 571 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 576 { 577 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 578 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 579 const PetscScalar *x,*v; 580 PetscScalar *y,sum1, sum2, sum3, sum4; 581 PetscErrorCode ierr; 582 PetscInt n,i,jrow,j; 583 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 584 585 PetscFunctionBegin; 586 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 587 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 588 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 589 idx = a->j; 590 v = a->a; 591 ii = a->i; 592 593 for (i=0; i<m; i++) { 594 jrow = ii[i]; 595 n = ii[i+1] - jrow; 596 sum1 = 0.0; 597 sum2 = 0.0; 598 sum3 = 0.0; 599 sum4 = 0.0; 600 for (j=0; j<n; j++) { 601 sum1 += v[jrow]*x[4*idx[jrow]]; 602 sum2 += v[jrow]*x[4*idx[jrow]+1]; 603 sum3 += v[jrow]*x[4*idx[jrow]+2]; 604 sum4 += v[jrow]*x[4*idx[jrow]+3]; 605 jrow++; 606 } 607 y[4*i] += sum1; 608 y[4*i+1] += sum2; 609 y[4*i+2] += sum3; 610 y[4*i+3] += sum4; 611 } 612 613 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 614 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 615 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 619 { 620 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 621 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 622 const PetscScalar *x,*v; 623 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 624 PetscErrorCode ierr; 625 PetscInt n,i; 626 const PetscInt m = b->AIJ->rmap->n,*idx; 627 628 PetscFunctionBegin; 629 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 630 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 631 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 632 633 for (i=0; i<m; i++) { 634 idx = a->j + a->i[i]; 635 v = a->a + a->i[i]; 636 n = a->i[i+1] - a->i[i]; 637 alpha1 = x[4*i]; 638 alpha2 = x[4*i+1]; 639 alpha3 = x[4*i+2]; 640 alpha4 = x[4*i+3]; 641 while (n-->0) { 642 y[4*(*idx)] += alpha1*(*v); 643 y[4*(*idx)+1] += alpha2*(*v); 644 y[4*(*idx)+2] += alpha3*(*v); 645 y[4*(*idx)+3] += alpha4*(*v); 646 idx++; v++; 647 } 648 } 649 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 650 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 651 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 /* ------------------------------------------------------------------------------*/ 655 656 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 657 { 658 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 659 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 660 const PetscScalar *x,*v; 661 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 662 PetscErrorCode ierr; 663 PetscInt nonzerorow=0,n,i,jrow,j; 664 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 665 666 PetscFunctionBegin; 667 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 668 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 669 idx = a->j; 670 v = a->a; 671 ii = a->i; 672 673 for (i=0; i<m; i++) { 674 jrow = ii[i]; 675 n = ii[i+1] - jrow; 676 sum1 = 0.0; 677 sum2 = 0.0; 678 sum3 = 0.0; 679 sum4 = 0.0; 680 sum5 = 0.0; 681 682 nonzerorow += (n>0); 683 for (j=0; j<n; j++) { 684 sum1 += v[jrow]*x[5*idx[jrow]]; 685 sum2 += v[jrow]*x[5*idx[jrow]+1]; 686 sum3 += v[jrow]*x[5*idx[jrow]+2]; 687 sum4 += v[jrow]*x[5*idx[jrow]+3]; 688 sum5 += v[jrow]*x[5*idx[jrow]+4]; 689 jrow++; 690 } 691 y[5*i] = sum1; 692 y[5*i+1] = sum2; 693 y[5*i+2] = sum3; 694 y[5*i+3] = sum4; 695 y[5*i+4] = sum5; 696 } 697 698 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 699 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 700 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 705 { 706 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 707 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 708 const PetscScalar *x,*v; 709 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 710 PetscErrorCode ierr; 711 PetscInt n,i; 712 const PetscInt m = b->AIJ->rmap->n,*idx; 713 714 PetscFunctionBegin; 715 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 716 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 717 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 718 719 for (i=0; i<m; i++) { 720 idx = a->j + a->i[i]; 721 v = a->a + a->i[i]; 722 n = a->i[i+1] - a->i[i]; 723 alpha1 = x[5*i]; 724 alpha2 = x[5*i+1]; 725 alpha3 = x[5*i+2]; 726 alpha4 = x[5*i+3]; 727 alpha5 = x[5*i+4]; 728 while (n-->0) { 729 y[5*(*idx)] += alpha1*(*v); 730 y[5*(*idx)+1] += alpha2*(*v); 731 y[5*(*idx)+2] += alpha3*(*v); 732 y[5*(*idx)+3] += alpha4*(*v); 733 y[5*(*idx)+4] += alpha5*(*v); 734 idx++; v++; 735 } 736 } 737 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 738 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 739 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 744 { 745 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 747 const PetscScalar *x,*v; 748 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 749 PetscErrorCode ierr; 750 PetscInt n,i,jrow,j; 751 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 752 753 PetscFunctionBegin; 754 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 755 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 756 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 757 idx = a->j; 758 v = a->a; 759 ii = a->i; 760 761 for (i=0; i<m; i++) { 762 jrow = ii[i]; 763 n = ii[i+1] - jrow; 764 sum1 = 0.0; 765 sum2 = 0.0; 766 sum3 = 0.0; 767 sum4 = 0.0; 768 sum5 = 0.0; 769 for (j=0; j<n; j++) { 770 sum1 += v[jrow]*x[5*idx[jrow]]; 771 sum2 += v[jrow]*x[5*idx[jrow]+1]; 772 sum3 += v[jrow]*x[5*idx[jrow]+2]; 773 sum4 += v[jrow]*x[5*idx[jrow]+3]; 774 sum5 += v[jrow]*x[5*idx[jrow]+4]; 775 jrow++; 776 } 777 y[5*i] += sum1; 778 y[5*i+1] += sum2; 779 y[5*i+2] += sum3; 780 y[5*i+3] += sum4; 781 y[5*i+4] += sum5; 782 } 783 784 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 785 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 786 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 787 PetscFunctionReturn(0); 788 } 789 790 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 791 { 792 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 793 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 794 const PetscScalar *x,*v; 795 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 796 PetscErrorCode ierr; 797 PetscInt n,i; 798 const PetscInt m = b->AIJ->rmap->n,*idx; 799 800 PetscFunctionBegin; 801 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 802 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 803 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 804 805 for (i=0; i<m; i++) { 806 idx = a->j + a->i[i]; 807 v = a->a + a->i[i]; 808 n = a->i[i+1] - a->i[i]; 809 alpha1 = x[5*i]; 810 alpha2 = x[5*i+1]; 811 alpha3 = x[5*i+2]; 812 alpha4 = x[5*i+3]; 813 alpha5 = x[5*i+4]; 814 while (n-->0) { 815 y[5*(*idx)] += alpha1*(*v); 816 y[5*(*idx)+1] += alpha2*(*v); 817 y[5*(*idx)+2] += alpha3*(*v); 818 y[5*(*idx)+3] += alpha4*(*v); 819 y[5*(*idx)+4] += alpha5*(*v); 820 idx++; v++; 821 } 822 } 823 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 824 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 825 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 826 PetscFunctionReturn(0); 827 } 828 829 /* ------------------------------------------------------------------------------*/ 830 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 831 { 832 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 833 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 834 const PetscScalar *x,*v; 835 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 836 PetscErrorCode ierr; 837 PetscInt nonzerorow=0,n,i,jrow,j; 838 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 839 840 PetscFunctionBegin; 841 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 842 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 843 idx = a->j; 844 v = a->a; 845 ii = a->i; 846 847 for (i=0; i<m; i++) { 848 jrow = ii[i]; 849 n = ii[i+1] - jrow; 850 sum1 = 0.0; 851 sum2 = 0.0; 852 sum3 = 0.0; 853 sum4 = 0.0; 854 sum5 = 0.0; 855 sum6 = 0.0; 856 857 nonzerorow += (n>0); 858 for (j=0; j<n; j++) { 859 sum1 += v[jrow]*x[6*idx[jrow]]; 860 sum2 += v[jrow]*x[6*idx[jrow]+1]; 861 sum3 += v[jrow]*x[6*idx[jrow]+2]; 862 sum4 += v[jrow]*x[6*idx[jrow]+3]; 863 sum5 += v[jrow]*x[6*idx[jrow]+4]; 864 sum6 += v[jrow]*x[6*idx[jrow]+5]; 865 jrow++; 866 } 867 y[6*i] = sum1; 868 y[6*i+1] = sum2; 869 y[6*i+2] = sum3; 870 y[6*i+3] = sum4; 871 y[6*i+4] = sum5; 872 y[6*i+5] = sum6; 873 } 874 875 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 876 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 877 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 878 PetscFunctionReturn(0); 879 } 880 881 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 882 { 883 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 884 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 885 const PetscScalar *x,*v; 886 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 887 PetscErrorCode ierr; 888 PetscInt n,i; 889 const PetscInt m = b->AIJ->rmap->n,*idx; 890 891 PetscFunctionBegin; 892 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 893 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 894 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 895 896 for (i=0; i<m; i++) { 897 idx = a->j + a->i[i]; 898 v = a->a + a->i[i]; 899 n = a->i[i+1] - a->i[i]; 900 alpha1 = x[6*i]; 901 alpha2 = x[6*i+1]; 902 alpha3 = x[6*i+2]; 903 alpha4 = x[6*i+3]; 904 alpha5 = x[6*i+4]; 905 alpha6 = x[6*i+5]; 906 while (n-->0) { 907 y[6*(*idx)] += alpha1*(*v); 908 y[6*(*idx)+1] += alpha2*(*v); 909 y[6*(*idx)+2] += alpha3*(*v); 910 y[6*(*idx)+3] += alpha4*(*v); 911 y[6*(*idx)+4] += alpha5*(*v); 912 y[6*(*idx)+5] += alpha6*(*v); 913 idx++; v++; 914 } 915 } 916 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 917 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 918 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } 921 922 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 923 { 924 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 925 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 926 const PetscScalar *x,*v; 927 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 928 PetscErrorCode ierr; 929 PetscInt n,i,jrow,j; 930 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 931 932 PetscFunctionBegin; 933 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 934 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 935 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 936 idx = a->j; 937 v = a->a; 938 ii = a->i; 939 940 for (i=0; i<m; i++) { 941 jrow = ii[i]; 942 n = ii[i+1] - jrow; 943 sum1 = 0.0; 944 sum2 = 0.0; 945 sum3 = 0.0; 946 sum4 = 0.0; 947 sum5 = 0.0; 948 sum6 = 0.0; 949 for (j=0; j<n; j++) { 950 sum1 += v[jrow]*x[6*idx[jrow]]; 951 sum2 += v[jrow]*x[6*idx[jrow]+1]; 952 sum3 += v[jrow]*x[6*idx[jrow]+2]; 953 sum4 += v[jrow]*x[6*idx[jrow]+3]; 954 sum5 += v[jrow]*x[6*idx[jrow]+4]; 955 sum6 += v[jrow]*x[6*idx[jrow]+5]; 956 jrow++; 957 } 958 y[6*i] += sum1; 959 y[6*i+1] += sum2; 960 y[6*i+2] += sum3; 961 y[6*i+3] += sum4; 962 y[6*i+4] += sum5; 963 y[6*i+5] += sum6; 964 } 965 966 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 967 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 968 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 973 { 974 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 975 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 976 const PetscScalar *x,*v; 977 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 978 PetscErrorCode ierr; 979 PetscInt n,i; 980 const PetscInt m = b->AIJ->rmap->n,*idx; 981 982 PetscFunctionBegin; 983 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 984 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 985 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 986 987 for (i=0; i<m; i++) { 988 idx = a->j + a->i[i]; 989 v = a->a + a->i[i]; 990 n = a->i[i+1] - a->i[i]; 991 alpha1 = x[6*i]; 992 alpha2 = x[6*i+1]; 993 alpha3 = x[6*i+2]; 994 alpha4 = x[6*i+3]; 995 alpha5 = x[6*i+4]; 996 alpha6 = x[6*i+5]; 997 while (n-->0) { 998 y[6*(*idx)] += alpha1*(*v); 999 y[6*(*idx)+1] += alpha2*(*v); 1000 y[6*(*idx)+2] += alpha3*(*v); 1001 y[6*(*idx)+3] += alpha4*(*v); 1002 y[6*(*idx)+4] += alpha5*(*v); 1003 y[6*(*idx)+5] += alpha6*(*v); 1004 idx++; v++; 1005 } 1006 } 1007 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1008 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1009 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 /* ------------------------------------------------------------------------------*/ 1014 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1015 { 1016 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1017 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1018 const PetscScalar *x,*v; 1019 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1020 PetscErrorCode ierr; 1021 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1022 PetscInt nonzerorow=0,n,i,jrow,j; 1023 1024 PetscFunctionBegin; 1025 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1026 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1027 idx = a->j; 1028 v = a->a; 1029 ii = a->i; 1030 1031 for (i=0; i<m; i++) { 1032 jrow = ii[i]; 1033 n = ii[i+1] - jrow; 1034 sum1 = 0.0; 1035 sum2 = 0.0; 1036 sum3 = 0.0; 1037 sum4 = 0.0; 1038 sum5 = 0.0; 1039 sum6 = 0.0; 1040 sum7 = 0.0; 1041 1042 nonzerorow += (n>0); 1043 for (j=0; j<n; j++) { 1044 sum1 += v[jrow]*x[7*idx[jrow]]; 1045 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1046 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1047 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1048 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1049 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1050 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1051 jrow++; 1052 } 1053 y[7*i] = sum1; 1054 y[7*i+1] = sum2; 1055 y[7*i+2] = sum3; 1056 y[7*i+3] = sum4; 1057 y[7*i+4] = sum5; 1058 y[7*i+5] = sum6; 1059 y[7*i+6] = sum7; 1060 } 1061 1062 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1063 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1064 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1069 { 1070 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1071 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1072 const PetscScalar *x,*v; 1073 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1074 PetscErrorCode ierr; 1075 const PetscInt m = b->AIJ->rmap->n,*idx; 1076 PetscInt n,i; 1077 1078 PetscFunctionBegin; 1079 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1080 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1081 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1082 1083 for (i=0; i<m; i++) { 1084 idx = a->j + a->i[i]; 1085 v = a->a + a->i[i]; 1086 n = a->i[i+1] - a->i[i]; 1087 alpha1 = x[7*i]; 1088 alpha2 = x[7*i+1]; 1089 alpha3 = x[7*i+2]; 1090 alpha4 = x[7*i+3]; 1091 alpha5 = x[7*i+4]; 1092 alpha6 = x[7*i+5]; 1093 alpha7 = x[7*i+6]; 1094 while (n-->0) { 1095 y[7*(*idx)] += alpha1*(*v); 1096 y[7*(*idx)+1] += alpha2*(*v); 1097 y[7*(*idx)+2] += alpha3*(*v); 1098 y[7*(*idx)+3] += alpha4*(*v); 1099 y[7*(*idx)+4] += alpha5*(*v); 1100 y[7*(*idx)+5] += alpha6*(*v); 1101 y[7*(*idx)+6] += alpha7*(*v); 1102 idx++; v++; 1103 } 1104 } 1105 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1106 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1107 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1108 PetscFunctionReturn(0); 1109 } 1110 1111 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1112 { 1113 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1114 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1115 const PetscScalar *x,*v; 1116 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1117 PetscErrorCode ierr; 1118 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1119 PetscInt n,i,jrow,j; 1120 1121 PetscFunctionBegin; 1122 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1123 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1124 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1125 idx = a->j; 1126 v = a->a; 1127 ii = a->i; 1128 1129 for (i=0; i<m; i++) { 1130 jrow = ii[i]; 1131 n = ii[i+1] - jrow; 1132 sum1 = 0.0; 1133 sum2 = 0.0; 1134 sum3 = 0.0; 1135 sum4 = 0.0; 1136 sum5 = 0.0; 1137 sum6 = 0.0; 1138 sum7 = 0.0; 1139 for (j=0; j<n; j++) { 1140 sum1 += v[jrow]*x[7*idx[jrow]]; 1141 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1142 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1143 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1144 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1145 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1146 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1147 jrow++; 1148 } 1149 y[7*i] += sum1; 1150 y[7*i+1] += sum2; 1151 y[7*i+2] += sum3; 1152 y[7*i+3] += sum4; 1153 y[7*i+4] += sum5; 1154 y[7*i+5] += sum6; 1155 y[7*i+6] += sum7; 1156 } 1157 1158 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1159 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1160 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1165 { 1166 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1167 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1168 const PetscScalar *x,*v; 1169 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1170 PetscErrorCode ierr; 1171 const PetscInt m = b->AIJ->rmap->n,*idx; 1172 PetscInt n,i; 1173 1174 PetscFunctionBegin; 1175 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1176 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1177 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1178 for (i=0; i<m; i++) { 1179 idx = a->j + a->i[i]; 1180 v = a->a + a->i[i]; 1181 n = a->i[i+1] - a->i[i]; 1182 alpha1 = x[7*i]; 1183 alpha2 = x[7*i+1]; 1184 alpha3 = x[7*i+2]; 1185 alpha4 = x[7*i+3]; 1186 alpha5 = x[7*i+4]; 1187 alpha6 = x[7*i+5]; 1188 alpha7 = x[7*i+6]; 1189 while (n-->0) { 1190 y[7*(*idx)] += alpha1*(*v); 1191 y[7*(*idx)+1] += alpha2*(*v); 1192 y[7*(*idx)+2] += alpha3*(*v); 1193 y[7*(*idx)+3] += alpha4*(*v); 1194 y[7*(*idx)+4] += alpha5*(*v); 1195 y[7*(*idx)+5] += alpha6*(*v); 1196 y[7*(*idx)+6] += alpha7*(*v); 1197 idx++; v++; 1198 } 1199 } 1200 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1201 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1202 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1207 { 1208 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1209 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1210 const PetscScalar *x,*v; 1211 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1212 PetscErrorCode ierr; 1213 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1214 PetscInt nonzerorow=0,n,i,jrow,j; 1215 1216 PetscFunctionBegin; 1217 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1218 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1219 idx = a->j; 1220 v = a->a; 1221 ii = a->i; 1222 1223 for (i=0; i<m; i++) { 1224 jrow = ii[i]; 1225 n = ii[i+1] - jrow; 1226 sum1 = 0.0; 1227 sum2 = 0.0; 1228 sum3 = 0.0; 1229 sum4 = 0.0; 1230 sum5 = 0.0; 1231 sum6 = 0.0; 1232 sum7 = 0.0; 1233 sum8 = 0.0; 1234 1235 nonzerorow += (n>0); 1236 for (j=0; j<n; j++) { 1237 sum1 += v[jrow]*x[8*idx[jrow]]; 1238 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1239 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1240 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1241 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1242 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1243 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1244 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1245 jrow++; 1246 } 1247 y[8*i] = sum1; 1248 y[8*i+1] = sum2; 1249 y[8*i+2] = sum3; 1250 y[8*i+3] = sum4; 1251 y[8*i+4] = sum5; 1252 y[8*i+5] = sum6; 1253 y[8*i+6] = sum7; 1254 y[8*i+7] = sum8; 1255 } 1256 1257 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1258 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1259 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1264 { 1265 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1267 const PetscScalar *x,*v; 1268 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1269 PetscErrorCode ierr; 1270 const PetscInt m = b->AIJ->rmap->n,*idx; 1271 PetscInt n,i; 1272 1273 PetscFunctionBegin; 1274 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1275 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1276 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1277 1278 for (i=0; i<m; i++) { 1279 idx = a->j + a->i[i]; 1280 v = a->a + a->i[i]; 1281 n = a->i[i+1] - a->i[i]; 1282 alpha1 = x[8*i]; 1283 alpha2 = x[8*i+1]; 1284 alpha3 = x[8*i+2]; 1285 alpha4 = x[8*i+3]; 1286 alpha5 = x[8*i+4]; 1287 alpha6 = x[8*i+5]; 1288 alpha7 = x[8*i+6]; 1289 alpha8 = x[8*i+7]; 1290 while (n-->0) { 1291 y[8*(*idx)] += alpha1*(*v); 1292 y[8*(*idx)+1] += alpha2*(*v); 1293 y[8*(*idx)+2] += alpha3*(*v); 1294 y[8*(*idx)+3] += alpha4*(*v); 1295 y[8*(*idx)+4] += alpha5*(*v); 1296 y[8*(*idx)+5] += alpha6*(*v); 1297 y[8*(*idx)+6] += alpha7*(*v); 1298 y[8*(*idx)+7] += alpha8*(*v); 1299 idx++; v++; 1300 } 1301 } 1302 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1303 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1304 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1309 { 1310 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1311 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1312 const PetscScalar *x,*v; 1313 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1314 PetscErrorCode ierr; 1315 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1316 PetscInt n,i,jrow,j; 1317 1318 PetscFunctionBegin; 1319 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1320 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1321 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1322 idx = a->j; 1323 v = a->a; 1324 ii = a->i; 1325 1326 for (i=0; i<m; i++) { 1327 jrow = ii[i]; 1328 n = ii[i+1] - jrow; 1329 sum1 = 0.0; 1330 sum2 = 0.0; 1331 sum3 = 0.0; 1332 sum4 = 0.0; 1333 sum5 = 0.0; 1334 sum6 = 0.0; 1335 sum7 = 0.0; 1336 sum8 = 0.0; 1337 for (j=0; j<n; j++) { 1338 sum1 += v[jrow]*x[8*idx[jrow]]; 1339 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1340 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1341 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1342 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1343 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1344 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1345 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1346 jrow++; 1347 } 1348 y[8*i] += sum1; 1349 y[8*i+1] += sum2; 1350 y[8*i+2] += sum3; 1351 y[8*i+3] += sum4; 1352 y[8*i+4] += sum5; 1353 y[8*i+5] += sum6; 1354 y[8*i+6] += sum7; 1355 y[8*i+7] += sum8; 1356 } 1357 1358 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1359 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1360 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1365 { 1366 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1367 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1368 const PetscScalar *x,*v; 1369 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1370 PetscErrorCode ierr; 1371 const PetscInt m = b->AIJ->rmap->n,*idx; 1372 PetscInt n,i; 1373 1374 PetscFunctionBegin; 1375 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1376 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1377 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1378 for (i=0; i<m; i++) { 1379 idx = a->j + a->i[i]; 1380 v = a->a + a->i[i]; 1381 n = a->i[i+1] - a->i[i]; 1382 alpha1 = x[8*i]; 1383 alpha2 = x[8*i+1]; 1384 alpha3 = x[8*i+2]; 1385 alpha4 = x[8*i+3]; 1386 alpha5 = x[8*i+4]; 1387 alpha6 = x[8*i+5]; 1388 alpha7 = x[8*i+6]; 1389 alpha8 = x[8*i+7]; 1390 while (n-->0) { 1391 y[8*(*idx)] += alpha1*(*v); 1392 y[8*(*idx)+1] += alpha2*(*v); 1393 y[8*(*idx)+2] += alpha3*(*v); 1394 y[8*(*idx)+3] += alpha4*(*v); 1395 y[8*(*idx)+4] += alpha5*(*v); 1396 y[8*(*idx)+5] += alpha6*(*v); 1397 y[8*(*idx)+6] += alpha7*(*v); 1398 y[8*(*idx)+7] += alpha8*(*v); 1399 idx++; v++; 1400 } 1401 } 1402 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1403 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1404 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /* ------------------------------------------------------------------------------*/ 1409 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1410 { 1411 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1412 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1413 const PetscScalar *x,*v; 1414 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1415 PetscErrorCode ierr; 1416 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1417 PetscInt nonzerorow=0,n,i,jrow,j; 1418 1419 PetscFunctionBegin; 1420 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1421 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1422 idx = a->j; 1423 v = a->a; 1424 ii = a->i; 1425 1426 for (i=0; i<m; i++) { 1427 jrow = ii[i]; 1428 n = ii[i+1] - jrow; 1429 sum1 = 0.0; 1430 sum2 = 0.0; 1431 sum3 = 0.0; 1432 sum4 = 0.0; 1433 sum5 = 0.0; 1434 sum6 = 0.0; 1435 sum7 = 0.0; 1436 sum8 = 0.0; 1437 sum9 = 0.0; 1438 1439 nonzerorow += (n>0); 1440 for (j=0; j<n; j++) { 1441 sum1 += v[jrow]*x[9*idx[jrow]]; 1442 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1443 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1444 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1445 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1446 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1447 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1448 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1449 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1450 jrow++; 1451 } 1452 y[9*i] = sum1; 1453 y[9*i+1] = sum2; 1454 y[9*i+2] = sum3; 1455 y[9*i+3] = sum4; 1456 y[9*i+4] = sum5; 1457 y[9*i+5] = sum6; 1458 y[9*i+6] = sum7; 1459 y[9*i+7] = sum8; 1460 y[9*i+8] = sum9; 1461 } 1462 1463 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1464 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1465 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1466 PetscFunctionReturn(0); 1467 } 1468 1469 /* ------------------------------------------------------------------------------*/ 1470 1471 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1472 { 1473 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1474 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1475 const PetscScalar *x,*v; 1476 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1477 PetscErrorCode ierr; 1478 const PetscInt m = b->AIJ->rmap->n,*idx; 1479 PetscInt n,i; 1480 1481 PetscFunctionBegin; 1482 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1483 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1484 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1485 1486 for (i=0; i<m; i++) { 1487 idx = a->j + a->i[i]; 1488 v = a->a + a->i[i]; 1489 n = a->i[i+1] - a->i[i]; 1490 alpha1 = x[9*i]; 1491 alpha2 = x[9*i+1]; 1492 alpha3 = x[9*i+2]; 1493 alpha4 = x[9*i+3]; 1494 alpha5 = x[9*i+4]; 1495 alpha6 = x[9*i+5]; 1496 alpha7 = x[9*i+6]; 1497 alpha8 = x[9*i+7]; 1498 alpha9 = x[9*i+8]; 1499 while (n-->0) { 1500 y[9*(*idx)] += alpha1*(*v); 1501 y[9*(*idx)+1] += alpha2*(*v); 1502 y[9*(*idx)+2] += alpha3*(*v); 1503 y[9*(*idx)+3] += alpha4*(*v); 1504 y[9*(*idx)+4] += alpha5*(*v); 1505 y[9*(*idx)+5] += alpha6*(*v); 1506 y[9*(*idx)+6] += alpha7*(*v); 1507 y[9*(*idx)+7] += alpha8*(*v); 1508 y[9*(*idx)+8] += alpha9*(*v); 1509 idx++; v++; 1510 } 1511 } 1512 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1513 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1514 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1519 { 1520 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1521 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1522 const PetscScalar *x,*v; 1523 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1524 PetscErrorCode ierr; 1525 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1526 PetscInt n,i,jrow,j; 1527 1528 PetscFunctionBegin; 1529 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1530 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1531 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1532 idx = a->j; 1533 v = a->a; 1534 ii = a->i; 1535 1536 for (i=0; i<m; i++) { 1537 jrow = ii[i]; 1538 n = ii[i+1] - jrow; 1539 sum1 = 0.0; 1540 sum2 = 0.0; 1541 sum3 = 0.0; 1542 sum4 = 0.0; 1543 sum5 = 0.0; 1544 sum6 = 0.0; 1545 sum7 = 0.0; 1546 sum8 = 0.0; 1547 sum9 = 0.0; 1548 for (j=0; j<n; j++) { 1549 sum1 += v[jrow]*x[9*idx[jrow]]; 1550 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1551 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1552 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1553 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1554 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1555 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1556 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1557 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1558 jrow++; 1559 } 1560 y[9*i] += sum1; 1561 y[9*i+1] += sum2; 1562 y[9*i+2] += sum3; 1563 y[9*i+3] += sum4; 1564 y[9*i+4] += sum5; 1565 y[9*i+5] += sum6; 1566 y[9*i+6] += sum7; 1567 y[9*i+7] += sum8; 1568 y[9*i+8] += sum9; 1569 } 1570 1571 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1572 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1573 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1574 PetscFunctionReturn(0); 1575 } 1576 1577 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1578 { 1579 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1580 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1581 const PetscScalar *x,*v; 1582 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1583 PetscErrorCode ierr; 1584 const PetscInt m = b->AIJ->rmap->n,*idx; 1585 PetscInt n,i; 1586 1587 PetscFunctionBegin; 1588 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1589 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1590 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1591 for (i=0; i<m; i++) { 1592 idx = a->j + a->i[i]; 1593 v = a->a + a->i[i]; 1594 n = a->i[i+1] - a->i[i]; 1595 alpha1 = x[9*i]; 1596 alpha2 = x[9*i+1]; 1597 alpha3 = x[9*i+2]; 1598 alpha4 = x[9*i+3]; 1599 alpha5 = x[9*i+4]; 1600 alpha6 = x[9*i+5]; 1601 alpha7 = x[9*i+6]; 1602 alpha8 = x[9*i+7]; 1603 alpha9 = x[9*i+8]; 1604 while (n-->0) { 1605 y[9*(*idx)] += alpha1*(*v); 1606 y[9*(*idx)+1] += alpha2*(*v); 1607 y[9*(*idx)+2] += alpha3*(*v); 1608 y[9*(*idx)+3] += alpha4*(*v); 1609 y[9*(*idx)+4] += alpha5*(*v); 1610 y[9*(*idx)+5] += alpha6*(*v); 1611 y[9*(*idx)+6] += alpha7*(*v); 1612 y[9*(*idx)+7] += alpha8*(*v); 1613 y[9*(*idx)+8] += alpha9*(*v); 1614 idx++; v++; 1615 } 1616 } 1617 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1618 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1619 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1623 { 1624 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1625 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1626 const PetscScalar *x,*v; 1627 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1628 PetscErrorCode ierr; 1629 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1630 PetscInt nonzerorow=0,n,i,jrow,j; 1631 1632 PetscFunctionBegin; 1633 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1634 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1635 idx = a->j; 1636 v = a->a; 1637 ii = a->i; 1638 1639 for (i=0; i<m; i++) { 1640 jrow = ii[i]; 1641 n = ii[i+1] - jrow; 1642 sum1 = 0.0; 1643 sum2 = 0.0; 1644 sum3 = 0.0; 1645 sum4 = 0.0; 1646 sum5 = 0.0; 1647 sum6 = 0.0; 1648 sum7 = 0.0; 1649 sum8 = 0.0; 1650 sum9 = 0.0; 1651 sum10 = 0.0; 1652 1653 nonzerorow += (n>0); 1654 for (j=0; j<n; j++) { 1655 sum1 += v[jrow]*x[10*idx[jrow]]; 1656 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1657 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1658 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1659 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1660 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1661 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1662 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1663 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1664 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1665 jrow++; 1666 } 1667 y[10*i] = sum1; 1668 y[10*i+1] = sum2; 1669 y[10*i+2] = sum3; 1670 y[10*i+3] = sum4; 1671 y[10*i+4] = sum5; 1672 y[10*i+5] = sum6; 1673 y[10*i+6] = sum7; 1674 y[10*i+7] = sum8; 1675 y[10*i+8] = sum9; 1676 y[10*i+9] = sum10; 1677 } 1678 1679 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1680 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1681 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1686 { 1687 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1689 const PetscScalar *x,*v; 1690 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1691 PetscErrorCode ierr; 1692 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1693 PetscInt n,i,jrow,j; 1694 1695 PetscFunctionBegin; 1696 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1697 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1698 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1699 idx = a->j; 1700 v = a->a; 1701 ii = a->i; 1702 1703 for (i=0; i<m; i++) { 1704 jrow = ii[i]; 1705 n = ii[i+1] - jrow; 1706 sum1 = 0.0; 1707 sum2 = 0.0; 1708 sum3 = 0.0; 1709 sum4 = 0.0; 1710 sum5 = 0.0; 1711 sum6 = 0.0; 1712 sum7 = 0.0; 1713 sum8 = 0.0; 1714 sum9 = 0.0; 1715 sum10 = 0.0; 1716 for (j=0; j<n; j++) { 1717 sum1 += v[jrow]*x[10*idx[jrow]]; 1718 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1719 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1720 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1721 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1722 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1723 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1724 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1725 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1726 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1727 jrow++; 1728 } 1729 y[10*i] += sum1; 1730 y[10*i+1] += sum2; 1731 y[10*i+2] += sum3; 1732 y[10*i+3] += sum4; 1733 y[10*i+4] += sum5; 1734 y[10*i+5] += sum6; 1735 y[10*i+6] += sum7; 1736 y[10*i+7] += sum8; 1737 y[10*i+8] += sum9; 1738 y[10*i+9] += sum10; 1739 } 1740 1741 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1742 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1743 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 1747 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1748 { 1749 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1751 const PetscScalar *x,*v; 1752 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1753 PetscErrorCode ierr; 1754 const PetscInt m = b->AIJ->rmap->n,*idx; 1755 PetscInt n,i; 1756 1757 PetscFunctionBegin; 1758 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1759 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1760 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1761 1762 for (i=0; i<m; i++) { 1763 idx = a->j + a->i[i]; 1764 v = a->a + a->i[i]; 1765 n = a->i[i+1] - a->i[i]; 1766 alpha1 = x[10*i]; 1767 alpha2 = x[10*i+1]; 1768 alpha3 = x[10*i+2]; 1769 alpha4 = x[10*i+3]; 1770 alpha5 = x[10*i+4]; 1771 alpha6 = x[10*i+5]; 1772 alpha7 = x[10*i+6]; 1773 alpha8 = x[10*i+7]; 1774 alpha9 = x[10*i+8]; 1775 alpha10 = x[10*i+9]; 1776 while (n-->0) { 1777 y[10*(*idx)] += alpha1*(*v); 1778 y[10*(*idx)+1] += alpha2*(*v); 1779 y[10*(*idx)+2] += alpha3*(*v); 1780 y[10*(*idx)+3] += alpha4*(*v); 1781 y[10*(*idx)+4] += alpha5*(*v); 1782 y[10*(*idx)+5] += alpha6*(*v); 1783 y[10*(*idx)+6] += alpha7*(*v); 1784 y[10*(*idx)+7] += alpha8*(*v); 1785 y[10*(*idx)+8] += alpha9*(*v); 1786 y[10*(*idx)+9] += alpha10*(*v); 1787 idx++; v++; 1788 } 1789 } 1790 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1791 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1792 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1793 PetscFunctionReturn(0); 1794 } 1795 1796 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1797 { 1798 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1799 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1800 const PetscScalar *x,*v; 1801 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1802 PetscErrorCode ierr; 1803 const PetscInt m = b->AIJ->rmap->n,*idx; 1804 PetscInt n,i; 1805 1806 PetscFunctionBegin; 1807 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1808 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1809 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1810 for (i=0; i<m; i++) { 1811 idx = a->j + a->i[i]; 1812 v = a->a + a->i[i]; 1813 n = a->i[i+1] - a->i[i]; 1814 alpha1 = x[10*i]; 1815 alpha2 = x[10*i+1]; 1816 alpha3 = x[10*i+2]; 1817 alpha4 = x[10*i+3]; 1818 alpha5 = x[10*i+4]; 1819 alpha6 = x[10*i+5]; 1820 alpha7 = x[10*i+6]; 1821 alpha8 = x[10*i+7]; 1822 alpha9 = x[10*i+8]; 1823 alpha10 = x[10*i+9]; 1824 while (n-->0) { 1825 y[10*(*idx)] += alpha1*(*v); 1826 y[10*(*idx)+1] += alpha2*(*v); 1827 y[10*(*idx)+2] += alpha3*(*v); 1828 y[10*(*idx)+3] += alpha4*(*v); 1829 y[10*(*idx)+4] += alpha5*(*v); 1830 y[10*(*idx)+5] += alpha6*(*v); 1831 y[10*(*idx)+6] += alpha7*(*v); 1832 y[10*(*idx)+7] += alpha8*(*v); 1833 y[10*(*idx)+8] += alpha9*(*v); 1834 y[10*(*idx)+9] += alpha10*(*v); 1835 idx++; v++; 1836 } 1837 } 1838 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1839 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1840 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 1845 /*--------------------------------------------------------------------------------------------*/ 1846 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1847 { 1848 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1849 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1850 const PetscScalar *x,*v; 1851 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1852 PetscErrorCode ierr; 1853 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1854 PetscInt nonzerorow=0,n,i,jrow,j; 1855 1856 PetscFunctionBegin; 1857 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1858 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1859 idx = a->j; 1860 v = a->a; 1861 ii = a->i; 1862 1863 for (i=0; i<m; i++) { 1864 jrow = ii[i]; 1865 n = ii[i+1] - jrow; 1866 sum1 = 0.0; 1867 sum2 = 0.0; 1868 sum3 = 0.0; 1869 sum4 = 0.0; 1870 sum5 = 0.0; 1871 sum6 = 0.0; 1872 sum7 = 0.0; 1873 sum8 = 0.0; 1874 sum9 = 0.0; 1875 sum10 = 0.0; 1876 sum11 = 0.0; 1877 1878 nonzerorow += (n>0); 1879 for (j=0; j<n; j++) { 1880 sum1 += v[jrow]*x[11*idx[jrow]]; 1881 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1882 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1883 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1884 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1885 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1886 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1887 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1888 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1889 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1890 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1891 jrow++; 1892 } 1893 y[11*i] = sum1; 1894 y[11*i+1] = sum2; 1895 y[11*i+2] = sum3; 1896 y[11*i+3] = sum4; 1897 y[11*i+4] = sum5; 1898 y[11*i+5] = sum6; 1899 y[11*i+6] = sum7; 1900 y[11*i+7] = sum8; 1901 y[11*i+8] = sum9; 1902 y[11*i+9] = sum10; 1903 y[11*i+10] = sum11; 1904 } 1905 1906 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1907 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1908 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1913 { 1914 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1915 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1916 const PetscScalar *x,*v; 1917 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1918 PetscErrorCode ierr; 1919 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1920 PetscInt n,i,jrow,j; 1921 1922 PetscFunctionBegin; 1923 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1924 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1925 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1926 idx = a->j; 1927 v = a->a; 1928 ii = a->i; 1929 1930 for (i=0; i<m; i++) { 1931 jrow = ii[i]; 1932 n = ii[i+1] - jrow; 1933 sum1 = 0.0; 1934 sum2 = 0.0; 1935 sum3 = 0.0; 1936 sum4 = 0.0; 1937 sum5 = 0.0; 1938 sum6 = 0.0; 1939 sum7 = 0.0; 1940 sum8 = 0.0; 1941 sum9 = 0.0; 1942 sum10 = 0.0; 1943 sum11 = 0.0; 1944 for (j=0; j<n; j++) { 1945 sum1 += v[jrow]*x[11*idx[jrow]]; 1946 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1947 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1948 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1949 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1950 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1951 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1952 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1953 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1954 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1955 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1956 jrow++; 1957 } 1958 y[11*i] += sum1; 1959 y[11*i+1] += sum2; 1960 y[11*i+2] += sum3; 1961 y[11*i+3] += sum4; 1962 y[11*i+4] += sum5; 1963 y[11*i+5] += sum6; 1964 y[11*i+6] += sum7; 1965 y[11*i+7] += sum8; 1966 y[11*i+8] += sum9; 1967 y[11*i+9] += sum10; 1968 y[11*i+10] += sum11; 1969 } 1970 1971 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 1972 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1973 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1974 PetscFunctionReturn(0); 1975 } 1976 1977 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1978 { 1979 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1980 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1981 const PetscScalar *x,*v; 1982 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1983 PetscErrorCode ierr; 1984 const PetscInt m = b->AIJ->rmap->n,*idx; 1985 PetscInt n,i; 1986 1987 PetscFunctionBegin; 1988 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1989 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1990 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1991 1992 for (i=0; i<m; i++) { 1993 idx = a->j + a->i[i]; 1994 v = a->a + a->i[i]; 1995 n = a->i[i+1] - a->i[i]; 1996 alpha1 = x[11*i]; 1997 alpha2 = x[11*i+1]; 1998 alpha3 = x[11*i+2]; 1999 alpha4 = x[11*i+3]; 2000 alpha5 = x[11*i+4]; 2001 alpha6 = x[11*i+5]; 2002 alpha7 = x[11*i+6]; 2003 alpha8 = x[11*i+7]; 2004 alpha9 = x[11*i+8]; 2005 alpha10 = x[11*i+9]; 2006 alpha11 = x[11*i+10]; 2007 while (n-->0) { 2008 y[11*(*idx)] += alpha1*(*v); 2009 y[11*(*idx)+1] += alpha2*(*v); 2010 y[11*(*idx)+2] += alpha3*(*v); 2011 y[11*(*idx)+3] += alpha4*(*v); 2012 y[11*(*idx)+4] += alpha5*(*v); 2013 y[11*(*idx)+5] += alpha6*(*v); 2014 y[11*(*idx)+6] += alpha7*(*v); 2015 y[11*(*idx)+7] += alpha8*(*v); 2016 y[11*(*idx)+8] += alpha9*(*v); 2017 y[11*(*idx)+9] += alpha10*(*v); 2018 y[11*(*idx)+10] += alpha11*(*v); 2019 idx++; v++; 2020 } 2021 } 2022 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2023 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2024 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2025 PetscFunctionReturn(0); 2026 } 2027 2028 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2029 { 2030 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2031 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2032 const PetscScalar *x,*v; 2033 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2034 PetscErrorCode ierr; 2035 const PetscInt m = b->AIJ->rmap->n,*idx; 2036 PetscInt n,i; 2037 2038 PetscFunctionBegin; 2039 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2040 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2041 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2042 for (i=0; i<m; i++) { 2043 idx = a->j + a->i[i]; 2044 v = a->a + a->i[i]; 2045 n = a->i[i+1] - a->i[i]; 2046 alpha1 = x[11*i]; 2047 alpha2 = x[11*i+1]; 2048 alpha3 = x[11*i+2]; 2049 alpha4 = x[11*i+3]; 2050 alpha5 = x[11*i+4]; 2051 alpha6 = x[11*i+5]; 2052 alpha7 = x[11*i+6]; 2053 alpha8 = x[11*i+7]; 2054 alpha9 = x[11*i+8]; 2055 alpha10 = x[11*i+9]; 2056 alpha11 = x[11*i+10]; 2057 while (n-->0) { 2058 y[11*(*idx)] += alpha1*(*v); 2059 y[11*(*idx)+1] += alpha2*(*v); 2060 y[11*(*idx)+2] += alpha3*(*v); 2061 y[11*(*idx)+3] += alpha4*(*v); 2062 y[11*(*idx)+4] += alpha5*(*v); 2063 y[11*(*idx)+5] += alpha6*(*v); 2064 y[11*(*idx)+6] += alpha7*(*v); 2065 y[11*(*idx)+7] += alpha8*(*v); 2066 y[11*(*idx)+8] += alpha9*(*v); 2067 y[11*(*idx)+9] += alpha10*(*v); 2068 y[11*(*idx)+10] += alpha11*(*v); 2069 idx++; v++; 2070 } 2071 } 2072 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2073 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2074 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 2079 /*--------------------------------------------------------------------------------------------*/ 2080 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2081 { 2082 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2083 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2084 const PetscScalar *x,*v; 2085 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2086 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2087 PetscErrorCode ierr; 2088 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2089 PetscInt nonzerorow=0,n,i,jrow,j; 2090 2091 PetscFunctionBegin; 2092 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2093 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2094 idx = a->j; 2095 v = a->a; 2096 ii = a->i; 2097 2098 for (i=0; i<m; i++) { 2099 jrow = ii[i]; 2100 n = ii[i+1] - jrow; 2101 sum1 = 0.0; 2102 sum2 = 0.0; 2103 sum3 = 0.0; 2104 sum4 = 0.0; 2105 sum5 = 0.0; 2106 sum6 = 0.0; 2107 sum7 = 0.0; 2108 sum8 = 0.0; 2109 sum9 = 0.0; 2110 sum10 = 0.0; 2111 sum11 = 0.0; 2112 sum12 = 0.0; 2113 sum13 = 0.0; 2114 sum14 = 0.0; 2115 sum15 = 0.0; 2116 sum16 = 0.0; 2117 2118 nonzerorow += (n>0); 2119 for (j=0; j<n; j++) { 2120 sum1 += v[jrow]*x[16*idx[jrow]]; 2121 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2122 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2123 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2124 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2125 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2126 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2127 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2128 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2129 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2130 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2131 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2132 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2133 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2134 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2135 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2136 jrow++; 2137 } 2138 y[16*i] = sum1; 2139 y[16*i+1] = sum2; 2140 y[16*i+2] = sum3; 2141 y[16*i+3] = sum4; 2142 y[16*i+4] = sum5; 2143 y[16*i+5] = sum6; 2144 y[16*i+6] = sum7; 2145 y[16*i+7] = sum8; 2146 y[16*i+8] = sum9; 2147 y[16*i+9] = sum10; 2148 y[16*i+10] = sum11; 2149 y[16*i+11] = sum12; 2150 y[16*i+12] = sum13; 2151 y[16*i+13] = sum14; 2152 y[16*i+14] = sum15; 2153 y[16*i+15] = sum16; 2154 } 2155 2156 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2157 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2158 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2163 { 2164 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2165 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2166 const PetscScalar *x,*v; 2167 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2168 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2169 PetscErrorCode ierr; 2170 const PetscInt m = b->AIJ->rmap->n,*idx; 2171 PetscInt n,i; 2172 2173 PetscFunctionBegin; 2174 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2175 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2176 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2177 2178 for (i=0; i<m; i++) { 2179 idx = a->j + a->i[i]; 2180 v = a->a + a->i[i]; 2181 n = a->i[i+1] - a->i[i]; 2182 alpha1 = x[16*i]; 2183 alpha2 = x[16*i+1]; 2184 alpha3 = x[16*i+2]; 2185 alpha4 = x[16*i+3]; 2186 alpha5 = x[16*i+4]; 2187 alpha6 = x[16*i+5]; 2188 alpha7 = x[16*i+6]; 2189 alpha8 = x[16*i+7]; 2190 alpha9 = x[16*i+8]; 2191 alpha10 = x[16*i+9]; 2192 alpha11 = x[16*i+10]; 2193 alpha12 = x[16*i+11]; 2194 alpha13 = x[16*i+12]; 2195 alpha14 = x[16*i+13]; 2196 alpha15 = x[16*i+14]; 2197 alpha16 = x[16*i+15]; 2198 while (n-->0) { 2199 y[16*(*idx)] += alpha1*(*v); 2200 y[16*(*idx)+1] += alpha2*(*v); 2201 y[16*(*idx)+2] += alpha3*(*v); 2202 y[16*(*idx)+3] += alpha4*(*v); 2203 y[16*(*idx)+4] += alpha5*(*v); 2204 y[16*(*idx)+5] += alpha6*(*v); 2205 y[16*(*idx)+6] += alpha7*(*v); 2206 y[16*(*idx)+7] += alpha8*(*v); 2207 y[16*(*idx)+8] += alpha9*(*v); 2208 y[16*(*idx)+9] += alpha10*(*v); 2209 y[16*(*idx)+10] += alpha11*(*v); 2210 y[16*(*idx)+11] += alpha12*(*v); 2211 y[16*(*idx)+12] += alpha13*(*v); 2212 y[16*(*idx)+13] += alpha14*(*v); 2213 y[16*(*idx)+14] += alpha15*(*v); 2214 y[16*(*idx)+15] += alpha16*(*v); 2215 idx++; v++; 2216 } 2217 } 2218 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2219 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2220 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2221 PetscFunctionReturn(0); 2222 } 2223 2224 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2225 { 2226 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2227 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2228 const PetscScalar *x,*v; 2229 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2230 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2231 PetscErrorCode ierr; 2232 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2233 PetscInt n,i,jrow,j; 2234 2235 PetscFunctionBegin; 2236 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2237 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2238 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2239 idx = a->j; 2240 v = a->a; 2241 ii = a->i; 2242 2243 for (i=0; i<m; i++) { 2244 jrow = ii[i]; 2245 n = ii[i+1] - jrow; 2246 sum1 = 0.0; 2247 sum2 = 0.0; 2248 sum3 = 0.0; 2249 sum4 = 0.0; 2250 sum5 = 0.0; 2251 sum6 = 0.0; 2252 sum7 = 0.0; 2253 sum8 = 0.0; 2254 sum9 = 0.0; 2255 sum10 = 0.0; 2256 sum11 = 0.0; 2257 sum12 = 0.0; 2258 sum13 = 0.0; 2259 sum14 = 0.0; 2260 sum15 = 0.0; 2261 sum16 = 0.0; 2262 for (j=0; j<n; j++) { 2263 sum1 += v[jrow]*x[16*idx[jrow]]; 2264 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2265 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2266 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2267 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2268 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2269 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2270 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2271 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2272 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2273 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2274 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2275 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2276 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2277 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2278 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2279 jrow++; 2280 } 2281 y[16*i] += sum1; 2282 y[16*i+1] += sum2; 2283 y[16*i+2] += sum3; 2284 y[16*i+3] += sum4; 2285 y[16*i+4] += sum5; 2286 y[16*i+5] += sum6; 2287 y[16*i+6] += sum7; 2288 y[16*i+7] += sum8; 2289 y[16*i+8] += sum9; 2290 y[16*i+9] += sum10; 2291 y[16*i+10] += sum11; 2292 y[16*i+11] += sum12; 2293 y[16*i+12] += sum13; 2294 y[16*i+13] += sum14; 2295 y[16*i+14] += sum15; 2296 y[16*i+15] += sum16; 2297 } 2298 2299 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2300 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2301 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2306 { 2307 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2308 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2309 const PetscScalar *x,*v; 2310 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2311 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2312 PetscErrorCode ierr; 2313 const PetscInt m = b->AIJ->rmap->n,*idx; 2314 PetscInt n,i; 2315 2316 PetscFunctionBegin; 2317 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2318 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2319 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2320 for (i=0; i<m; i++) { 2321 idx = a->j + a->i[i]; 2322 v = a->a + a->i[i]; 2323 n = a->i[i+1] - a->i[i]; 2324 alpha1 = x[16*i]; 2325 alpha2 = x[16*i+1]; 2326 alpha3 = x[16*i+2]; 2327 alpha4 = x[16*i+3]; 2328 alpha5 = x[16*i+4]; 2329 alpha6 = x[16*i+5]; 2330 alpha7 = x[16*i+6]; 2331 alpha8 = x[16*i+7]; 2332 alpha9 = x[16*i+8]; 2333 alpha10 = x[16*i+9]; 2334 alpha11 = x[16*i+10]; 2335 alpha12 = x[16*i+11]; 2336 alpha13 = x[16*i+12]; 2337 alpha14 = x[16*i+13]; 2338 alpha15 = x[16*i+14]; 2339 alpha16 = x[16*i+15]; 2340 while (n-->0) { 2341 y[16*(*idx)] += alpha1*(*v); 2342 y[16*(*idx)+1] += alpha2*(*v); 2343 y[16*(*idx)+2] += alpha3*(*v); 2344 y[16*(*idx)+3] += alpha4*(*v); 2345 y[16*(*idx)+4] += alpha5*(*v); 2346 y[16*(*idx)+5] += alpha6*(*v); 2347 y[16*(*idx)+6] += alpha7*(*v); 2348 y[16*(*idx)+7] += alpha8*(*v); 2349 y[16*(*idx)+8] += alpha9*(*v); 2350 y[16*(*idx)+9] += alpha10*(*v); 2351 y[16*(*idx)+10] += alpha11*(*v); 2352 y[16*(*idx)+11] += alpha12*(*v); 2353 y[16*(*idx)+12] += alpha13*(*v); 2354 y[16*(*idx)+13] += alpha14*(*v); 2355 y[16*(*idx)+14] += alpha15*(*v); 2356 y[16*(*idx)+15] += alpha16*(*v); 2357 idx++; v++; 2358 } 2359 } 2360 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2361 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2362 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2363 PetscFunctionReturn(0); 2364 } 2365 2366 /*--------------------------------------------------------------------------------------------*/ 2367 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2368 { 2369 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2370 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2371 const PetscScalar *x,*v; 2372 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2373 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2374 PetscErrorCode ierr; 2375 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2376 PetscInt nonzerorow=0,n,i,jrow,j; 2377 2378 PetscFunctionBegin; 2379 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2380 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2381 idx = a->j; 2382 v = a->a; 2383 ii = a->i; 2384 2385 for (i=0; i<m; i++) { 2386 jrow = ii[i]; 2387 n = ii[i+1] - jrow; 2388 sum1 = 0.0; 2389 sum2 = 0.0; 2390 sum3 = 0.0; 2391 sum4 = 0.0; 2392 sum5 = 0.0; 2393 sum6 = 0.0; 2394 sum7 = 0.0; 2395 sum8 = 0.0; 2396 sum9 = 0.0; 2397 sum10 = 0.0; 2398 sum11 = 0.0; 2399 sum12 = 0.0; 2400 sum13 = 0.0; 2401 sum14 = 0.0; 2402 sum15 = 0.0; 2403 sum16 = 0.0; 2404 sum17 = 0.0; 2405 sum18 = 0.0; 2406 2407 nonzerorow += (n>0); 2408 for (j=0; j<n; j++) { 2409 sum1 += v[jrow]*x[18*idx[jrow]]; 2410 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2411 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2412 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2413 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2414 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2415 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2416 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2417 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2418 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2419 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2420 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2421 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2422 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2423 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2424 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2425 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2426 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2427 jrow++; 2428 } 2429 y[18*i] = sum1; 2430 y[18*i+1] = sum2; 2431 y[18*i+2] = sum3; 2432 y[18*i+3] = sum4; 2433 y[18*i+4] = sum5; 2434 y[18*i+5] = sum6; 2435 y[18*i+6] = sum7; 2436 y[18*i+7] = sum8; 2437 y[18*i+8] = sum9; 2438 y[18*i+9] = sum10; 2439 y[18*i+10] = sum11; 2440 y[18*i+11] = sum12; 2441 y[18*i+12] = sum13; 2442 y[18*i+13] = sum14; 2443 y[18*i+14] = sum15; 2444 y[18*i+15] = sum16; 2445 y[18*i+16] = sum17; 2446 y[18*i+17] = sum18; 2447 } 2448 2449 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2450 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2451 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2456 { 2457 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2458 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2459 const PetscScalar *x,*v; 2460 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2461 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2462 PetscErrorCode ierr; 2463 const PetscInt m = b->AIJ->rmap->n,*idx; 2464 PetscInt n,i; 2465 2466 PetscFunctionBegin; 2467 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2468 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2469 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2470 2471 for (i=0; i<m; i++) { 2472 idx = a->j + a->i[i]; 2473 v = a->a + a->i[i]; 2474 n = a->i[i+1] - a->i[i]; 2475 alpha1 = x[18*i]; 2476 alpha2 = x[18*i+1]; 2477 alpha3 = x[18*i+2]; 2478 alpha4 = x[18*i+3]; 2479 alpha5 = x[18*i+4]; 2480 alpha6 = x[18*i+5]; 2481 alpha7 = x[18*i+6]; 2482 alpha8 = x[18*i+7]; 2483 alpha9 = x[18*i+8]; 2484 alpha10 = x[18*i+9]; 2485 alpha11 = x[18*i+10]; 2486 alpha12 = x[18*i+11]; 2487 alpha13 = x[18*i+12]; 2488 alpha14 = x[18*i+13]; 2489 alpha15 = x[18*i+14]; 2490 alpha16 = x[18*i+15]; 2491 alpha17 = x[18*i+16]; 2492 alpha18 = x[18*i+17]; 2493 while (n-->0) { 2494 y[18*(*idx)] += alpha1*(*v); 2495 y[18*(*idx)+1] += alpha2*(*v); 2496 y[18*(*idx)+2] += alpha3*(*v); 2497 y[18*(*idx)+3] += alpha4*(*v); 2498 y[18*(*idx)+4] += alpha5*(*v); 2499 y[18*(*idx)+5] += alpha6*(*v); 2500 y[18*(*idx)+6] += alpha7*(*v); 2501 y[18*(*idx)+7] += alpha8*(*v); 2502 y[18*(*idx)+8] += alpha9*(*v); 2503 y[18*(*idx)+9] += alpha10*(*v); 2504 y[18*(*idx)+10] += alpha11*(*v); 2505 y[18*(*idx)+11] += alpha12*(*v); 2506 y[18*(*idx)+12] += alpha13*(*v); 2507 y[18*(*idx)+13] += alpha14*(*v); 2508 y[18*(*idx)+14] += alpha15*(*v); 2509 y[18*(*idx)+15] += alpha16*(*v); 2510 y[18*(*idx)+16] += alpha17*(*v); 2511 y[18*(*idx)+17] += alpha18*(*v); 2512 idx++; v++; 2513 } 2514 } 2515 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2516 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2517 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2518 PetscFunctionReturn(0); 2519 } 2520 2521 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2522 { 2523 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2524 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2525 const PetscScalar *x,*v; 2526 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2527 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2528 PetscErrorCode ierr; 2529 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2530 PetscInt n,i,jrow,j; 2531 2532 PetscFunctionBegin; 2533 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2534 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2535 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2536 idx = a->j; 2537 v = a->a; 2538 ii = a->i; 2539 2540 for (i=0; i<m; i++) { 2541 jrow = ii[i]; 2542 n = ii[i+1] - jrow; 2543 sum1 = 0.0; 2544 sum2 = 0.0; 2545 sum3 = 0.0; 2546 sum4 = 0.0; 2547 sum5 = 0.0; 2548 sum6 = 0.0; 2549 sum7 = 0.0; 2550 sum8 = 0.0; 2551 sum9 = 0.0; 2552 sum10 = 0.0; 2553 sum11 = 0.0; 2554 sum12 = 0.0; 2555 sum13 = 0.0; 2556 sum14 = 0.0; 2557 sum15 = 0.0; 2558 sum16 = 0.0; 2559 sum17 = 0.0; 2560 sum18 = 0.0; 2561 for (j=0; j<n; j++) { 2562 sum1 += v[jrow]*x[18*idx[jrow]]; 2563 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2564 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2565 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2566 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2567 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2568 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2569 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2570 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2571 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2572 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2573 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2574 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2575 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2576 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2577 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2578 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2579 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2580 jrow++; 2581 } 2582 y[18*i] += sum1; 2583 y[18*i+1] += sum2; 2584 y[18*i+2] += sum3; 2585 y[18*i+3] += sum4; 2586 y[18*i+4] += sum5; 2587 y[18*i+5] += sum6; 2588 y[18*i+6] += sum7; 2589 y[18*i+7] += sum8; 2590 y[18*i+8] += sum9; 2591 y[18*i+9] += sum10; 2592 y[18*i+10] += sum11; 2593 y[18*i+11] += sum12; 2594 y[18*i+12] += sum13; 2595 y[18*i+13] += sum14; 2596 y[18*i+14] += sum15; 2597 y[18*i+15] += sum16; 2598 y[18*i+16] += sum17; 2599 y[18*i+17] += sum18; 2600 } 2601 2602 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2603 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2604 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2605 PetscFunctionReturn(0); 2606 } 2607 2608 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2609 { 2610 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2611 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2612 const PetscScalar *x,*v; 2613 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2614 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2615 PetscErrorCode ierr; 2616 const PetscInt m = b->AIJ->rmap->n,*idx; 2617 PetscInt n,i; 2618 2619 PetscFunctionBegin; 2620 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2621 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2622 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2623 for (i=0; i<m; i++) { 2624 idx = a->j + a->i[i]; 2625 v = a->a + a->i[i]; 2626 n = a->i[i+1] - a->i[i]; 2627 alpha1 = x[18*i]; 2628 alpha2 = x[18*i+1]; 2629 alpha3 = x[18*i+2]; 2630 alpha4 = x[18*i+3]; 2631 alpha5 = x[18*i+4]; 2632 alpha6 = x[18*i+5]; 2633 alpha7 = x[18*i+6]; 2634 alpha8 = x[18*i+7]; 2635 alpha9 = x[18*i+8]; 2636 alpha10 = x[18*i+9]; 2637 alpha11 = x[18*i+10]; 2638 alpha12 = x[18*i+11]; 2639 alpha13 = x[18*i+12]; 2640 alpha14 = x[18*i+13]; 2641 alpha15 = x[18*i+14]; 2642 alpha16 = x[18*i+15]; 2643 alpha17 = x[18*i+16]; 2644 alpha18 = x[18*i+17]; 2645 while (n-->0) { 2646 y[18*(*idx)] += alpha1*(*v); 2647 y[18*(*idx)+1] += alpha2*(*v); 2648 y[18*(*idx)+2] += alpha3*(*v); 2649 y[18*(*idx)+3] += alpha4*(*v); 2650 y[18*(*idx)+4] += alpha5*(*v); 2651 y[18*(*idx)+5] += alpha6*(*v); 2652 y[18*(*idx)+6] += alpha7*(*v); 2653 y[18*(*idx)+7] += alpha8*(*v); 2654 y[18*(*idx)+8] += alpha9*(*v); 2655 y[18*(*idx)+9] += alpha10*(*v); 2656 y[18*(*idx)+10] += alpha11*(*v); 2657 y[18*(*idx)+11] += alpha12*(*v); 2658 y[18*(*idx)+12] += alpha13*(*v); 2659 y[18*(*idx)+13] += alpha14*(*v); 2660 y[18*(*idx)+14] += alpha15*(*v); 2661 y[18*(*idx)+15] += alpha16*(*v); 2662 y[18*(*idx)+16] += alpha17*(*v); 2663 y[18*(*idx)+17] += alpha18*(*v); 2664 idx++; v++; 2665 } 2666 } 2667 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2668 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2669 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 2673 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2674 { 2675 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2676 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2677 const PetscScalar *x,*v; 2678 PetscScalar *y,*sums; 2679 PetscErrorCode ierr; 2680 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2681 PetscInt n,i,jrow,j,dof = b->dof,k; 2682 2683 PetscFunctionBegin; 2684 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2685 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2686 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2687 idx = a->j; 2688 v = a->a; 2689 ii = a->i; 2690 2691 for (i=0; i<m; i++) { 2692 jrow = ii[i]; 2693 n = ii[i+1] - jrow; 2694 sums = y + dof*i; 2695 for (j=0; j<n; j++) { 2696 for (k=0; k<dof; k++) { 2697 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2698 } 2699 jrow++; 2700 } 2701 } 2702 2703 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2704 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2705 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2706 PetscFunctionReturn(0); 2707 } 2708 2709 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2710 { 2711 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2712 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2713 const PetscScalar *x,*v; 2714 PetscScalar *y,*sums; 2715 PetscErrorCode ierr; 2716 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2717 PetscInt n,i,jrow,j,dof = b->dof,k; 2718 2719 PetscFunctionBegin; 2720 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2721 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2722 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2723 idx = a->j; 2724 v = a->a; 2725 ii = a->i; 2726 2727 for (i=0; i<m; i++) { 2728 jrow = ii[i]; 2729 n = ii[i+1] - jrow; 2730 sums = y + dof*i; 2731 for (j=0; j<n; j++) { 2732 for (k=0; k<dof; k++) { 2733 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2734 } 2735 jrow++; 2736 } 2737 } 2738 2739 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2740 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2741 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2742 PetscFunctionReturn(0); 2743 } 2744 2745 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2746 { 2747 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2749 const PetscScalar *x,*v,*alpha; 2750 PetscScalar *y; 2751 PetscErrorCode ierr; 2752 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2753 PetscInt n,i,k; 2754 2755 PetscFunctionBegin; 2756 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2757 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2758 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2759 for (i=0; i<m; i++) { 2760 idx = a->j + a->i[i]; 2761 v = a->a + a->i[i]; 2762 n = a->i[i+1] - a->i[i]; 2763 alpha = x + dof*i; 2764 while (n-->0) { 2765 for (k=0; k<dof; k++) { 2766 y[dof*(*idx)+k] += alpha[k]*(*v); 2767 } 2768 idx++; v++; 2769 } 2770 } 2771 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2772 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2773 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2774 PetscFunctionReturn(0); 2775 } 2776 2777 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2778 { 2779 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2780 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2781 const PetscScalar *x,*v,*alpha; 2782 PetscScalar *y; 2783 PetscErrorCode ierr; 2784 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2785 PetscInt n,i,k; 2786 2787 PetscFunctionBegin; 2788 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2789 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2790 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2791 for (i=0; i<m; i++) { 2792 idx = a->j + a->i[i]; 2793 v = a->a + a->i[i]; 2794 n = a->i[i+1] - a->i[i]; 2795 alpha = x + dof*i; 2796 while (n-->0) { 2797 for (k=0; k<dof; k++) { 2798 y[dof*(*idx)+k] += alpha[k]*(*v); 2799 } 2800 idx++; v++; 2801 } 2802 } 2803 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2804 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2805 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2806 PetscFunctionReturn(0); 2807 } 2808 2809 /*===================================================================================*/ 2810 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2811 { 2812 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2813 PetscErrorCode ierr; 2814 2815 PetscFunctionBegin; 2816 /* start the scatter */ 2817 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2818 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2819 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2820 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2821 PetscFunctionReturn(0); 2822 } 2823 2824 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2825 { 2826 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2827 PetscErrorCode ierr; 2828 2829 PetscFunctionBegin; 2830 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2831 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2832 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2833 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2838 { 2839 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2840 PetscErrorCode ierr; 2841 2842 PetscFunctionBegin; 2843 /* start the scatter */ 2844 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2845 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2846 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2847 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2848 PetscFunctionReturn(0); 2849 } 2850 2851 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2852 { 2853 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2854 PetscErrorCode ierr; 2855 2856 PetscFunctionBegin; 2857 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2858 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2859 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2860 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2861 PetscFunctionReturn(0); 2862 } 2863 2864 /* ----------------------------------------------------------------*/ 2865 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2866 { 2867 PetscErrorCode ierr; 2868 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2869 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2870 Mat P =pp->AIJ; 2871 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2872 PetscInt *pti,*ptj,*ptJ; 2873 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2874 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2875 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2876 MatScalar *ca; 2877 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2878 2879 PetscFunctionBegin; 2880 /* Get ij structure of P^T */ 2881 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2882 2883 cn = pn*ppdof; 2884 /* Allocate ci array, arrays for fill computation and */ 2885 /* free space for accumulating nonzero column info */ 2886 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 2887 ci[0] = 0; 2888 2889 /* Work arrays for rows of P^T*A */ 2890 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2891 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2892 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 2893 2894 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2895 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2896 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2897 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 2898 current_space = free_space; 2899 2900 /* Determine symbolic info for each row of C: */ 2901 for (i=0; i<pn; i++) { 2902 ptnzi = pti[i+1] - pti[i]; 2903 ptJ = ptj + pti[i]; 2904 for (dof=0; dof<ppdof; dof++) { 2905 ptanzi = 0; 2906 /* Determine symbolic row of PtA: */ 2907 for (j=0; j<ptnzi; j++) { 2908 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2909 arow = ptJ[j]*ppdof + dof; 2910 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2911 anzj = ai[arow+1] - ai[arow]; 2912 ajj = aj + ai[arow]; 2913 for (k=0; k<anzj; k++) { 2914 if (!ptadenserow[ajj[k]]) { 2915 ptadenserow[ajj[k]] = -1; 2916 ptasparserow[ptanzi++] = ajj[k]; 2917 } 2918 } 2919 } 2920 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2921 ptaj = ptasparserow; 2922 cnzi = 0; 2923 for (j=0; j<ptanzi; j++) { 2924 /* Get offset within block of P */ 2925 pshift = *ptaj%ppdof; 2926 /* Get block row of P */ 2927 prow = (*ptaj++)/ppdof; /* integer division */ 2928 /* P has same number of nonzeros per row as the compressed form */ 2929 pnzj = pi[prow+1] - pi[prow]; 2930 pjj = pj + pi[prow]; 2931 for (k=0;k<pnzj;k++) { 2932 /* Locations in C are shifted by the offset within the block */ 2933 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2934 if (!denserow[pjj[k]*ppdof+pshift]) { 2935 denserow[pjj[k]*ppdof+pshift] = -1; 2936 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2937 } 2938 } 2939 } 2940 2941 /* sort sparserow */ 2942 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2943 2944 /* If free space is not available, make more free space */ 2945 /* Double the amount of total space in the list */ 2946 if (current_space->local_remaining<cnzi) { 2947 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 2948 } 2949 2950 /* Copy data into free space, and zero out denserows */ 2951 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2952 2953 current_space->array += cnzi; 2954 current_space->local_used += cnzi; 2955 current_space->local_remaining -= cnzi; 2956 2957 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2958 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 2959 2960 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2961 /* For now, we will recompute what is needed. */ 2962 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2963 } 2964 } 2965 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2966 /* Allocate space for cj, initialize cj, and */ 2967 /* destroy list of free space and other temporary array(s) */ 2968 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 2969 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2970 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 2971 2972 /* Allocate space for ca */ 2973 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 2974 2975 /* put together the new matrix */ 2976 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2977 ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 2978 2979 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2980 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2981 c = (Mat_SeqAIJ*)((*C)->data); 2982 c->free_a = PETSC_TRUE; 2983 c->free_ij = PETSC_TRUE; 2984 c->nonew = 0; 2985 2986 (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2987 2988 /* Clean up. */ 2989 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2990 PetscFunctionReturn(0); 2991 } 2992 2993 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2994 { 2995 /* This routine requires testing -- first draft only */ 2996 PetscErrorCode ierr; 2997 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2998 Mat P =pp->AIJ; 2999 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3000 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3001 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3002 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3003 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3004 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3005 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3006 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3007 MatScalar *ca=c->a,*caj,*apa; 3008 3009 PetscFunctionBegin; 3010 /* Allocate temporary array for storage of one row of A*P */ 3011 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3012 3013 /* Clear old values in C */ 3014 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3015 3016 for (i=0; i<am; i++) { 3017 /* Form sparse row of A*P */ 3018 anzi = ai[i+1] - ai[i]; 3019 apnzj = 0; 3020 for (j=0; j<anzi; j++) { 3021 /* Get offset within block of P */ 3022 pshift = *aj%ppdof; 3023 /* Get block row of P */ 3024 prow = *aj++/ppdof; /* integer division */ 3025 pnzj = pi[prow+1] - pi[prow]; 3026 pjj = pj + pi[prow]; 3027 paj = pa + pi[prow]; 3028 for (k=0; k<pnzj; k++) { 3029 poffset = pjj[k]*ppdof+pshift; 3030 if (!apjdense[poffset]) { 3031 apjdense[poffset] = -1; 3032 apj[apnzj++] = poffset; 3033 } 3034 apa[poffset] += (*aa)*paj[k]; 3035 } 3036 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3037 aa++; 3038 } 3039 3040 /* Sort the j index array for quick sparse axpy. */ 3041 /* Note: a array does not need sorting as it is in dense storage locations. */ 3042 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3043 3044 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3045 prow = i/ppdof; /* integer division */ 3046 pshift = i%ppdof; 3047 poffset = pi[prow]; 3048 pnzi = pi[prow+1] - poffset; 3049 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3050 pJ = pj+poffset; 3051 pA = pa+poffset; 3052 for (j=0; j<pnzi; j++) { 3053 crow = (*pJ)*ppdof+pshift; 3054 cjj = cj + ci[crow]; 3055 caj = ca + ci[crow]; 3056 pJ++; 3057 /* Perform sparse axpy operation. Note cjj includes apj. */ 3058 for (k=0,nextap=0; nextap<apnzj; k++) { 3059 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3060 } 3061 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3062 pA++; 3063 } 3064 3065 /* Zero the current row info for A*P */ 3066 for (j=0; j<apnzj; j++) { 3067 apa[apj[j]] = 0.; 3068 apjdense[apj[j]] = 0; 3069 } 3070 } 3071 3072 /* Assemble the final matrix and clean up */ 3073 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3074 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3075 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3076 PetscFunctionReturn(0); 3077 } 3078 3079 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3080 { 3081 PetscErrorCode ierr; 3082 3083 PetscFunctionBegin; 3084 if (scall == MAT_INITIAL_MATRIX) { 3085 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3086 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3087 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3088 } 3089 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3090 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3091 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3092 PetscFunctionReturn(0); 3093 } 3094 3095 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3096 { 3097 PetscErrorCode ierr; 3098 3099 PetscFunctionBegin; 3100 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3101 ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr); 3102 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3103 PetscFunctionReturn(0); 3104 } 3105 3106 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3107 { 3108 PetscFunctionBegin; 3109 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3110 PetscFunctionReturn(0); 3111 } 3112 3113 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3114 { 3115 PetscErrorCode ierr; 3116 3117 PetscFunctionBegin; 3118 if (scall == MAT_INITIAL_MATRIX) { 3119 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3120 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 3121 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3122 } 3123 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3124 ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 3125 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 3129 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3130 { 3131 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3132 Mat a = b->AIJ,B; 3133 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3134 PetscErrorCode ierr; 3135 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3136 PetscInt *cols; 3137 PetscScalar *vals; 3138 3139 PetscFunctionBegin; 3140 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3141 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3142 for (i=0; i<m; i++) { 3143 nmax = PetscMax(nmax,aij->ilen[i]); 3144 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3145 } 3146 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3147 ierr = PetscFree(ilen);CHKERRQ(ierr); 3148 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3149 ii = 0; 3150 for (i=0; i<m; i++) { 3151 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3152 for (j=0; j<dof; j++) { 3153 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3154 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3155 ii++; 3156 } 3157 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3158 } 3159 ierr = PetscFree(icols);CHKERRQ(ierr); 3160 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3161 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 3163 if (reuse == MAT_INPLACE_MATRIX) { 3164 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3165 } else { 3166 *newmat = B; 3167 } 3168 PetscFunctionReturn(0); 3169 } 3170 3171 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3172 3173 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3174 { 3175 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3176 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3177 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3178 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3179 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3180 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3181 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3182 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3183 PetscInt rstart,cstart,*garray,ii,k; 3184 PetscErrorCode ierr; 3185 PetscScalar *vals,*ovals; 3186 3187 PetscFunctionBegin; 3188 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3189 for (i=0; i<A->rmap->n/dof; i++) { 3190 nmax = PetscMax(nmax,AIJ->ilen[i]); 3191 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3192 for (j=0; j<dof; j++) { 3193 dnz[dof*i+j] = AIJ->ilen[i]; 3194 onz[dof*i+j] = OAIJ->ilen[i]; 3195 } 3196 } 3197 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3198 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3199 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3200 3201 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3202 rstart = dof*maij->A->rmap->rstart; 3203 cstart = dof*maij->A->cmap->rstart; 3204 garray = mpiaij->garray; 3205 3206 ii = rstart; 3207 for (i=0; i<A->rmap->n/dof; i++) { 3208 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3209 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3210 for (j=0; j<dof; j++) { 3211 for (k=0; k<ncols; k++) { 3212 icols[k] = cstart + dof*cols[k]+j; 3213 } 3214 for (k=0; k<oncols; k++) { 3215 oicols[k] = dof*garray[ocols[k]]+j; 3216 } 3217 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3218 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3219 ii++; 3220 } 3221 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3222 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3223 } 3224 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3225 3226 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3227 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3228 3229 if (reuse == MAT_INPLACE_MATRIX) { 3230 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3231 ((PetscObject)A)->refct = 1; 3232 3233 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3234 3235 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3236 } else { 3237 *newmat = B; 3238 } 3239 PetscFunctionReturn(0); 3240 } 3241 3242 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3243 { 3244 PetscErrorCode ierr; 3245 Mat A; 3246 3247 PetscFunctionBegin; 3248 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3249 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3250 ierr = MatDestroy(&A);CHKERRQ(ierr); 3251 PetscFunctionReturn(0); 3252 } 3253 3254 /* ---------------------------------------------------------------------------------- */ 3255 /*@ 3256 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3257 operations for multicomponent problems. It interpolates each component the same 3258 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3259 and MATMPIAIJ for distributed matrices. 3260 3261 Collective 3262 3263 Input Parameters: 3264 + A - the AIJ matrix describing the action on blocks 3265 - dof - the block size (number of components per node) 3266 3267 Output Parameter: 3268 . maij - the new MAIJ matrix 3269 3270 Operations provided: 3271 + MatMult 3272 . MatMultTranspose 3273 . MatMultAdd 3274 . MatMultTransposeAdd 3275 - MatView 3276 3277 Level: advanced 3278 3279 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3280 @*/ 3281 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3282 { 3283 PetscErrorCode ierr; 3284 PetscMPIInt size; 3285 PetscInt n; 3286 Mat B; 3287 3288 PetscFunctionBegin; 3289 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3290 3291 if (dof == 1) *maij = A; 3292 else { 3293 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3294 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3295 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3296 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3297 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3298 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3299 3300 B->assembled = PETSC_TRUE; 3301 3302 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3303 if (size == 1) { 3304 Mat_SeqMAIJ *b; 3305 3306 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3307 3308 B->ops->setup = NULL; 3309 B->ops->destroy = MatDestroy_SeqMAIJ; 3310 B->ops->view = MatView_SeqMAIJ; 3311 b = (Mat_SeqMAIJ*)B->data; 3312 b->dof = dof; 3313 b->AIJ = A; 3314 3315 if (dof == 2) { 3316 B->ops->mult = MatMult_SeqMAIJ_2; 3317 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3318 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3319 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3320 } else if (dof == 3) { 3321 B->ops->mult = MatMult_SeqMAIJ_3; 3322 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3323 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3324 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3325 } else if (dof == 4) { 3326 B->ops->mult = MatMult_SeqMAIJ_4; 3327 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3328 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3329 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3330 } else if (dof == 5) { 3331 B->ops->mult = MatMult_SeqMAIJ_5; 3332 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3333 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3334 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3335 } else if (dof == 6) { 3336 B->ops->mult = MatMult_SeqMAIJ_6; 3337 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3338 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3339 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3340 } else if (dof == 7) { 3341 B->ops->mult = MatMult_SeqMAIJ_7; 3342 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3343 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3344 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3345 } else if (dof == 8) { 3346 B->ops->mult = MatMult_SeqMAIJ_8; 3347 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3348 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3349 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3350 } else if (dof == 9) { 3351 B->ops->mult = MatMult_SeqMAIJ_9; 3352 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3353 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3354 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3355 } else if (dof == 10) { 3356 B->ops->mult = MatMult_SeqMAIJ_10; 3357 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3358 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3359 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3360 } else if (dof == 11) { 3361 B->ops->mult = MatMult_SeqMAIJ_11; 3362 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3363 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3364 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3365 } else if (dof == 16) { 3366 B->ops->mult = MatMult_SeqMAIJ_16; 3367 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3368 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3369 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3370 } else if (dof == 18) { 3371 B->ops->mult = MatMult_SeqMAIJ_18; 3372 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3373 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3374 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3375 } else { 3376 B->ops->mult = MatMult_SeqMAIJ_N; 3377 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3378 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3379 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3380 } 3381 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3382 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3383 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3384 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3385 } else { 3386 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3387 Mat_MPIMAIJ *b; 3388 IS from,to; 3389 Vec gvec; 3390 3391 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3392 3393 B->ops->setup = NULL; 3394 B->ops->destroy = MatDestroy_MPIMAIJ; 3395 B->ops->view = MatView_MPIMAIJ; 3396 3397 b = (Mat_MPIMAIJ*)B->data; 3398 b->dof = dof; 3399 b->A = A; 3400 3401 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3402 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3403 3404 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3405 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3406 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3407 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3408 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3409 3410 /* create two temporary Index sets for build scatter gather */ 3411 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3412 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3413 3414 /* create temporary global vector to generate scatter context */ 3415 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3416 3417 /* generate the scatter context */ 3418 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3419 3420 ierr = ISDestroy(&from);CHKERRQ(ierr); 3421 ierr = ISDestroy(&to);CHKERRQ(ierr); 3422 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3423 3424 B->ops->mult = MatMult_MPIMAIJ_dof; 3425 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3426 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3427 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3428 3429 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3430 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3431 } 3432 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3433 ierr = MatSetUp(B);CHKERRQ(ierr); 3434 *maij = B; 3435 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3436 } 3437 PetscFunctionReturn(0); 3438 } 3439