1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 #include <petsc-private/vecimpl.h> 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatMAIJGetAIJ" 25 /*@C 26 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 27 28 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 29 30 Input Parameter: 31 . A - the MAIJ matrix 32 33 Output Parameter: 34 . B - the AIJ matrix 35 36 Level: advanced 37 38 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 39 40 .seealso: MatCreateMAIJ() 41 @*/ 42 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 43 { 44 PetscErrorCode ierr; 45 PetscBool ismpimaij,isseqmaij; 46 47 PetscFunctionBegin; 48 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 49 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 50 if (ismpimaij) { 51 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 52 53 *B = b->A; 54 } else if (isseqmaij) { 55 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 56 57 *B = b->AIJ; 58 } else { 59 *B = A; 60 } 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatMAIJRedimension" 66 /*@C 67 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 68 69 Logically Collective 70 71 Input Parameter: 72 + A - the MAIJ matrix 73 - dof - the block size for the new matrix 74 75 Output Parameter: 76 . B - the new MAIJ matrix 77 78 Level: advanced 79 80 .seealso: MatCreateMAIJ() 81 @*/ 82 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 83 { 84 PetscErrorCode ierr; 85 Mat Aij = PETSC_NULL; 86 87 PetscFunctionBegin; 88 PetscValidLogicalCollectiveInt(A,dof,2); 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 103 ierr = PetscFree(A->data);CHKERRQ(ierr); 104 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqmaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatPtAP_seqaij_seqmaij_C","",PETSC_NULL);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatSetUp_MAIJ" 111 PetscErrorCode MatSetUp_MAIJ(Mat A) 112 { 113 PetscFunctionBegin; 114 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatView_SeqMAIJ" 120 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 121 { 122 PetscErrorCode ierr; 123 Mat B; 124 125 PetscFunctionBegin; 126 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 127 ierr = MatView(B,viewer);CHKERRQ(ierr); 128 ierr = MatDestroy(&B);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatView_MPIMAIJ" 134 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 135 { 136 PetscErrorCode ierr; 137 Mat B; 138 139 PetscFunctionBegin; 140 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 141 ierr = MatView(B,viewer);CHKERRQ(ierr); 142 ierr = MatDestroy(&B);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatDestroy_MPIMAIJ" 148 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 152 153 PetscFunctionBegin; 154 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 155 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 156 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 157 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 158 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 159 ierr = PetscFree(A->data);CHKERRQ(ierr); 160 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 161 PetscFunctionReturn(0); 162 } 163 164 /*MC 165 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 166 multicomponent problems, interpolating or restricting each component the same way independently. 167 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 168 169 Operations provided: 170 . MatMult 171 . MatMultTranspose 172 . MatMultAdd 173 . MatMultTransposeAdd 174 175 Level: advanced 176 177 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 178 M*/ 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatCreate_MAIJ" 182 PETSC_EXTERN_C PetscErrorCode MatCreate_MAIJ(Mat A) 183 { 184 PetscErrorCode ierr; 185 Mat_MPIMAIJ *b; 186 PetscMPIInt size; 187 188 PetscFunctionBegin; 189 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 190 A->data = (void*)b; 191 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 192 A->ops->setup = MatSetUp_MAIJ; 193 194 b->AIJ = 0; 195 b->dof = 0; 196 b->OAIJ = 0; 197 b->ctx = 0; 198 b->w = 0; 199 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 200 if (size == 1){ 201 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 202 } else { 203 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 204 } 205 PetscFunctionReturn(0); 206 } 207 208 /* --------------------------------------------------------------------------------------*/ 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatMult_SeqMAIJ_2" 211 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 212 { 213 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 214 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 215 const PetscScalar *x,*v; 216 PetscScalar *y, sum1, sum2; 217 PetscErrorCode ierr; 218 PetscInt nonzerorow=0,n,i,jrow,j; 219 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 220 221 PetscFunctionBegin; 222 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 223 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 224 idx = a->j; 225 v = a->a; 226 ii = a->i; 227 228 for (i=0; i<m; i++) { 229 jrow = ii[i]; 230 n = ii[i+1] - jrow; 231 sum1 = 0.0; 232 sum2 = 0.0; 233 nonzerorow += (n>0); 234 for (j=0; j<n; j++) { 235 sum1 += v[jrow]*x[2*idx[jrow]]; 236 sum2 += v[jrow]*x[2*idx[jrow]+1]; 237 jrow++; 238 } 239 y[2*i] = sum1; 240 y[2*i+1] = sum2; 241 } 242 243 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 244 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 245 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 251 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 252 { 253 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 254 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 255 const PetscScalar *x,*v; 256 PetscScalar *y,alpha1,alpha2; 257 PetscErrorCode ierr; 258 PetscInt n,i; 259 const PetscInt m = b->AIJ->rmap->n,*idx; 260 261 PetscFunctionBegin; 262 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 263 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 264 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 265 266 for (i=0; i<m; i++) { 267 idx = a->j + a->i[i] ; 268 v = a->a + a->i[i] ; 269 n = a->i[i+1] - a->i[i]; 270 alpha1 = x[2*i]; 271 alpha2 = x[2*i+1]; 272 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 273 } 274 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 275 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 276 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 282 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 283 { 284 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 286 const PetscScalar *x,*v; 287 PetscScalar *y,sum1, sum2; 288 PetscErrorCode ierr; 289 PetscInt n,i,jrow,j; 290 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 291 292 PetscFunctionBegin; 293 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 294 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 295 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 296 idx = a->j; 297 v = a->a; 298 ii = a->i; 299 300 for (i=0; i<m; i++) { 301 jrow = ii[i]; 302 n = ii[i+1] - jrow; 303 sum1 = 0.0; 304 sum2 = 0.0; 305 for (j=0; j<n; j++) { 306 sum1 += v[jrow]*x[2*idx[jrow]]; 307 sum2 += v[jrow]*x[2*idx[jrow]+1]; 308 jrow++; 309 } 310 y[2*i] += sum1; 311 y[2*i+1] += sum2; 312 } 313 314 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 315 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 316 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 #undef __FUNCT__ 320 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 321 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 322 { 323 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 324 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 325 const PetscScalar *x,*v; 326 PetscScalar *y,alpha1,alpha2; 327 PetscErrorCode ierr; 328 PetscInt n,i; 329 const PetscInt m = b->AIJ->rmap->n,*idx; 330 331 PetscFunctionBegin; 332 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 333 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 334 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 idx = a->j + a->i[i] ; 338 v = a->a + a->i[i] ; 339 n = a->i[i+1] - a->i[i]; 340 alpha1 = x[2*i]; 341 alpha2 = x[2*i+1]; 342 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 343 } 344 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 345 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 346 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 /* --------------------------------------------------------------------------------------*/ 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatMult_SeqMAIJ_3" 352 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 353 { 354 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 355 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 356 const PetscScalar *x,*v; 357 PetscScalar *y,sum1, sum2, sum3; 358 PetscErrorCode ierr; 359 PetscInt nonzerorow=0,n,i,jrow,j; 360 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 361 362 PetscFunctionBegin; 363 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 364 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 365 idx = a->j; 366 v = a->a; 367 ii = a->i; 368 369 for (i=0; i<m; i++) { 370 jrow = ii[i]; 371 n = ii[i+1] - jrow; 372 sum1 = 0.0; 373 sum2 = 0.0; 374 sum3 = 0.0; 375 nonzerorow += (n>0); 376 for (j=0; j<n; j++) { 377 sum1 += v[jrow]*x[3*idx[jrow]]; 378 sum2 += v[jrow]*x[3*idx[jrow]+1]; 379 sum3 += v[jrow]*x[3*idx[jrow]+2]; 380 jrow++; 381 } 382 y[3*i] = sum1; 383 y[3*i+1] = sum2; 384 y[3*i+2] = sum3; 385 } 386 387 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 388 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 389 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 395 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 396 { 397 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 398 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 399 const PetscScalar *x,*v; 400 PetscScalar *y,alpha1,alpha2,alpha3; 401 PetscErrorCode ierr; 402 PetscInt n,i; 403 const PetscInt m = b->AIJ->rmap->n,*idx; 404 405 PetscFunctionBegin; 406 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 408 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 409 410 for (i=0; i<m; i++) { 411 idx = a->j + a->i[i]; 412 v = a->a + a->i[i]; 413 n = a->i[i+1] - a->i[i]; 414 alpha1 = x[3*i]; 415 alpha2 = x[3*i+1]; 416 alpha3 = x[3*i+2]; 417 while (n-->0) { 418 y[3*(*idx)] += alpha1*(*v); 419 y[3*(*idx)+1] += alpha2*(*v); 420 y[3*(*idx)+2] += alpha3*(*v); 421 idx++; v++; 422 } 423 } 424 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 426 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 427 PetscFunctionReturn(0); 428 } 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 432 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 433 { 434 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 435 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 436 const PetscScalar *x,*v; 437 PetscScalar *y,sum1, sum2, sum3; 438 PetscErrorCode ierr; 439 PetscInt n,i,jrow,j; 440 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 441 442 PetscFunctionBegin; 443 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 444 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 445 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 446 idx = a->j; 447 v = a->a; 448 ii = a->i; 449 450 for (i=0; i<m; i++) { 451 jrow = ii[i]; 452 n = ii[i+1] - jrow; 453 sum1 = 0.0; 454 sum2 = 0.0; 455 sum3 = 0.0; 456 for (j=0; j<n; j++) { 457 sum1 += v[jrow]*x[3*idx[jrow]]; 458 sum2 += v[jrow]*x[3*idx[jrow]+1]; 459 sum3 += v[jrow]*x[3*idx[jrow]+2]; 460 jrow++; 461 } 462 y[3*i] += sum1; 463 y[3*i+1] += sum2; 464 y[3*i+2] += sum3; 465 } 466 467 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 468 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 469 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 470 PetscFunctionReturn(0); 471 } 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 474 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 475 { 476 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 477 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 478 const PetscScalar *x,*v; 479 PetscScalar *y,alpha1,alpha2,alpha3; 480 PetscErrorCode ierr; 481 PetscInt n,i; 482 const PetscInt m = b->AIJ->rmap->n,*idx; 483 484 PetscFunctionBegin; 485 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 486 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 487 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 488 for (i=0; i<m; i++) { 489 idx = a->j + a->i[i] ; 490 v = a->a + a->i[i] ; 491 n = a->i[i+1] - a->i[i]; 492 alpha1 = x[3*i]; 493 alpha2 = x[3*i+1]; 494 alpha3 = x[3*i+2]; 495 while (n-->0) { 496 y[3*(*idx)] += alpha1*(*v); 497 y[3*(*idx)+1] += alpha2*(*v); 498 y[3*(*idx)+2] += alpha3*(*v); 499 idx++; v++; 500 } 501 } 502 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 504 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 508 /* ------------------------------------------------------------------------------*/ 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatMult_SeqMAIJ_4" 511 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 512 { 513 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 514 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 515 const PetscScalar *x,*v; 516 PetscScalar *y,sum1, sum2, sum3, sum4; 517 PetscErrorCode ierr; 518 PetscInt nonzerorow=0,n,i,jrow,j; 519 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 520 521 PetscFunctionBegin; 522 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 523 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 524 idx = a->j; 525 v = a->a; 526 ii = a->i; 527 528 for (i=0; i<m; i++) { 529 jrow = ii[i]; 530 n = ii[i+1] - jrow; 531 sum1 = 0.0; 532 sum2 = 0.0; 533 sum3 = 0.0; 534 sum4 = 0.0; 535 nonzerorow += (n>0); 536 for (j=0; j<n; j++) { 537 sum1 += v[jrow]*x[4*idx[jrow]]; 538 sum2 += v[jrow]*x[4*idx[jrow]+1]; 539 sum3 += v[jrow]*x[4*idx[jrow]+2]; 540 sum4 += v[jrow]*x[4*idx[jrow]+3]; 541 jrow++; 542 } 543 y[4*i] = sum1; 544 y[4*i+1] = sum2; 545 y[4*i+2] = sum3; 546 y[4*i+3] = sum4; 547 } 548 549 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 550 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 551 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 557 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 558 { 559 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 560 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 561 const PetscScalar *x,*v; 562 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 563 PetscErrorCode ierr; 564 PetscInt n,i; 565 const PetscInt m = b->AIJ->rmap->n,*idx; 566 567 PetscFunctionBegin; 568 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 569 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 570 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 571 for (i=0; i<m; i++) { 572 idx = a->j + a->i[i] ; 573 v = a->a + a->i[i] ; 574 n = a->i[i+1] - a->i[i]; 575 alpha1 = x[4*i]; 576 alpha2 = x[4*i+1]; 577 alpha3 = x[4*i+2]; 578 alpha4 = x[4*i+3]; 579 while (n-->0) { 580 y[4*(*idx)] += alpha1*(*v); 581 y[4*(*idx)+1] += alpha2*(*v); 582 y[4*(*idx)+2] += alpha3*(*v); 583 y[4*(*idx)+3] += alpha4*(*v); 584 idx++; v++; 585 } 586 } 587 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 588 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 589 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 595 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 598 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 599 const PetscScalar *x,*v; 600 PetscScalar *y,sum1, sum2, sum3, sum4; 601 PetscErrorCode ierr; 602 PetscInt n,i,jrow,j; 603 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 604 605 PetscFunctionBegin; 606 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 607 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 608 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 609 idx = a->j; 610 v = a->a; 611 ii = a->i; 612 613 for (i=0; i<m; i++) { 614 jrow = ii[i]; 615 n = ii[i+1] - jrow; 616 sum1 = 0.0; 617 sum2 = 0.0; 618 sum3 = 0.0; 619 sum4 = 0.0; 620 for (j=0; j<n; j++) { 621 sum1 += v[jrow]*x[4*idx[jrow]]; 622 sum2 += v[jrow]*x[4*idx[jrow]+1]; 623 sum3 += v[jrow]*x[4*idx[jrow]+2]; 624 sum4 += v[jrow]*x[4*idx[jrow]+3]; 625 jrow++; 626 } 627 y[4*i] += sum1; 628 y[4*i+1] += sum2; 629 y[4*i+2] += sum3; 630 y[4*i+3] += sum4; 631 } 632 633 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 634 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 635 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 640 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 641 { 642 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 643 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 644 const PetscScalar *x,*v; 645 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 646 PetscErrorCode ierr; 647 PetscInt n,i; 648 const PetscInt m = b->AIJ->rmap->n,*idx; 649 650 PetscFunctionBegin; 651 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 652 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 653 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 654 655 for (i=0; i<m; i++) { 656 idx = a->j + a->i[i] ; 657 v = a->a + a->i[i] ; 658 n = a->i[i+1] - a->i[i]; 659 alpha1 = x[4*i]; 660 alpha2 = x[4*i+1]; 661 alpha3 = x[4*i+2]; 662 alpha4 = x[4*i+3]; 663 while (n-->0) { 664 y[4*(*idx)] += alpha1*(*v); 665 y[4*(*idx)+1] += alpha2*(*v); 666 y[4*(*idx)+2] += alpha3*(*v); 667 y[4*(*idx)+3] += alpha4*(*v); 668 idx++; v++; 669 } 670 } 671 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 672 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 673 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 674 PetscFunctionReturn(0); 675 } 676 /* ------------------------------------------------------------------------------*/ 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatMult_SeqMAIJ_5" 680 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 681 { 682 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 683 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 684 const PetscScalar *x,*v; 685 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 686 PetscErrorCode ierr; 687 PetscInt nonzerorow=0,n,i,jrow,j; 688 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 689 690 PetscFunctionBegin; 691 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 692 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 693 idx = a->j; 694 v = a->a; 695 ii = a->i; 696 697 for (i=0; i<m; i++) { 698 jrow = ii[i]; 699 n = ii[i+1] - jrow; 700 sum1 = 0.0; 701 sum2 = 0.0; 702 sum3 = 0.0; 703 sum4 = 0.0; 704 sum5 = 0.0; 705 nonzerorow += (n>0); 706 for (j=0; j<n; j++) { 707 sum1 += v[jrow]*x[5*idx[jrow]]; 708 sum2 += v[jrow]*x[5*idx[jrow]+1]; 709 sum3 += v[jrow]*x[5*idx[jrow]+2]; 710 sum4 += v[jrow]*x[5*idx[jrow]+3]; 711 sum5 += v[jrow]*x[5*idx[jrow]+4]; 712 jrow++; 713 } 714 y[5*i] = sum1; 715 y[5*i+1] = sum2; 716 y[5*i+2] = sum3; 717 y[5*i+3] = sum4; 718 y[5*i+4] = sum5; 719 } 720 721 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 722 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 723 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 729 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 730 { 731 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 732 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 733 const PetscScalar *x,*v; 734 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 735 PetscErrorCode ierr; 736 PetscInt n,i; 737 const PetscInt m = b->AIJ->rmap->n,*idx; 738 739 PetscFunctionBegin; 740 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 741 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 742 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 743 744 for (i=0; i<m; i++) { 745 idx = a->j + a->i[i] ; 746 v = a->a + a->i[i] ; 747 n = a->i[i+1] - a->i[i]; 748 alpha1 = x[5*i]; 749 alpha2 = x[5*i+1]; 750 alpha3 = x[5*i+2]; 751 alpha4 = x[5*i+3]; 752 alpha5 = x[5*i+4]; 753 while (n-->0) { 754 y[5*(*idx)] += alpha1*(*v); 755 y[5*(*idx)+1] += alpha2*(*v); 756 y[5*(*idx)+2] += alpha3*(*v); 757 y[5*(*idx)+3] += alpha4*(*v); 758 y[5*(*idx)+4] += alpha5*(*v); 759 idx++; v++; 760 } 761 } 762 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 763 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 764 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 770 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 771 { 772 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 773 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 774 const PetscScalar *x,*v; 775 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 776 PetscErrorCode ierr; 777 PetscInt n,i,jrow,j; 778 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 779 780 PetscFunctionBegin; 781 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 782 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 783 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 784 idx = a->j; 785 v = a->a; 786 ii = a->i; 787 788 for (i=0; i<m; i++) { 789 jrow = ii[i]; 790 n = ii[i+1] - jrow; 791 sum1 = 0.0; 792 sum2 = 0.0; 793 sum3 = 0.0; 794 sum4 = 0.0; 795 sum5 = 0.0; 796 for (j=0; j<n; j++) { 797 sum1 += v[jrow]*x[5*idx[jrow]]; 798 sum2 += v[jrow]*x[5*idx[jrow]+1]; 799 sum3 += v[jrow]*x[5*idx[jrow]+2]; 800 sum4 += v[jrow]*x[5*idx[jrow]+3]; 801 sum5 += v[jrow]*x[5*idx[jrow]+4]; 802 jrow++; 803 } 804 y[5*i] += sum1; 805 y[5*i+1] += sum2; 806 y[5*i+2] += sum3; 807 y[5*i+3] += sum4; 808 y[5*i+4] += sum5; 809 } 810 811 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 812 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 813 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 819 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 820 { 821 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 822 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 823 const PetscScalar *x,*v; 824 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 825 PetscErrorCode ierr; 826 PetscInt n,i; 827 const PetscInt m = b->AIJ->rmap->n,*idx; 828 829 PetscFunctionBegin; 830 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 831 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 832 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 833 834 for (i=0; i<m; i++) { 835 idx = a->j + a->i[i] ; 836 v = a->a + a->i[i] ; 837 n = a->i[i+1] - a->i[i]; 838 alpha1 = x[5*i]; 839 alpha2 = x[5*i+1]; 840 alpha3 = x[5*i+2]; 841 alpha4 = x[5*i+3]; 842 alpha5 = x[5*i+4]; 843 while (n-->0) { 844 y[5*(*idx)] += alpha1*(*v); 845 y[5*(*idx)+1] += alpha2*(*v); 846 y[5*(*idx)+2] += alpha3*(*v); 847 y[5*(*idx)+3] += alpha4*(*v); 848 y[5*(*idx)+4] += alpha5*(*v); 849 idx++; v++; 850 } 851 } 852 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 853 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 854 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 /* ------------------------------------------------------------------------------*/ 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatMult_SeqMAIJ_6" 861 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 862 { 863 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 864 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 865 const PetscScalar *x,*v; 866 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 867 PetscErrorCode ierr; 868 PetscInt nonzerorow=0,n,i,jrow,j; 869 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 870 871 PetscFunctionBegin; 872 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 873 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 874 idx = a->j; 875 v = a->a; 876 ii = a->i; 877 878 for (i=0; i<m; i++) { 879 jrow = ii[i]; 880 n = ii[i+1] - jrow; 881 sum1 = 0.0; 882 sum2 = 0.0; 883 sum3 = 0.0; 884 sum4 = 0.0; 885 sum5 = 0.0; 886 sum6 = 0.0; 887 nonzerorow += (n>0); 888 for (j=0; j<n; j++) { 889 sum1 += v[jrow]*x[6*idx[jrow]]; 890 sum2 += v[jrow]*x[6*idx[jrow]+1]; 891 sum3 += v[jrow]*x[6*idx[jrow]+2]; 892 sum4 += v[jrow]*x[6*idx[jrow]+3]; 893 sum5 += v[jrow]*x[6*idx[jrow]+4]; 894 sum6 += v[jrow]*x[6*idx[jrow]+5]; 895 jrow++; 896 } 897 y[6*i] = sum1; 898 y[6*i+1] = sum2; 899 y[6*i+2] = sum3; 900 y[6*i+3] = sum4; 901 y[6*i+4] = sum5; 902 y[6*i+5] = sum6; 903 } 904 905 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 906 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 907 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 913 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 914 { 915 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 916 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 917 const PetscScalar *x,*v; 918 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 919 PetscErrorCode ierr; 920 PetscInt n,i; 921 const PetscInt m = b->AIJ->rmap->n,*idx; 922 923 PetscFunctionBegin; 924 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 925 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 926 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 927 928 for (i=0; i<m; i++) { 929 idx = a->j + a->i[i] ; 930 v = a->a + a->i[i] ; 931 n = a->i[i+1] - a->i[i]; 932 alpha1 = x[6*i]; 933 alpha2 = x[6*i+1]; 934 alpha3 = x[6*i+2]; 935 alpha4 = x[6*i+3]; 936 alpha5 = x[6*i+4]; 937 alpha6 = x[6*i+5]; 938 while (n-->0) { 939 y[6*(*idx)] += alpha1*(*v); 940 y[6*(*idx)+1] += alpha2*(*v); 941 y[6*(*idx)+2] += alpha3*(*v); 942 y[6*(*idx)+3] += alpha4*(*v); 943 y[6*(*idx)+4] += alpha5*(*v); 944 y[6*(*idx)+5] += alpha6*(*v); 945 idx++; v++; 946 } 947 } 948 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 949 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 950 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 956 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 957 { 958 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 959 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 960 const PetscScalar *x,*v; 961 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 962 PetscErrorCode ierr; 963 PetscInt n,i,jrow,j; 964 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 965 966 PetscFunctionBegin; 967 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 968 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 969 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 970 idx = a->j; 971 v = a->a; 972 ii = a->i; 973 974 for (i=0; i<m; i++) { 975 jrow = ii[i]; 976 n = ii[i+1] - jrow; 977 sum1 = 0.0; 978 sum2 = 0.0; 979 sum3 = 0.0; 980 sum4 = 0.0; 981 sum5 = 0.0; 982 sum6 = 0.0; 983 for (j=0; j<n; j++) { 984 sum1 += v[jrow]*x[6*idx[jrow]]; 985 sum2 += v[jrow]*x[6*idx[jrow]+1]; 986 sum3 += v[jrow]*x[6*idx[jrow]+2]; 987 sum4 += v[jrow]*x[6*idx[jrow]+3]; 988 sum5 += v[jrow]*x[6*idx[jrow]+4]; 989 sum6 += v[jrow]*x[6*idx[jrow]+5]; 990 jrow++; 991 } 992 y[6*i] += sum1; 993 y[6*i+1] += sum2; 994 y[6*i+2] += sum3; 995 y[6*i+3] += sum4; 996 y[6*i+4] += sum5; 997 y[6*i+5] += sum6; 998 } 999 1000 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1001 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1002 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1008 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 1009 { 1010 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1011 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1012 const PetscScalar *x,*v; 1013 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1014 PetscErrorCode ierr; 1015 PetscInt n,i; 1016 const PetscInt m = b->AIJ->rmap->n,*idx; 1017 1018 PetscFunctionBegin; 1019 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1020 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1021 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1022 1023 for (i=0; i<m; i++) { 1024 idx = a->j + a->i[i] ; 1025 v = a->a + a->i[i] ; 1026 n = a->i[i+1] - a->i[i]; 1027 alpha1 = x[6*i]; 1028 alpha2 = x[6*i+1]; 1029 alpha3 = x[6*i+2]; 1030 alpha4 = x[6*i+3]; 1031 alpha5 = x[6*i+4]; 1032 alpha6 = x[6*i+5]; 1033 while (n-->0) { 1034 y[6*(*idx)] += alpha1*(*v); 1035 y[6*(*idx)+1] += alpha2*(*v); 1036 y[6*(*idx)+2] += alpha3*(*v); 1037 y[6*(*idx)+3] += alpha4*(*v); 1038 y[6*(*idx)+4] += alpha5*(*v); 1039 y[6*(*idx)+5] += alpha6*(*v); 1040 idx++; v++; 1041 } 1042 } 1043 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1044 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1045 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 /* ------------------------------------------------------------------------------*/ 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1052 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1053 { 1054 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1055 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1056 const PetscScalar *x,*v; 1057 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1058 PetscErrorCode ierr; 1059 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1060 PetscInt nonzerorow=0,n,i,jrow,j; 1061 1062 PetscFunctionBegin; 1063 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1064 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1065 idx = a->j; 1066 v = a->a; 1067 ii = a->i; 1068 1069 for (i=0; i<m; i++) { 1070 jrow = ii[i]; 1071 n = ii[i+1] - jrow; 1072 sum1 = 0.0; 1073 sum2 = 0.0; 1074 sum3 = 0.0; 1075 sum4 = 0.0; 1076 sum5 = 0.0; 1077 sum6 = 0.0; 1078 sum7 = 0.0; 1079 nonzerorow += (n>0); 1080 for (j=0; j<n; j++) { 1081 sum1 += v[jrow]*x[7*idx[jrow]]; 1082 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1083 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1084 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1085 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1086 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1087 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1088 jrow++; 1089 } 1090 y[7*i] = sum1; 1091 y[7*i+1] = sum2; 1092 y[7*i+2] = sum3; 1093 y[7*i+3] = sum4; 1094 y[7*i+4] = sum5; 1095 y[7*i+5] = sum6; 1096 y[7*i+6] = sum7; 1097 } 1098 1099 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1100 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1101 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1107 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1108 { 1109 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1111 const PetscScalar *x,*v; 1112 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1113 PetscErrorCode ierr; 1114 const PetscInt m = b->AIJ->rmap->n,*idx; 1115 PetscInt n,i; 1116 1117 PetscFunctionBegin; 1118 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1119 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1120 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1121 1122 for (i=0; i<m; i++) { 1123 idx = a->j + a->i[i] ; 1124 v = a->a + a->i[i] ; 1125 n = a->i[i+1] - a->i[i]; 1126 alpha1 = x[7*i]; 1127 alpha2 = x[7*i+1]; 1128 alpha3 = x[7*i+2]; 1129 alpha4 = x[7*i+3]; 1130 alpha5 = x[7*i+4]; 1131 alpha6 = x[7*i+5]; 1132 alpha7 = x[7*i+6]; 1133 while (n-->0) { 1134 y[7*(*idx)] += alpha1*(*v); 1135 y[7*(*idx)+1] += alpha2*(*v); 1136 y[7*(*idx)+2] += alpha3*(*v); 1137 y[7*(*idx)+3] += alpha4*(*v); 1138 y[7*(*idx)+4] += alpha5*(*v); 1139 y[7*(*idx)+5] += alpha6*(*v); 1140 y[7*(*idx)+6] += alpha7*(*v); 1141 idx++; v++; 1142 } 1143 } 1144 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1145 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1146 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1152 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1153 { 1154 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1155 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1156 const PetscScalar *x,*v; 1157 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1158 PetscErrorCode ierr; 1159 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1160 PetscInt n,i,jrow,j; 1161 1162 PetscFunctionBegin; 1163 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1164 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1165 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1166 idx = a->j; 1167 v = a->a; 1168 ii = a->i; 1169 1170 for (i=0; i<m; i++) { 1171 jrow = ii[i]; 1172 n = ii[i+1] - jrow; 1173 sum1 = 0.0; 1174 sum2 = 0.0; 1175 sum3 = 0.0; 1176 sum4 = 0.0; 1177 sum5 = 0.0; 1178 sum6 = 0.0; 1179 sum7 = 0.0; 1180 for (j=0; j<n; j++) { 1181 sum1 += v[jrow]*x[7*idx[jrow]]; 1182 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1183 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1184 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1185 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1186 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1187 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1188 jrow++; 1189 } 1190 y[7*i] += sum1; 1191 y[7*i+1] += sum2; 1192 y[7*i+2] += sum3; 1193 y[7*i+3] += sum4; 1194 y[7*i+4] += sum5; 1195 y[7*i+5] += sum6; 1196 y[7*i+6] += sum7; 1197 } 1198 1199 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1200 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1201 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1207 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1208 { 1209 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1211 const PetscScalar *x,*v; 1212 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1213 PetscErrorCode ierr; 1214 const PetscInt m = b->AIJ->rmap->n,*idx; 1215 PetscInt n,i; 1216 1217 PetscFunctionBegin; 1218 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1219 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1220 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1221 for (i=0; i<m; i++) { 1222 idx = a->j + a->i[i] ; 1223 v = a->a + a->i[i] ; 1224 n = a->i[i+1] - a->i[i]; 1225 alpha1 = x[7*i]; 1226 alpha2 = x[7*i+1]; 1227 alpha3 = x[7*i+2]; 1228 alpha4 = x[7*i+3]; 1229 alpha5 = x[7*i+4]; 1230 alpha6 = x[7*i+5]; 1231 alpha7 = x[7*i+6]; 1232 while (n-->0) { 1233 y[7*(*idx)] += alpha1*(*v); 1234 y[7*(*idx)+1] += alpha2*(*v); 1235 y[7*(*idx)+2] += alpha3*(*v); 1236 y[7*(*idx)+3] += alpha4*(*v); 1237 y[7*(*idx)+4] += alpha5*(*v); 1238 y[7*(*idx)+5] += alpha6*(*v); 1239 y[7*(*idx)+6] += alpha7*(*v); 1240 idx++; v++; 1241 } 1242 } 1243 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1244 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1245 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1251 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1252 { 1253 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1254 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1255 const PetscScalar *x,*v; 1256 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1257 PetscErrorCode ierr; 1258 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1259 PetscInt nonzerorow=0,n,i,jrow,j; 1260 1261 PetscFunctionBegin; 1262 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1263 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1264 idx = a->j; 1265 v = a->a; 1266 ii = a->i; 1267 1268 for (i=0; i<m; i++) { 1269 jrow = ii[i]; 1270 n = ii[i+1] - jrow; 1271 sum1 = 0.0; 1272 sum2 = 0.0; 1273 sum3 = 0.0; 1274 sum4 = 0.0; 1275 sum5 = 0.0; 1276 sum6 = 0.0; 1277 sum7 = 0.0; 1278 sum8 = 0.0; 1279 nonzerorow += (n>0); 1280 for (j=0; j<n; j++) { 1281 sum1 += v[jrow]*x[8*idx[jrow]]; 1282 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1283 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1284 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1285 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1286 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1287 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1288 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1289 jrow++; 1290 } 1291 y[8*i] = sum1; 1292 y[8*i+1] = sum2; 1293 y[8*i+2] = sum3; 1294 y[8*i+3] = sum4; 1295 y[8*i+4] = sum5; 1296 y[8*i+5] = sum6; 1297 y[8*i+6] = sum7; 1298 y[8*i+7] = sum8; 1299 } 1300 1301 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1302 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1303 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1309 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1310 { 1311 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1312 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1313 const PetscScalar *x,*v; 1314 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1315 PetscErrorCode ierr; 1316 const PetscInt m = b->AIJ->rmap->n,*idx; 1317 PetscInt n,i; 1318 1319 PetscFunctionBegin; 1320 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1321 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1322 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1323 1324 for (i=0; i<m; i++) { 1325 idx = a->j + a->i[i] ; 1326 v = a->a + a->i[i] ; 1327 n = a->i[i+1] - a->i[i]; 1328 alpha1 = x[8*i]; 1329 alpha2 = x[8*i+1]; 1330 alpha3 = x[8*i+2]; 1331 alpha4 = x[8*i+3]; 1332 alpha5 = x[8*i+4]; 1333 alpha6 = x[8*i+5]; 1334 alpha7 = x[8*i+6]; 1335 alpha8 = x[8*i+7]; 1336 while (n-->0) { 1337 y[8*(*idx)] += alpha1*(*v); 1338 y[8*(*idx)+1] += alpha2*(*v); 1339 y[8*(*idx)+2] += alpha3*(*v); 1340 y[8*(*idx)+3] += alpha4*(*v); 1341 y[8*(*idx)+4] += alpha5*(*v); 1342 y[8*(*idx)+5] += alpha6*(*v); 1343 y[8*(*idx)+6] += alpha7*(*v); 1344 y[8*(*idx)+7] += alpha8*(*v); 1345 idx++; v++; 1346 } 1347 } 1348 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1349 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1350 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1356 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1357 { 1358 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1359 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1360 const PetscScalar *x,*v; 1361 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1362 PetscErrorCode ierr; 1363 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1364 PetscInt n,i,jrow,j; 1365 1366 PetscFunctionBegin; 1367 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1368 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1369 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1370 idx = a->j; 1371 v = a->a; 1372 ii = a->i; 1373 1374 for (i=0; i<m; i++) { 1375 jrow = ii[i]; 1376 n = ii[i+1] - jrow; 1377 sum1 = 0.0; 1378 sum2 = 0.0; 1379 sum3 = 0.0; 1380 sum4 = 0.0; 1381 sum5 = 0.0; 1382 sum6 = 0.0; 1383 sum7 = 0.0; 1384 sum8 = 0.0; 1385 for (j=0; j<n; j++) { 1386 sum1 += v[jrow]*x[8*idx[jrow]]; 1387 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1388 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1389 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1390 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1391 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1392 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1393 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1394 jrow++; 1395 } 1396 y[8*i] += sum1; 1397 y[8*i+1] += sum2; 1398 y[8*i+2] += sum3; 1399 y[8*i+3] += sum4; 1400 y[8*i+4] += sum5; 1401 y[8*i+5] += sum6; 1402 y[8*i+6] += sum7; 1403 y[8*i+7] += sum8; 1404 } 1405 1406 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1407 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1408 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1414 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1415 { 1416 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1417 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1418 const PetscScalar *x,*v; 1419 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1420 PetscErrorCode ierr; 1421 const PetscInt m = b->AIJ->rmap->n,*idx; 1422 PetscInt n,i; 1423 1424 PetscFunctionBegin; 1425 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1426 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1427 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1428 for (i=0; i<m; i++) { 1429 idx = a->j + a->i[i] ; 1430 v = a->a + a->i[i] ; 1431 n = a->i[i+1] - a->i[i]; 1432 alpha1 = x[8*i]; 1433 alpha2 = x[8*i+1]; 1434 alpha3 = x[8*i+2]; 1435 alpha4 = x[8*i+3]; 1436 alpha5 = x[8*i+4]; 1437 alpha6 = x[8*i+5]; 1438 alpha7 = x[8*i+6]; 1439 alpha8 = x[8*i+7]; 1440 while (n-->0) { 1441 y[8*(*idx)] += alpha1*(*v); 1442 y[8*(*idx)+1] += alpha2*(*v); 1443 y[8*(*idx)+2] += alpha3*(*v); 1444 y[8*(*idx)+3] += alpha4*(*v); 1445 y[8*(*idx)+4] += alpha5*(*v); 1446 y[8*(*idx)+5] += alpha6*(*v); 1447 y[8*(*idx)+6] += alpha7*(*v); 1448 y[8*(*idx)+7] += alpha8*(*v); 1449 idx++; v++; 1450 } 1451 } 1452 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1453 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1454 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 /* ------------------------------------------------------------------------------*/ 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1461 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1462 { 1463 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1464 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1465 const PetscScalar *x,*v; 1466 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1467 PetscErrorCode ierr; 1468 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1469 PetscInt nonzerorow=0,n,i,jrow,j; 1470 1471 PetscFunctionBegin; 1472 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1473 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1474 idx = a->j; 1475 v = a->a; 1476 ii = a->i; 1477 1478 for (i=0; i<m; i++) { 1479 jrow = ii[i]; 1480 n = ii[i+1] - jrow; 1481 sum1 = 0.0; 1482 sum2 = 0.0; 1483 sum3 = 0.0; 1484 sum4 = 0.0; 1485 sum5 = 0.0; 1486 sum6 = 0.0; 1487 sum7 = 0.0; 1488 sum8 = 0.0; 1489 sum9 = 0.0; 1490 nonzerorow += (n>0); 1491 for (j=0; j<n; j++) { 1492 sum1 += v[jrow]*x[9*idx[jrow]]; 1493 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1494 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1495 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1496 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1497 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1498 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1499 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1500 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1501 jrow++; 1502 } 1503 y[9*i] = sum1; 1504 y[9*i+1] = sum2; 1505 y[9*i+2] = sum3; 1506 y[9*i+3] = sum4; 1507 y[9*i+4] = sum5; 1508 y[9*i+5] = sum6; 1509 y[9*i+6] = sum7; 1510 y[9*i+7] = sum8; 1511 y[9*i+8] = sum9; 1512 } 1513 1514 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1515 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1516 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 /* ------------------------------------------------------------------------------*/ 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1524 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1525 { 1526 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1527 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1528 const PetscScalar *x,*v; 1529 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1530 PetscErrorCode ierr; 1531 const PetscInt m = b->AIJ->rmap->n,*idx; 1532 PetscInt n,i; 1533 1534 PetscFunctionBegin; 1535 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1536 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1537 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1538 1539 for (i=0; i<m; i++) { 1540 idx = a->j + a->i[i] ; 1541 v = a->a + a->i[i] ; 1542 n = a->i[i+1] - a->i[i]; 1543 alpha1 = x[9*i]; 1544 alpha2 = x[9*i+1]; 1545 alpha3 = x[9*i+2]; 1546 alpha4 = x[9*i+3]; 1547 alpha5 = x[9*i+4]; 1548 alpha6 = x[9*i+5]; 1549 alpha7 = x[9*i+6]; 1550 alpha8 = x[9*i+7]; 1551 alpha9 = x[9*i+8]; 1552 while (n-->0) { 1553 y[9*(*idx)] += alpha1*(*v); 1554 y[9*(*idx)+1] += alpha2*(*v); 1555 y[9*(*idx)+2] += alpha3*(*v); 1556 y[9*(*idx)+3] += alpha4*(*v); 1557 y[9*(*idx)+4] += alpha5*(*v); 1558 y[9*(*idx)+5] += alpha6*(*v); 1559 y[9*(*idx)+6] += alpha7*(*v); 1560 y[9*(*idx)+7] += alpha8*(*v); 1561 y[9*(*idx)+8] += alpha9*(*v); 1562 idx++; v++; 1563 } 1564 } 1565 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1566 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1567 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1573 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1574 { 1575 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1576 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1577 const PetscScalar *x,*v; 1578 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1579 PetscErrorCode ierr; 1580 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1581 PetscInt n,i,jrow,j; 1582 1583 PetscFunctionBegin; 1584 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1585 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1586 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1587 idx = a->j; 1588 v = a->a; 1589 ii = a->i; 1590 1591 for (i=0; i<m; i++) { 1592 jrow = ii[i]; 1593 n = ii[i+1] - jrow; 1594 sum1 = 0.0; 1595 sum2 = 0.0; 1596 sum3 = 0.0; 1597 sum4 = 0.0; 1598 sum5 = 0.0; 1599 sum6 = 0.0; 1600 sum7 = 0.0; 1601 sum8 = 0.0; 1602 sum9 = 0.0; 1603 for (j=0; j<n; j++) { 1604 sum1 += v[jrow]*x[9*idx[jrow]]; 1605 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1606 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1607 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1608 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1609 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1610 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1611 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1612 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1613 jrow++; 1614 } 1615 y[9*i] += sum1; 1616 y[9*i+1] += sum2; 1617 y[9*i+2] += sum3; 1618 y[9*i+3] += sum4; 1619 y[9*i+4] += sum5; 1620 y[9*i+5] += sum6; 1621 y[9*i+6] += sum7; 1622 y[9*i+7] += sum8; 1623 y[9*i+8] += sum9; 1624 } 1625 1626 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1627 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1628 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1634 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1635 { 1636 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1637 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1638 const PetscScalar *x,*v; 1639 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1640 PetscErrorCode ierr; 1641 const PetscInt m = b->AIJ->rmap->n,*idx; 1642 PetscInt n,i; 1643 1644 PetscFunctionBegin; 1645 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1646 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1647 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) { 1649 idx = a->j + a->i[i] ; 1650 v = a->a + a->i[i] ; 1651 n = a->i[i+1] - a->i[i]; 1652 alpha1 = x[9*i]; 1653 alpha2 = x[9*i+1]; 1654 alpha3 = x[9*i+2]; 1655 alpha4 = x[9*i+3]; 1656 alpha5 = x[9*i+4]; 1657 alpha6 = x[9*i+5]; 1658 alpha7 = x[9*i+6]; 1659 alpha8 = x[9*i+7]; 1660 alpha9 = x[9*i+8]; 1661 while (n-->0) { 1662 y[9*(*idx)] += alpha1*(*v); 1663 y[9*(*idx)+1] += alpha2*(*v); 1664 y[9*(*idx)+2] += alpha3*(*v); 1665 y[9*(*idx)+3] += alpha4*(*v); 1666 y[9*(*idx)+4] += alpha5*(*v); 1667 y[9*(*idx)+5] += alpha6*(*v); 1668 y[9*(*idx)+6] += alpha7*(*v); 1669 y[9*(*idx)+7] += alpha8*(*v); 1670 y[9*(*idx)+8] += alpha9*(*v); 1671 idx++; v++; 1672 } 1673 } 1674 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1675 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1676 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1681 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1682 { 1683 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1684 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1685 const PetscScalar *x,*v; 1686 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1687 PetscErrorCode ierr; 1688 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1689 PetscInt nonzerorow=0,n,i,jrow,j; 1690 1691 PetscFunctionBegin; 1692 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1693 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1694 idx = a->j; 1695 v = a->a; 1696 ii = a->i; 1697 1698 for (i=0; i<m; i++) { 1699 jrow = ii[i]; 1700 n = ii[i+1] - jrow; 1701 sum1 = 0.0; 1702 sum2 = 0.0; 1703 sum3 = 0.0; 1704 sum4 = 0.0; 1705 sum5 = 0.0; 1706 sum6 = 0.0; 1707 sum7 = 0.0; 1708 sum8 = 0.0; 1709 sum9 = 0.0; 1710 sum10 = 0.0; 1711 nonzerorow += (n>0); 1712 for (j=0; j<n; j++) { 1713 sum1 += v[jrow]*x[10*idx[jrow]]; 1714 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1715 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1716 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1717 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1718 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1719 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1720 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1721 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1722 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1723 jrow++; 1724 } 1725 y[10*i] = sum1; 1726 y[10*i+1] = sum2; 1727 y[10*i+2] = sum3; 1728 y[10*i+3] = sum4; 1729 y[10*i+4] = sum5; 1730 y[10*i+5] = sum6; 1731 y[10*i+6] = sum7; 1732 y[10*i+7] = sum8; 1733 y[10*i+8] = sum9; 1734 y[10*i+9] = sum10; 1735 } 1736 1737 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1738 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1739 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1745 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1746 { 1747 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1749 const PetscScalar *x,*v; 1750 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1751 PetscErrorCode ierr; 1752 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1753 PetscInt n,i,jrow,j; 1754 1755 PetscFunctionBegin; 1756 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1757 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1758 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1759 idx = a->j; 1760 v = a->a; 1761 ii = a->i; 1762 1763 for (i=0; i<m; i++) { 1764 jrow = ii[i]; 1765 n = ii[i+1] - jrow; 1766 sum1 = 0.0; 1767 sum2 = 0.0; 1768 sum3 = 0.0; 1769 sum4 = 0.0; 1770 sum5 = 0.0; 1771 sum6 = 0.0; 1772 sum7 = 0.0; 1773 sum8 = 0.0; 1774 sum9 = 0.0; 1775 sum10 = 0.0; 1776 for (j=0; j<n; j++) { 1777 sum1 += v[jrow]*x[10*idx[jrow]]; 1778 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1779 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1780 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1781 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1782 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1783 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1784 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1785 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1786 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1787 jrow++; 1788 } 1789 y[10*i] += sum1; 1790 y[10*i+1] += sum2; 1791 y[10*i+2] += sum3; 1792 y[10*i+3] += sum4; 1793 y[10*i+4] += sum5; 1794 y[10*i+5] += sum6; 1795 y[10*i+6] += sum7; 1796 y[10*i+7] += sum8; 1797 y[10*i+8] += sum9; 1798 y[10*i+9] += sum10; 1799 } 1800 1801 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1802 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1803 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1809 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1810 { 1811 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1812 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1813 const PetscScalar *x,*v; 1814 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1815 PetscErrorCode ierr; 1816 const PetscInt m = b->AIJ->rmap->n,*idx; 1817 PetscInt n,i; 1818 1819 PetscFunctionBegin; 1820 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1821 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1822 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1823 1824 for (i=0; i<m; i++) { 1825 idx = a->j + a->i[i] ; 1826 v = a->a + a->i[i] ; 1827 n = a->i[i+1] - a->i[i]; 1828 alpha1 = x[10*i]; 1829 alpha2 = x[10*i+1]; 1830 alpha3 = x[10*i+2]; 1831 alpha4 = x[10*i+3]; 1832 alpha5 = x[10*i+4]; 1833 alpha6 = x[10*i+5]; 1834 alpha7 = x[10*i+6]; 1835 alpha8 = x[10*i+7]; 1836 alpha9 = x[10*i+8]; 1837 alpha10 = x[10*i+9]; 1838 while (n-->0) { 1839 y[10*(*idx)] += alpha1*(*v); 1840 y[10*(*idx)+1] += alpha2*(*v); 1841 y[10*(*idx)+2] += alpha3*(*v); 1842 y[10*(*idx)+3] += alpha4*(*v); 1843 y[10*(*idx)+4] += alpha5*(*v); 1844 y[10*(*idx)+5] += alpha6*(*v); 1845 y[10*(*idx)+6] += alpha7*(*v); 1846 y[10*(*idx)+7] += alpha8*(*v); 1847 y[10*(*idx)+8] += alpha9*(*v); 1848 y[10*(*idx)+9] += alpha10*(*v); 1849 idx++; v++; 1850 } 1851 } 1852 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1853 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1854 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1860 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1861 { 1862 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1863 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1864 const PetscScalar *x,*v; 1865 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1866 PetscErrorCode ierr; 1867 const PetscInt m = b->AIJ->rmap->n,*idx; 1868 PetscInt n,i; 1869 1870 PetscFunctionBegin; 1871 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1872 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1873 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1874 for (i=0; i<m; i++) { 1875 idx = a->j + a->i[i] ; 1876 v = a->a + a->i[i] ; 1877 n = a->i[i+1] - a->i[i]; 1878 alpha1 = x[10*i]; 1879 alpha2 = x[10*i+1]; 1880 alpha3 = x[10*i+2]; 1881 alpha4 = x[10*i+3]; 1882 alpha5 = x[10*i+4]; 1883 alpha6 = x[10*i+5]; 1884 alpha7 = x[10*i+6]; 1885 alpha8 = x[10*i+7]; 1886 alpha9 = x[10*i+8]; 1887 alpha10 = x[10*i+9]; 1888 while (n-->0) { 1889 y[10*(*idx)] += alpha1*(*v); 1890 y[10*(*idx)+1] += alpha2*(*v); 1891 y[10*(*idx)+2] += alpha3*(*v); 1892 y[10*(*idx)+3] += alpha4*(*v); 1893 y[10*(*idx)+4] += alpha5*(*v); 1894 y[10*(*idx)+5] += alpha6*(*v); 1895 y[10*(*idx)+6] += alpha7*(*v); 1896 y[10*(*idx)+7] += alpha8*(*v); 1897 y[10*(*idx)+8] += alpha9*(*v); 1898 y[10*(*idx)+9] += alpha10*(*v); 1899 idx++; v++; 1900 } 1901 } 1902 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1903 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1904 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 1909 /*--------------------------------------------------------------------------------------------*/ 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1912 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1913 { 1914 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1915 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1916 const PetscScalar *x,*v; 1917 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1918 PetscErrorCode ierr; 1919 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1920 PetscInt nonzerorow=0,n,i,jrow,j; 1921 1922 PetscFunctionBegin; 1923 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1924 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1925 idx = a->j; 1926 v = a->a; 1927 ii = a->i; 1928 1929 for (i=0; i<m; i++) { 1930 jrow = ii[i]; 1931 n = ii[i+1] - jrow; 1932 sum1 = 0.0; 1933 sum2 = 0.0; 1934 sum3 = 0.0; 1935 sum4 = 0.0; 1936 sum5 = 0.0; 1937 sum6 = 0.0; 1938 sum7 = 0.0; 1939 sum8 = 0.0; 1940 sum9 = 0.0; 1941 sum10 = 0.0; 1942 sum11 = 0.0; 1943 nonzerorow += (n>0); 1944 for (j=0; j<n; j++) { 1945 sum1 += v[jrow]*x[11*idx[jrow]]; 1946 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1947 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1948 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1949 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1950 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1951 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1952 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1953 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1954 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1955 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1956 jrow++; 1957 } 1958 y[11*i] = sum1; 1959 y[11*i+1] = sum2; 1960 y[11*i+2] = sum3; 1961 y[11*i+3] = sum4; 1962 y[11*i+4] = sum5; 1963 y[11*i+5] = sum6; 1964 y[11*i+6] = sum7; 1965 y[11*i+7] = sum8; 1966 y[11*i+8] = sum9; 1967 y[11*i+9] = sum10; 1968 y[11*i+10] = sum11; 1969 } 1970 1971 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1972 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1973 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1974 PetscFunctionReturn(0); 1975 } 1976 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1979 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1980 { 1981 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1982 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1983 const PetscScalar *x,*v; 1984 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1985 PetscErrorCode ierr; 1986 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1987 PetscInt n,i,jrow,j; 1988 1989 PetscFunctionBegin; 1990 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1991 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1992 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1993 idx = a->j; 1994 v = a->a; 1995 ii = a->i; 1996 1997 for (i=0; i<m; i++) { 1998 jrow = ii[i]; 1999 n = ii[i+1] - jrow; 2000 sum1 = 0.0; 2001 sum2 = 0.0; 2002 sum3 = 0.0; 2003 sum4 = 0.0; 2004 sum5 = 0.0; 2005 sum6 = 0.0; 2006 sum7 = 0.0; 2007 sum8 = 0.0; 2008 sum9 = 0.0; 2009 sum10 = 0.0; 2010 sum11 = 0.0; 2011 for (j=0; j<n; j++) { 2012 sum1 += v[jrow]*x[11*idx[jrow]]; 2013 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2014 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2015 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2016 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2017 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2018 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2019 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2020 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2021 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2022 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2023 jrow++; 2024 } 2025 y[11*i] += sum1; 2026 y[11*i+1] += sum2; 2027 y[11*i+2] += sum3; 2028 y[11*i+3] += sum4; 2029 y[11*i+4] += sum5; 2030 y[11*i+5] += sum6; 2031 y[11*i+6] += sum7; 2032 y[11*i+7] += sum8; 2033 y[11*i+8] += sum9; 2034 y[11*i+9] += sum10; 2035 y[11*i+10] += sum11; 2036 } 2037 2038 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2039 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2040 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2046 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2047 { 2048 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2049 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2050 const PetscScalar *x,*v; 2051 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2052 PetscErrorCode ierr; 2053 const PetscInt m = b->AIJ->rmap->n,*idx; 2054 PetscInt n,i; 2055 2056 PetscFunctionBegin; 2057 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2058 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2059 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2060 2061 for (i=0; i<m; i++) { 2062 idx = a->j + a->i[i] ; 2063 v = a->a + a->i[i] ; 2064 n = a->i[i+1] - a->i[i]; 2065 alpha1 = x[11*i]; 2066 alpha2 = x[11*i+1]; 2067 alpha3 = x[11*i+2]; 2068 alpha4 = x[11*i+3]; 2069 alpha5 = x[11*i+4]; 2070 alpha6 = x[11*i+5]; 2071 alpha7 = x[11*i+6]; 2072 alpha8 = x[11*i+7]; 2073 alpha9 = x[11*i+8]; 2074 alpha10 = x[11*i+9]; 2075 alpha11 = x[11*i+10]; 2076 while (n-->0) { 2077 y[11*(*idx)] += alpha1*(*v); 2078 y[11*(*idx)+1] += alpha2*(*v); 2079 y[11*(*idx)+2] += alpha3*(*v); 2080 y[11*(*idx)+3] += alpha4*(*v); 2081 y[11*(*idx)+4] += alpha5*(*v); 2082 y[11*(*idx)+5] += alpha6*(*v); 2083 y[11*(*idx)+6] += alpha7*(*v); 2084 y[11*(*idx)+7] += alpha8*(*v); 2085 y[11*(*idx)+8] += alpha9*(*v); 2086 y[11*(*idx)+9] += alpha10*(*v); 2087 y[11*(*idx)+10] += alpha11*(*v); 2088 idx++; v++; 2089 } 2090 } 2091 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2092 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2093 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2099 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2100 { 2101 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2102 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2103 const PetscScalar *x,*v; 2104 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2105 PetscErrorCode ierr; 2106 const PetscInt m = b->AIJ->rmap->n,*idx; 2107 PetscInt n,i; 2108 2109 PetscFunctionBegin; 2110 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2111 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2112 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2113 for (i=0; i<m; i++) { 2114 idx = a->j + a->i[i] ; 2115 v = a->a + a->i[i] ; 2116 n = a->i[i+1] - a->i[i]; 2117 alpha1 = x[11*i]; 2118 alpha2 = x[11*i+1]; 2119 alpha3 = x[11*i+2]; 2120 alpha4 = x[11*i+3]; 2121 alpha5 = x[11*i+4]; 2122 alpha6 = x[11*i+5]; 2123 alpha7 = x[11*i+6]; 2124 alpha8 = x[11*i+7]; 2125 alpha9 = x[11*i+8]; 2126 alpha10 = x[11*i+9]; 2127 alpha11 = x[11*i+10]; 2128 while (n-->0) { 2129 y[11*(*idx)] += alpha1*(*v); 2130 y[11*(*idx)+1] += alpha2*(*v); 2131 y[11*(*idx)+2] += alpha3*(*v); 2132 y[11*(*idx)+3] += alpha4*(*v); 2133 y[11*(*idx)+4] += alpha5*(*v); 2134 y[11*(*idx)+5] += alpha6*(*v); 2135 y[11*(*idx)+6] += alpha7*(*v); 2136 y[11*(*idx)+7] += alpha8*(*v); 2137 y[11*(*idx)+8] += alpha9*(*v); 2138 y[11*(*idx)+9] += alpha10*(*v); 2139 y[11*(*idx)+10] += alpha11*(*v); 2140 idx++; v++; 2141 } 2142 } 2143 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2144 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2145 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 2150 /*--------------------------------------------------------------------------------------------*/ 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2153 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2154 { 2155 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2156 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2157 const PetscScalar *x,*v; 2158 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2159 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2160 PetscErrorCode ierr; 2161 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2162 PetscInt nonzerorow=0,n,i,jrow,j; 2163 2164 PetscFunctionBegin; 2165 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2166 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2167 idx = a->j; 2168 v = a->a; 2169 ii = a->i; 2170 2171 for (i=0; i<m; i++) { 2172 jrow = ii[i]; 2173 n = ii[i+1] - jrow; 2174 sum1 = 0.0; 2175 sum2 = 0.0; 2176 sum3 = 0.0; 2177 sum4 = 0.0; 2178 sum5 = 0.0; 2179 sum6 = 0.0; 2180 sum7 = 0.0; 2181 sum8 = 0.0; 2182 sum9 = 0.0; 2183 sum10 = 0.0; 2184 sum11 = 0.0; 2185 sum12 = 0.0; 2186 sum13 = 0.0; 2187 sum14 = 0.0; 2188 sum15 = 0.0; 2189 sum16 = 0.0; 2190 nonzerorow += (n>0); 2191 for (j=0; j<n; j++) { 2192 sum1 += v[jrow]*x[16*idx[jrow]]; 2193 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2194 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2195 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2196 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2197 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2198 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2199 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2200 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2201 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2202 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2203 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2204 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2205 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2206 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2207 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2208 jrow++; 2209 } 2210 y[16*i] = sum1; 2211 y[16*i+1] = sum2; 2212 y[16*i+2] = sum3; 2213 y[16*i+3] = sum4; 2214 y[16*i+4] = sum5; 2215 y[16*i+5] = sum6; 2216 y[16*i+6] = sum7; 2217 y[16*i+7] = sum8; 2218 y[16*i+8] = sum9; 2219 y[16*i+9] = sum10; 2220 y[16*i+10] = sum11; 2221 y[16*i+11] = sum12; 2222 y[16*i+12] = sum13; 2223 y[16*i+13] = sum14; 2224 y[16*i+14] = sum15; 2225 y[16*i+15] = sum16; 2226 } 2227 2228 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2229 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2230 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2236 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2237 { 2238 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2239 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2240 const PetscScalar *x,*v; 2241 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2242 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2243 PetscErrorCode ierr; 2244 const PetscInt m = b->AIJ->rmap->n,*idx; 2245 PetscInt n,i; 2246 2247 PetscFunctionBegin; 2248 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2249 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2251 2252 for (i=0; i<m; i++) { 2253 idx = a->j + a->i[i] ; 2254 v = a->a + a->i[i] ; 2255 n = a->i[i+1] - a->i[i]; 2256 alpha1 = x[16*i]; 2257 alpha2 = x[16*i+1]; 2258 alpha3 = x[16*i+2]; 2259 alpha4 = x[16*i+3]; 2260 alpha5 = x[16*i+4]; 2261 alpha6 = x[16*i+5]; 2262 alpha7 = x[16*i+6]; 2263 alpha8 = x[16*i+7]; 2264 alpha9 = x[16*i+8]; 2265 alpha10 = x[16*i+9]; 2266 alpha11 = x[16*i+10]; 2267 alpha12 = x[16*i+11]; 2268 alpha13 = x[16*i+12]; 2269 alpha14 = x[16*i+13]; 2270 alpha15 = x[16*i+14]; 2271 alpha16 = x[16*i+15]; 2272 while (n-->0) { 2273 y[16*(*idx)] += alpha1*(*v); 2274 y[16*(*idx)+1] += alpha2*(*v); 2275 y[16*(*idx)+2] += alpha3*(*v); 2276 y[16*(*idx)+3] += alpha4*(*v); 2277 y[16*(*idx)+4] += alpha5*(*v); 2278 y[16*(*idx)+5] += alpha6*(*v); 2279 y[16*(*idx)+6] += alpha7*(*v); 2280 y[16*(*idx)+7] += alpha8*(*v); 2281 y[16*(*idx)+8] += alpha9*(*v); 2282 y[16*(*idx)+9] += alpha10*(*v); 2283 y[16*(*idx)+10] += alpha11*(*v); 2284 y[16*(*idx)+11] += alpha12*(*v); 2285 y[16*(*idx)+12] += alpha13*(*v); 2286 y[16*(*idx)+13] += alpha14*(*v); 2287 y[16*(*idx)+14] += alpha15*(*v); 2288 y[16*(*idx)+15] += alpha16*(*v); 2289 idx++; v++; 2290 } 2291 } 2292 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2293 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2294 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2295 PetscFunctionReturn(0); 2296 } 2297 2298 #undef __FUNCT__ 2299 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2300 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2301 { 2302 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2303 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2304 const PetscScalar *x,*v; 2305 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2306 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2307 PetscErrorCode ierr; 2308 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2309 PetscInt n,i,jrow,j; 2310 2311 PetscFunctionBegin; 2312 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2313 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2314 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2315 idx = a->j; 2316 v = a->a; 2317 ii = a->i; 2318 2319 for (i=0; i<m; i++) { 2320 jrow = ii[i]; 2321 n = ii[i+1] - jrow; 2322 sum1 = 0.0; 2323 sum2 = 0.0; 2324 sum3 = 0.0; 2325 sum4 = 0.0; 2326 sum5 = 0.0; 2327 sum6 = 0.0; 2328 sum7 = 0.0; 2329 sum8 = 0.0; 2330 sum9 = 0.0; 2331 sum10 = 0.0; 2332 sum11 = 0.0; 2333 sum12 = 0.0; 2334 sum13 = 0.0; 2335 sum14 = 0.0; 2336 sum15 = 0.0; 2337 sum16 = 0.0; 2338 for (j=0; j<n; j++) { 2339 sum1 += v[jrow]*x[16*idx[jrow]]; 2340 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2341 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2342 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2343 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2344 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2345 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2346 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2347 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2348 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2349 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2350 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2351 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2352 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2353 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2354 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2355 jrow++; 2356 } 2357 y[16*i] += sum1; 2358 y[16*i+1] += sum2; 2359 y[16*i+2] += sum3; 2360 y[16*i+3] += sum4; 2361 y[16*i+4] += sum5; 2362 y[16*i+5] += sum6; 2363 y[16*i+6] += sum7; 2364 y[16*i+7] += sum8; 2365 y[16*i+8] += sum9; 2366 y[16*i+9] += sum10; 2367 y[16*i+10] += sum11; 2368 y[16*i+11] += sum12; 2369 y[16*i+12] += sum13; 2370 y[16*i+13] += sum14; 2371 y[16*i+14] += sum15; 2372 y[16*i+15] += sum16; 2373 } 2374 2375 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2376 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2377 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2383 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2384 { 2385 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2386 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2387 const PetscScalar *x,*v; 2388 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2389 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2390 PetscErrorCode ierr; 2391 const PetscInt m = b->AIJ->rmap->n,*idx; 2392 PetscInt n,i; 2393 2394 PetscFunctionBegin; 2395 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2396 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2397 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2398 for (i=0; i<m; i++) { 2399 idx = a->j + a->i[i] ; 2400 v = a->a + a->i[i] ; 2401 n = a->i[i+1] - a->i[i]; 2402 alpha1 = x[16*i]; 2403 alpha2 = x[16*i+1]; 2404 alpha3 = x[16*i+2]; 2405 alpha4 = x[16*i+3]; 2406 alpha5 = x[16*i+4]; 2407 alpha6 = x[16*i+5]; 2408 alpha7 = x[16*i+6]; 2409 alpha8 = x[16*i+7]; 2410 alpha9 = x[16*i+8]; 2411 alpha10 = x[16*i+9]; 2412 alpha11 = x[16*i+10]; 2413 alpha12 = x[16*i+11]; 2414 alpha13 = x[16*i+12]; 2415 alpha14 = x[16*i+13]; 2416 alpha15 = x[16*i+14]; 2417 alpha16 = x[16*i+15]; 2418 while (n-->0) { 2419 y[16*(*idx)] += alpha1*(*v); 2420 y[16*(*idx)+1] += alpha2*(*v); 2421 y[16*(*idx)+2] += alpha3*(*v); 2422 y[16*(*idx)+3] += alpha4*(*v); 2423 y[16*(*idx)+4] += alpha5*(*v); 2424 y[16*(*idx)+5] += alpha6*(*v); 2425 y[16*(*idx)+6] += alpha7*(*v); 2426 y[16*(*idx)+7] += alpha8*(*v); 2427 y[16*(*idx)+8] += alpha9*(*v); 2428 y[16*(*idx)+9] += alpha10*(*v); 2429 y[16*(*idx)+10] += alpha11*(*v); 2430 y[16*(*idx)+11] += alpha12*(*v); 2431 y[16*(*idx)+12] += alpha13*(*v); 2432 y[16*(*idx)+13] += alpha14*(*v); 2433 y[16*(*idx)+14] += alpha15*(*v); 2434 y[16*(*idx)+15] += alpha16*(*v); 2435 idx++; v++; 2436 } 2437 } 2438 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2439 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2440 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2441 PetscFunctionReturn(0); 2442 } 2443 2444 /*--------------------------------------------------------------------------------------------*/ 2445 #undef __FUNCT__ 2446 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2447 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2448 { 2449 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2450 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2451 const PetscScalar *x,*v; 2452 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2453 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2454 PetscErrorCode ierr; 2455 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2456 PetscInt nonzerorow=0,n,i,jrow,j; 2457 2458 PetscFunctionBegin; 2459 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2460 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2461 idx = a->j; 2462 v = a->a; 2463 ii = a->i; 2464 2465 for (i=0; i<m; i++) { 2466 jrow = ii[i]; 2467 n = ii[i+1] - jrow; 2468 sum1 = 0.0; 2469 sum2 = 0.0; 2470 sum3 = 0.0; 2471 sum4 = 0.0; 2472 sum5 = 0.0; 2473 sum6 = 0.0; 2474 sum7 = 0.0; 2475 sum8 = 0.0; 2476 sum9 = 0.0; 2477 sum10 = 0.0; 2478 sum11 = 0.0; 2479 sum12 = 0.0; 2480 sum13 = 0.0; 2481 sum14 = 0.0; 2482 sum15 = 0.0; 2483 sum16 = 0.0; 2484 sum17 = 0.0; 2485 sum18 = 0.0; 2486 nonzerorow += (n>0); 2487 for (j=0; j<n; j++) { 2488 sum1 += v[jrow]*x[18*idx[jrow]]; 2489 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2490 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2491 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2492 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2493 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2494 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2495 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2496 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2497 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2498 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2499 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2500 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2501 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2502 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2503 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2504 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2505 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2506 jrow++; 2507 } 2508 y[18*i] = sum1; 2509 y[18*i+1] = sum2; 2510 y[18*i+2] = sum3; 2511 y[18*i+3] = sum4; 2512 y[18*i+4] = sum5; 2513 y[18*i+5] = sum6; 2514 y[18*i+6] = sum7; 2515 y[18*i+7] = sum8; 2516 y[18*i+8] = sum9; 2517 y[18*i+9] = sum10; 2518 y[18*i+10] = sum11; 2519 y[18*i+11] = sum12; 2520 y[18*i+12] = sum13; 2521 y[18*i+13] = sum14; 2522 y[18*i+14] = sum15; 2523 y[18*i+15] = sum16; 2524 y[18*i+16] = sum17; 2525 y[18*i+17] = sum18; 2526 } 2527 2528 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2529 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2530 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 #undef __FUNCT__ 2535 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2536 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2537 { 2538 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2540 const PetscScalar *x,*v; 2541 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2542 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2543 PetscErrorCode ierr; 2544 const PetscInt m = b->AIJ->rmap->n,*idx; 2545 PetscInt n,i; 2546 2547 PetscFunctionBegin; 2548 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2549 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2550 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2551 2552 for (i=0; i<m; i++) { 2553 idx = a->j + a->i[i] ; 2554 v = a->a + a->i[i] ; 2555 n = a->i[i+1] - a->i[i]; 2556 alpha1 = x[18*i]; 2557 alpha2 = x[18*i+1]; 2558 alpha3 = x[18*i+2]; 2559 alpha4 = x[18*i+3]; 2560 alpha5 = x[18*i+4]; 2561 alpha6 = x[18*i+5]; 2562 alpha7 = x[18*i+6]; 2563 alpha8 = x[18*i+7]; 2564 alpha9 = x[18*i+8]; 2565 alpha10 = x[18*i+9]; 2566 alpha11 = x[18*i+10]; 2567 alpha12 = x[18*i+11]; 2568 alpha13 = x[18*i+12]; 2569 alpha14 = x[18*i+13]; 2570 alpha15 = x[18*i+14]; 2571 alpha16 = x[18*i+15]; 2572 alpha17 = x[18*i+16]; 2573 alpha18 = x[18*i+17]; 2574 while (n-->0) { 2575 y[18*(*idx)] += alpha1*(*v); 2576 y[18*(*idx)+1] += alpha2*(*v); 2577 y[18*(*idx)+2] += alpha3*(*v); 2578 y[18*(*idx)+3] += alpha4*(*v); 2579 y[18*(*idx)+4] += alpha5*(*v); 2580 y[18*(*idx)+5] += alpha6*(*v); 2581 y[18*(*idx)+6] += alpha7*(*v); 2582 y[18*(*idx)+7] += alpha8*(*v); 2583 y[18*(*idx)+8] += alpha9*(*v); 2584 y[18*(*idx)+9] += alpha10*(*v); 2585 y[18*(*idx)+10] += alpha11*(*v); 2586 y[18*(*idx)+11] += alpha12*(*v); 2587 y[18*(*idx)+12] += alpha13*(*v); 2588 y[18*(*idx)+13] += alpha14*(*v); 2589 y[18*(*idx)+14] += alpha15*(*v); 2590 y[18*(*idx)+15] += alpha16*(*v); 2591 y[18*(*idx)+16] += alpha17*(*v); 2592 y[18*(*idx)+17] += alpha18*(*v); 2593 idx++; v++; 2594 } 2595 } 2596 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2597 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2598 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2604 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2605 { 2606 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2607 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2608 const PetscScalar *x,*v; 2609 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2610 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2611 PetscErrorCode ierr; 2612 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2613 PetscInt n,i,jrow,j; 2614 2615 PetscFunctionBegin; 2616 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2617 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2618 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2619 idx = a->j; 2620 v = a->a; 2621 ii = a->i; 2622 2623 for (i=0; i<m; i++) { 2624 jrow = ii[i]; 2625 n = ii[i+1] - jrow; 2626 sum1 = 0.0; 2627 sum2 = 0.0; 2628 sum3 = 0.0; 2629 sum4 = 0.0; 2630 sum5 = 0.0; 2631 sum6 = 0.0; 2632 sum7 = 0.0; 2633 sum8 = 0.0; 2634 sum9 = 0.0; 2635 sum10 = 0.0; 2636 sum11 = 0.0; 2637 sum12 = 0.0; 2638 sum13 = 0.0; 2639 sum14 = 0.0; 2640 sum15 = 0.0; 2641 sum16 = 0.0; 2642 sum17 = 0.0; 2643 sum18 = 0.0; 2644 for (j=0; j<n; j++) { 2645 sum1 += v[jrow]*x[18*idx[jrow]]; 2646 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2647 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2648 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2649 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2650 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2651 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2652 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2653 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2654 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2655 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2656 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2657 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2658 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2659 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2660 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2661 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2662 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2663 jrow++; 2664 } 2665 y[18*i] += sum1; 2666 y[18*i+1] += sum2; 2667 y[18*i+2] += sum3; 2668 y[18*i+3] += sum4; 2669 y[18*i+4] += sum5; 2670 y[18*i+5] += sum6; 2671 y[18*i+6] += sum7; 2672 y[18*i+7] += sum8; 2673 y[18*i+8] += sum9; 2674 y[18*i+9] += sum10; 2675 y[18*i+10] += sum11; 2676 y[18*i+11] += sum12; 2677 y[18*i+12] += sum13; 2678 y[18*i+13] += sum14; 2679 y[18*i+14] += sum15; 2680 y[18*i+15] += sum16; 2681 y[18*i+16] += sum17; 2682 y[18*i+17] += sum18; 2683 } 2684 2685 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2686 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2687 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2688 PetscFunctionReturn(0); 2689 } 2690 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2693 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2694 { 2695 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2696 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2697 const PetscScalar *x,*v; 2698 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2699 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2700 PetscErrorCode ierr; 2701 const PetscInt m = b->AIJ->rmap->n,*idx; 2702 PetscInt n,i; 2703 2704 PetscFunctionBegin; 2705 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2706 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2707 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2708 for (i=0; i<m; i++) { 2709 idx = a->j + a->i[i] ; 2710 v = a->a + a->i[i] ; 2711 n = a->i[i+1] - a->i[i]; 2712 alpha1 = x[18*i]; 2713 alpha2 = x[18*i+1]; 2714 alpha3 = x[18*i+2]; 2715 alpha4 = x[18*i+3]; 2716 alpha5 = x[18*i+4]; 2717 alpha6 = x[18*i+5]; 2718 alpha7 = x[18*i+6]; 2719 alpha8 = x[18*i+7]; 2720 alpha9 = x[18*i+8]; 2721 alpha10 = x[18*i+9]; 2722 alpha11 = x[18*i+10]; 2723 alpha12 = x[18*i+11]; 2724 alpha13 = x[18*i+12]; 2725 alpha14 = x[18*i+13]; 2726 alpha15 = x[18*i+14]; 2727 alpha16 = x[18*i+15]; 2728 alpha17 = x[18*i+16]; 2729 alpha18 = x[18*i+17]; 2730 while (n-->0) { 2731 y[18*(*idx)] += alpha1*(*v); 2732 y[18*(*idx)+1] += alpha2*(*v); 2733 y[18*(*idx)+2] += alpha3*(*v); 2734 y[18*(*idx)+3] += alpha4*(*v); 2735 y[18*(*idx)+4] += alpha5*(*v); 2736 y[18*(*idx)+5] += alpha6*(*v); 2737 y[18*(*idx)+6] += alpha7*(*v); 2738 y[18*(*idx)+7] += alpha8*(*v); 2739 y[18*(*idx)+8] += alpha9*(*v); 2740 y[18*(*idx)+9] += alpha10*(*v); 2741 y[18*(*idx)+10] += alpha11*(*v); 2742 y[18*(*idx)+11] += alpha12*(*v); 2743 y[18*(*idx)+12] += alpha13*(*v); 2744 y[18*(*idx)+13] += alpha14*(*v); 2745 y[18*(*idx)+14] += alpha15*(*v); 2746 y[18*(*idx)+15] += alpha16*(*v); 2747 y[18*(*idx)+16] += alpha17*(*v); 2748 y[18*(*idx)+17] += alpha18*(*v); 2749 idx++; v++; 2750 } 2751 } 2752 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2753 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2754 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758 #undef __FUNCT__ 2759 #define __FUNCT__ "MatMult_SeqMAIJ_N" 2760 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2761 { 2762 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2764 const PetscScalar *x,*v; 2765 PetscScalar *y,*sums; 2766 PetscErrorCode ierr; 2767 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2768 PetscInt n,i,jrow,j,dof = b->dof,k; 2769 2770 PetscFunctionBegin; 2771 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2772 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2773 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2774 idx = a->j; 2775 v = a->a; 2776 ii = a->i; 2777 2778 for (i=0; i<m; i++) { 2779 jrow = ii[i]; 2780 n = ii[i+1] - jrow; 2781 sums = y + dof*i; 2782 for (j=0; j<n; j++) { 2783 for (k=0; k<dof; k++) { 2784 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2785 } 2786 jrow++; 2787 } 2788 } 2789 2790 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2791 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2792 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2793 PetscFunctionReturn(0); 2794 } 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 2798 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2799 { 2800 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2801 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2802 const PetscScalar *x,*v; 2803 PetscScalar *y,*sums; 2804 PetscErrorCode ierr; 2805 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2806 PetscInt n,i,jrow,j,dof = b->dof,k; 2807 2808 PetscFunctionBegin; 2809 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2810 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2811 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2812 idx = a->j; 2813 v = a->a; 2814 ii = a->i; 2815 2816 for (i=0; i<m; i++) { 2817 jrow = ii[i]; 2818 n = ii[i+1] - jrow; 2819 sums = y + dof*i; 2820 for (j=0; j<n; j++) { 2821 for (k=0; k<dof; k++) { 2822 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2823 } 2824 jrow++; 2825 } 2826 } 2827 2828 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2829 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2830 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 #undef __FUNCT__ 2835 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 2836 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2837 { 2838 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2839 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2840 const PetscScalar *x,*v,*alpha; 2841 PetscScalar *y; 2842 PetscErrorCode ierr; 2843 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2844 PetscInt n,i,k; 2845 2846 PetscFunctionBegin; 2847 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2848 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2849 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2850 for (i=0; i<m; i++) { 2851 idx = a->j + a->i[i] ; 2852 v = a->a + a->i[i] ; 2853 n = a->i[i+1] - a->i[i]; 2854 alpha = x + dof*i; 2855 while (n-->0) { 2856 for (k=0; k<dof; k++) { 2857 y[dof*(*idx)+k] += alpha[k]*(*v); 2858 } 2859 idx++; v++; 2860 } 2861 } 2862 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2863 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2864 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 2870 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2871 { 2872 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2873 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2874 const PetscScalar *x,*v,*alpha; 2875 PetscScalar *y; 2876 PetscErrorCode ierr; 2877 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2878 PetscInt n,i,k; 2879 2880 PetscFunctionBegin; 2881 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2882 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2883 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2884 for (i=0; i<m; i++) { 2885 idx = a->j + a->i[i] ; 2886 v = a->a + a->i[i] ; 2887 n = a->i[i+1] - a->i[i]; 2888 alpha = x + dof*i; 2889 while (n-->0) { 2890 for (k=0; k<dof; k++) { 2891 y[dof*(*idx)+k] += alpha[k]*(*v); 2892 } 2893 idx++; v++; 2894 } 2895 } 2896 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2897 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2898 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2899 PetscFunctionReturn(0); 2900 } 2901 2902 /*===================================================================================*/ 2903 #undef __FUNCT__ 2904 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2905 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2906 { 2907 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2908 PetscErrorCode ierr; 2909 2910 PetscFunctionBegin; 2911 /* start the scatter */ 2912 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2913 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2914 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2915 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2916 PetscFunctionReturn(0); 2917 } 2918 2919 #undef __FUNCT__ 2920 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2921 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2922 { 2923 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2924 PetscErrorCode ierr; 2925 2926 PetscFunctionBegin; 2927 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2928 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2929 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2930 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2931 PetscFunctionReturn(0); 2932 } 2933 2934 #undef __FUNCT__ 2935 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2936 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2937 { 2938 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 /* start the scatter */ 2943 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2944 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2945 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2946 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2947 PetscFunctionReturn(0); 2948 } 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2952 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2953 { 2954 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2955 PetscErrorCode ierr; 2956 2957 PetscFunctionBegin; 2958 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2959 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2960 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2961 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2962 PetscFunctionReturn(0); 2963 } 2964 2965 /* ----------------------------------------------------------------*/ 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2968 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2969 { 2970 PetscErrorCode ierr; 2971 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2972 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2973 Mat P=pp->AIJ; 2974 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2975 PetscInt *pti,*ptj,*ptJ; 2976 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2977 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2978 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2979 MatScalar *ca; 2980 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2981 2982 PetscFunctionBegin; 2983 /* Get ij structure of P^T */ 2984 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2985 2986 cn = pn*ppdof; 2987 /* Allocate ci array, arrays for fill computation and */ 2988 /* free space for accumulating nonzero column info */ 2989 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2990 ci[0] = 0; 2991 2992 /* Work arrays for rows of P^T*A */ 2993 ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr); 2994 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2995 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 2996 2997 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2998 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2999 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3000 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 3001 current_space = free_space; 3002 3003 /* Determine symbolic info for each row of C: */ 3004 for (i=0;i<pn;i++) { 3005 ptnzi = pti[i+1] - pti[i]; 3006 ptJ = ptj + pti[i]; 3007 for (dof=0;dof<ppdof;dof++) { 3008 ptanzi = 0; 3009 /* Determine symbolic row of PtA: */ 3010 for (j=0;j<ptnzi;j++) { 3011 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 3012 arow = ptJ[j]*ppdof + dof; 3013 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 3014 anzj = ai[arow+1] - ai[arow]; 3015 ajj = aj + ai[arow]; 3016 for (k=0;k<anzj;k++) { 3017 if (!ptadenserow[ajj[k]]) { 3018 ptadenserow[ajj[k]] = -1; 3019 ptasparserow[ptanzi++] = ajj[k]; 3020 } 3021 } 3022 } 3023 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 3024 ptaj = ptasparserow; 3025 cnzi = 0; 3026 for (j=0;j<ptanzi;j++) { 3027 /* Get offset within block of P */ 3028 pshift = *ptaj%ppdof; 3029 /* Get block row of P */ 3030 prow = (*ptaj++)/ppdof; /* integer division */ 3031 /* P has same number of nonzeros per row as the compressed form */ 3032 pnzj = pi[prow+1] - pi[prow]; 3033 pjj = pj + pi[prow]; 3034 for (k=0;k<pnzj;k++) { 3035 /* Locations in C are shifted by the offset within the block */ 3036 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3037 if (!denserow[pjj[k]*ppdof+pshift]) { 3038 denserow[pjj[k]*ppdof+pshift] = -1; 3039 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3040 } 3041 } 3042 } 3043 3044 /* sort sparserow */ 3045 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3046 3047 /* If free space is not available, make more free space */ 3048 /* Double the amount of total space in the list */ 3049 if (current_space->local_remaining<cnzi) { 3050 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3051 } 3052 3053 /* Copy data into free space, and zero out denserows */ 3054 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 3055 current_space->array += cnzi; 3056 current_space->local_used += cnzi; 3057 current_space->local_remaining -= cnzi; 3058 3059 for (j=0;j<ptanzi;j++) { 3060 ptadenserow[ptasparserow[j]] = 0; 3061 } 3062 for (j=0;j<cnzi;j++) { 3063 denserow[sparserow[j]] = 0; 3064 } 3065 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3066 /* For now, we will recompute what is needed. */ 3067 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3068 } 3069 } 3070 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3071 /* Allocate space for cj, initialize cj, and */ 3072 /* destroy list of free space and other temporary array(s) */ 3073 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3074 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3075 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3076 3077 /* Allocate space for ca */ 3078 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 3079 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3080 3081 /* put together the new matrix */ 3082 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3083 3084 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3085 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3086 c = (Mat_SeqAIJ *)((*C)->data); 3087 c->free_a = PETSC_TRUE; 3088 c->free_ij = PETSC_TRUE; 3089 c->nonew = 0; 3090 (*C)->ops->ptap = MatPtAP_SeqAIJ_SeqMAIJ; 3091 3092 /* Clean up. */ 3093 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 #undef __FUNCT__ 3098 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 3099 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3100 { 3101 /* This routine requires testing -- first draft only */ 3102 PetscErrorCode ierr; 3103 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3104 Mat P=pp->AIJ; 3105 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 3106 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 3107 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 3108 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 3109 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3110 const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3111 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3112 const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj; 3113 MatScalar *ca=c->a,*caj,*apa; 3114 3115 PetscFunctionBegin; 3116 /* Allocate temporary array for storage of one row of A*P */ 3117 ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr); 3118 ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr); 3119 ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr); 3120 ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr); 3121 3122 /* Clear old values in C */ 3123 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3124 3125 for (i=0;i<am;i++) { 3126 /* Form sparse row of A*P */ 3127 anzi = ai[i+1] - ai[i]; 3128 apnzj = 0; 3129 for (j=0;j<anzi;j++) { 3130 /* Get offset within block of P */ 3131 pshift = *aj%ppdof; 3132 /* Get block row of P */ 3133 prow = *aj++/ppdof; /* integer division */ 3134 pnzj = pi[prow+1] - pi[prow]; 3135 pjj = pj + pi[prow]; 3136 paj = pa + pi[prow]; 3137 for (k=0;k<pnzj;k++) { 3138 poffset = pjj[k]*ppdof+pshift; 3139 if (!apjdense[poffset]) { 3140 apjdense[poffset] = -1; 3141 apj[apnzj++] = poffset; 3142 } 3143 apa[poffset] += (*aa)*paj[k]; 3144 } 3145 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3146 aa++; 3147 } 3148 3149 /* Sort the j index array for quick sparse axpy. */ 3150 /* Note: a array does not need sorting as it is in dense storage locations. */ 3151 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3152 3153 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3154 prow = i/ppdof; /* integer division */ 3155 pshift = i%ppdof; 3156 poffset = pi[prow]; 3157 pnzi = pi[prow+1] - poffset; 3158 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3159 pJ = pj+poffset; 3160 pA = pa+poffset; 3161 for (j=0;j<pnzi;j++) { 3162 crow = (*pJ)*ppdof+pshift; 3163 cjj = cj + ci[crow]; 3164 caj = ca + ci[crow]; 3165 pJ++; 3166 /* Perform sparse axpy operation. Note cjj includes apj. */ 3167 for (k=0,nextap=0;nextap<apnzj;k++) { 3168 if (cjj[k]==apj[nextap]) { 3169 caj[k] += (*pA)*apa[apj[nextap++]]; 3170 } 3171 } 3172 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3173 pA++; 3174 } 3175 3176 /* Zero the current row info for A*P */ 3177 for (j=0;j<apnzj;j++) { 3178 apa[apj[j]] = 0.; 3179 apjdense[apj[j]] = 0; 3180 } 3181 } 3182 3183 /* Assemble the final matrix and clean up */ 3184 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3185 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3186 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3187 PetscFunctionReturn(0); 3188 } 3189 3190 #undef __FUNCT__ 3191 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ" 3192 PETSC_EXTERN_C PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3193 { 3194 PetscErrorCode ierr; 3195 3196 PetscFunctionBegin; 3197 if (scall == MAT_INITIAL_MATRIX){ 3198 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3199 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3200 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3201 } 3202 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3203 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3204 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3205 PetscFunctionReturn(0); 3206 } 3207 3208 #undef __FUNCT__ 3209 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3210 PETSC_EXTERN_C PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3211 { 3212 PetscErrorCode ierr; 3213 3214 PetscFunctionBegin; 3215 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3216 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 3217 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3218 PetscFunctionReturn(0); 3219 } 3220 3221 #undef __FUNCT__ 3222 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3223 PETSC_EXTERN_C PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3224 { 3225 PetscFunctionBegin; 3226 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3227 PetscFunctionReturn(0); 3228 } 3229 3230 #undef __FUNCT__ 3231 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3232 PETSC_EXTERN_C PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3233 { 3234 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3235 Mat a = b->AIJ,B; 3236 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3237 PetscErrorCode ierr; 3238 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3239 PetscInt *cols; 3240 PetscScalar *vals; 3241 3242 PetscFunctionBegin; 3243 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3244 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 3245 for (i=0; i<m; i++) { 3246 nmax = PetscMax(nmax,aij->ilen[i]); 3247 for (j=0; j<dof; j++) { 3248 ilen[dof*i+j] = aij->ilen[i]; 3249 } 3250 } 3251 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3252 ierr = PetscFree(ilen);CHKERRQ(ierr); 3253 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 3254 ii = 0; 3255 for (i=0; i<m; i++) { 3256 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3257 for (j=0; j<dof; j++) { 3258 for (k=0; k<ncols; k++) { 3259 icols[k] = dof*cols[k]+j; 3260 } 3261 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3262 ii++; 3263 } 3264 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3265 } 3266 ierr = PetscFree(icols);CHKERRQ(ierr); 3267 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3268 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3269 3270 if (reuse == MAT_REUSE_MATRIX) { 3271 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3272 } else { 3273 *newmat = B; 3274 } 3275 PetscFunctionReturn(0); 3276 } 3277 3278 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3279 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3282 PETSC_EXTERN_C PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3283 { 3284 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3285 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3286 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3287 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3288 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3289 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3290 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3291 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 3292 PetscInt rstart,cstart,*garray,ii,k; 3293 PetscErrorCode ierr; 3294 PetscScalar *vals,*ovals; 3295 3296 PetscFunctionBegin; 3297 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3298 for (i=0; i<A->rmap->n/dof; i++) { 3299 nmax = PetscMax(nmax,AIJ->ilen[i]); 3300 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3301 for (j=0; j<dof; j++) { 3302 dnz[dof*i+j] = AIJ->ilen[i]; 3303 onz[dof*i+j] = OAIJ->ilen[i]; 3304 } 3305 } 3306 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3307 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3308 3309 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3310 rstart = dof*maij->A->rmap->rstart; 3311 cstart = dof*maij->A->cmap->rstart; 3312 garray = mpiaij->garray; 3313 3314 ii = rstart; 3315 for (i=0; i<A->rmap->n/dof; i++) { 3316 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3317 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3318 for (j=0; j<dof; j++) { 3319 for (k=0; k<ncols; k++) { 3320 icols[k] = cstart + dof*cols[k]+j; 3321 } 3322 for (k=0; k<oncols; k++) { 3323 oicols[k] = dof*garray[ocols[k]]+j; 3324 } 3325 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3326 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3327 ii++; 3328 } 3329 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3330 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3331 } 3332 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3333 3334 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3335 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3336 3337 if (reuse == MAT_REUSE_MATRIX) { 3338 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3339 ((PetscObject)A)->refct = 1; 3340 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3341 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3342 } else { 3343 *newmat = B; 3344 } 3345 PetscFunctionReturn(0); 3346 } 3347 3348 #undef __FUNCT__ 3349 #define __FUNCT__ "MatGetSubMatrix_MAIJ" 3350 PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3351 { 3352 PetscErrorCode ierr; 3353 Mat A; 3354 3355 PetscFunctionBegin; 3356 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3357 ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3358 ierr = MatDestroy(&A);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 /* ---------------------------------------------------------------------------------- */ 3363 #undef __FUNCT__ 3364 #define __FUNCT__ "MatCreateMAIJ" 3365 /*@C 3366 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3367 operations for multicomponent problems. It interpolates each component the same 3368 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3369 and MATMPIAIJ for distributed matrices. 3370 3371 Collective 3372 3373 Input Parameters: 3374 + A - the AIJ matrix describing the action on blocks 3375 - dof - the block size (number of components per node) 3376 3377 Output Parameter: 3378 . maij - the new MAIJ matrix 3379 3380 Operations provided: 3381 + MatMult 3382 . MatMultTranspose 3383 . MatMultAdd 3384 . MatMultTransposeAdd 3385 - MatView 3386 3387 Level: advanced 3388 3389 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3390 @*/ 3391 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3392 { 3393 PetscErrorCode ierr; 3394 PetscMPIInt size; 3395 PetscInt n; 3396 Mat B; 3397 3398 PetscFunctionBegin; 3399 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3400 3401 if (dof == 1) { 3402 *maij = A; 3403 } else { 3404 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3405 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3406 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3407 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3408 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3409 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3410 B->assembled = PETSC_TRUE; 3411 3412 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3413 if (size == 1) { 3414 Mat_SeqMAIJ *b; 3415 3416 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3417 B->ops->setup = PETSC_NULL; 3418 B->ops->destroy = MatDestroy_SeqMAIJ; 3419 B->ops->view = MatView_SeqMAIJ; 3420 b = (Mat_SeqMAIJ*)B->data; 3421 b->dof = dof; 3422 b->AIJ = A; 3423 if (dof == 2) { 3424 B->ops->mult = MatMult_SeqMAIJ_2; 3425 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3426 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3427 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3428 } else if (dof == 3) { 3429 B->ops->mult = MatMult_SeqMAIJ_3; 3430 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3431 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3432 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3433 } else if (dof == 4) { 3434 B->ops->mult = MatMult_SeqMAIJ_4; 3435 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3436 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3437 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3438 } else if (dof == 5) { 3439 B->ops->mult = MatMult_SeqMAIJ_5; 3440 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3441 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3442 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3443 } else if (dof == 6) { 3444 B->ops->mult = MatMult_SeqMAIJ_6; 3445 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3446 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3447 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3448 } else if (dof == 7) { 3449 B->ops->mult = MatMult_SeqMAIJ_7; 3450 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3451 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3452 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3453 } else if (dof == 8) { 3454 B->ops->mult = MatMult_SeqMAIJ_8; 3455 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3456 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3457 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3458 } else if (dof == 9) { 3459 B->ops->mult = MatMult_SeqMAIJ_9; 3460 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3461 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3462 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3463 } else if (dof == 10) { 3464 B->ops->mult = MatMult_SeqMAIJ_10; 3465 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3466 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3467 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3468 } else if (dof == 11) { 3469 B->ops->mult = MatMult_SeqMAIJ_11; 3470 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3471 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3472 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3473 } else if (dof == 16) { 3474 B->ops->mult = MatMult_SeqMAIJ_16; 3475 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3476 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3477 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3478 } else if (dof == 18) { 3479 B->ops->mult = MatMult_SeqMAIJ_18; 3480 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3481 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3482 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3483 } else { 3484 B->ops->mult = MatMult_SeqMAIJ_N; 3485 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3486 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3487 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3488 } 3489 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3490 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatPtAP_seqaij_seqmaij_C","MatPtAP_SeqAIJ_SeqMAIJ",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3491 } else { 3492 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3493 Mat_MPIMAIJ *b; 3494 IS from,to; 3495 Vec gvec; 3496 3497 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3498 B->ops->setup = PETSC_NULL; 3499 B->ops->destroy = MatDestroy_MPIMAIJ; 3500 B->ops->view = MatView_MPIMAIJ; 3501 b = (Mat_MPIMAIJ*)B->data; 3502 b->dof = dof; 3503 b->A = A; 3504 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3505 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3506 3507 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3508 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3509 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3510 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3511 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3512 3513 /* create two temporary Index sets for build scatter gather */ 3514 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3515 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3516 3517 /* create temporary global vector to generate scatter context */ 3518 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3519 3520 /* generate the scatter context */ 3521 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3522 3523 ierr = ISDestroy(&from);CHKERRQ(ierr); 3524 ierr = ISDestroy(&to);CHKERRQ(ierr); 3525 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3526 3527 B->ops->mult = MatMult_MPIMAIJ_dof; 3528 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3529 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3530 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3531 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3532 } 3533 B->ops->getsubmatrix = MatGetSubMatrix_MAIJ; 3534 ierr = MatSetUp(B);CHKERRQ(ierr); 3535 *maij = B; 3536 ierr = MatView_Private(B);CHKERRQ(ierr); 3537 } 3538 PetscFunctionReturn(0); 3539 } 3540