1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 #include <petsc-private/vecimpl.h> 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatMAIJGetAIJ" 25 /*@C 26 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 27 28 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 29 30 Input Parameter: 31 . A - the MAIJ matrix 32 33 Output Parameter: 34 . B - the AIJ matrix 35 36 Level: advanced 37 38 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 39 40 .seealso: MatCreateMAIJ() 41 @*/ 42 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 43 { 44 PetscErrorCode ierr; 45 PetscBool ismpimaij,isseqmaij; 46 47 PetscFunctionBegin; 48 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 49 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 50 if (ismpimaij) { 51 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 52 53 *B = b->A; 54 } else if (isseqmaij) { 55 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 56 57 *B = b->AIJ; 58 } else { 59 *B = A; 60 } 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatMAIJRedimension" 66 /*@C 67 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 68 69 Logically Collective 70 71 Input Parameter: 72 + A - the MAIJ matrix 73 - dof - the block size for the new matrix 74 75 Output Parameter: 76 . B - the new MAIJ matrix 77 78 Level: advanced 79 80 .seealso: MatCreateMAIJ() 81 @*/ 82 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 83 { 84 PetscErrorCode ierr; 85 Mat Aij = PETSC_NULL; 86 87 PetscFunctionBegin; 88 PetscValidLogicalCollectiveInt(A,dof,2); 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 103 ierr = PetscFree(A->data);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatSetUp_MAIJ" 109 PetscErrorCode MatSetUp_MAIJ(Mat A) 110 { 111 PetscFunctionBegin; 112 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatView_SeqMAIJ" 118 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 119 { 120 PetscErrorCode ierr; 121 Mat B; 122 123 PetscFunctionBegin; 124 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 125 ierr = MatView(B,viewer);CHKERRQ(ierr); 126 ierr = MatDestroy(&B);CHKERRQ(ierr); 127 PetscFunctionReturn(0); 128 } 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatView_MPIMAIJ" 132 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 133 { 134 PetscErrorCode ierr; 135 Mat B; 136 137 PetscFunctionBegin; 138 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 139 ierr = MatView(B,viewer);CHKERRQ(ierr); 140 ierr = MatDestroy(&B);CHKERRQ(ierr); 141 PetscFunctionReturn(0); 142 } 143 144 #undef __FUNCT__ 145 #define __FUNCT__ "MatDestroy_MPIMAIJ" 146 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 147 { 148 PetscErrorCode ierr; 149 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 150 151 PetscFunctionBegin; 152 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 153 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 154 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 155 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 156 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 157 ierr = PetscFree(A->data);CHKERRQ(ierr); 158 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 159 PetscFunctionReturn(0); 160 } 161 162 /*MC 163 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 164 multicomponent problems, interpolating or restricting each component the same way independently. 165 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 166 167 Operations provided: 168 . MatMult 169 . MatMultTranspose 170 . MatMultAdd 171 . MatMultTransposeAdd 172 173 Level: advanced 174 175 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 176 M*/ 177 178 EXTERN_C_BEGIN 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatCreate_MAIJ" 181 PetscErrorCode MatCreate_MAIJ(Mat A) 182 { 183 PetscErrorCode ierr; 184 Mat_MPIMAIJ *b; 185 PetscMPIInt size; 186 187 PetscFunctionBegin; 188 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 189 A->data = (void*)b; 190 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 191 A->ops->setup = MatSetUp_MAIJ; 192 193 b->AIJ = 0; 194 b->dof = 0; 195 b->OAIJ = 0; 196 b->ctx = 0; 197 b->w = 0; 198 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 199 if (size == 1){ 200 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 201 } else { 202 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 203 } 204 PetscFunctionReturn(0); 205 } 206 EXTERN_C_END 207 208 /* --------------------------------------------------------------------------------------*/ 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatMult_SeqMAIJ_2" 211 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 212 { 213 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 214 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 215 const PetscScalar *x,*v; 216 PetscScalar *y, sum1, sum2; 217 PetscErrorCode ierr; 218 PetscInt nonzerorow=0,n,i,jrow,j; 219 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 220 221 PetscFunctionBegin; 222 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 223 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 224 idx = a->j; 225 v = a->a; 226 ii = a->i; 227 228 for (i=0; i<m; i++) { 229 jrow = ii[i]; 230 n = ii[i+1] - jrow; 231 sum1 = 0.0; 232 sum2 = 0.0; 233 nonzerorow += (n>0); 234 for (j=0; j<n; j++) { 235 sum1 += v[jrow]*x[2*idx[jrow]]; 236 sum2 += v[jrow]*x[2*idx[jrow]+1]; 237 jrow++; 238 } 239 y[2*i] = sum1; 240 y[2*i+1] = sum2; 241 } 242 243 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 244 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 245 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 251 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 252 { 253 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 254 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 255 const PetscScalar *x,*v; 256 PetscScalar *y,alpha1,alpha2; 257 PetscErrorCode ierr; 258 PetscInt n,i; 259 const PetscInt m = b->AIJ->rmap->n,*idx; 260 261 PetscFunctionBegin; 262 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 263 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 264 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 265 266 for (i=0; i<m; i++) { 267 idx = a->j + a->i[i] ; 268 v = a->a + a->i[i] ; 269 n = a->i[i+1] - a->i[i]; 270 alpha1 = x[2*i]; 271 alpha2 = x[2*i+1]; 272 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 273 } 274 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 275 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 276 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 282 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 283 { 284 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 286 const PetscScalar *x,*v; 287 PetscScalar *y,sum1, sum2; 288 PetscErrorCode ierr; 289 PetscInt n,i,jrow,j; 290 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 291 292 PetscFunctionBegin; 293 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 294 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 295 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 296 idx = a->j; 297 v = a->a; 298 ii = a->i; 299 300 for (i=0; i<m; i++) { 301 jrow = ii[i]; 302 n = ii[i+1] - jrow; 303 sum1 = 0.0; 304 sum2 = 0.0; 305 for (j=0; j<n; j++) { 306 sum1 += v[jrow]*x[2*idx[jrow]]; 307 sum2 += v[jrow]*x[2*idx[jrow]+1]; 308 jrow++; 309 } 310 y[2*i] += sum1; 311 y[2*i+1] += sum2; 312 } 313 314 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 315 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 316 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 #undef __FUNCT__ 320 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 321 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 322 { 323 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 324 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 325 const PetscScalar *x,*v; 326 PetscScalar *y,alpha1,alpha2; 327 PetscErrorCode ierr; 328 PetscInt n,i; 329 const PetscInt m = b->AIJ->rmap->n,*idx; 330 331 PetscFunctionBegin; 332 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 333 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 334 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 idx = a->j + a->i[i] ; 338 v = a->a + a->i[i] ; 339 n = a->i[i+1] - a->i[i]; 340 alpha1 = x[2*i]; 341 alpha2 = x[2*i+1]; 342 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 343 } 344 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 345 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 346 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 /* --------------------------------------------------------------------------------------*/ 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatMult_SeqMAIJ_3" 352 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 353 { 354 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 355 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 356 const PetscScalar *x,*v; 357 PetscScalar *y,sum1, sum2, sum3; 358 PetscErrorCode ierr; 359 PetscInt nonzerorow=0,n,i,jrow,j; 360 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 361 362 PetscFunctionBegin; 363 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 364 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 365 idx = a->j; 366 v = a->a; 367 ii = a->i; 368 369 for (i=0; i<m; i++) { 370 jrow = ii[i]; 371 n = ii[i+1] - jrow; 372 sum1 = 0.0; 373 sum2 = 0.0; 374 sum3 = 0.0; 375 nonzerorow += (n>0); 376 for (j=0; j<n; j++) { 377 sum1 += v[jrow]*x[3*idx[jrow]]; 378 sum2 += v[jrow]*x[3*idx[jrow]+1]; 379 sum3 += v[jrow]*x[3*idx[jrow]+2]; 380 jrow++; 381 } 382 y[3*i] = sum1; 383 y[3*i+1] = sum2; 384 y[3*i+2] = sum3; 385 } 386 387 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 388 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 389 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 395 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 396 { 397 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 398 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 399 const PetscScalar *x,*v; 400 PetscScalar *y,alpha1,alpha2,alpha3; 401 PetscErrorCode ierr; 402 PetscInt n,i; 403 const PetscInt m = b->AIJ->rmap->n,*idx; 404 405 PetscFunctionBegin; 406 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 408 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 409 410 for (i=0; i<m; i++) { 411 idx = a->j + a->i[i]; 412 v = a->a + a->i[i]; 413 n = a->i[i+1] - a->i[i]; 414 alpha1 = x[3*i]; 415 alpha2 = x[3*i+1]; 416 alpha3 = x[3*i+2]; 417 while (n-->0) { 418 y[3*(*idx)] += alpha1*(*v); 419 y[3*(*idx)+1] += alpha2*(*v); 420 y[3*(*idx)+2] += alpha3*(*v); 421 idx++; v++; 422 } 423 } 424 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 426 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 427 PetscFunctionReturn(0); 428 } 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 432 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 433 { 434 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 435 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 436 const PetscScalar *x,*v; 437 PetscScalar *y,sum1, sum2, sum3; 438 PetscErrorCode ierr; 439 PetscInt n,i,jrow,j; 440 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 441 442 PetscFunctionBegin; 443 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 444 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 445 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 446 idx = a->j; 447 v = a->a; 448 ii = a->i; 449 450 for (i=0; i<m; i++) { 451 jrow = ii[i]; 452 n = ii[i+1] - jrow; 453 sum1 = 0.0; 454 sum2 = 0.0; 455 sum3 = 0.0; 456 for (j=0; j<n; j++) { 457 sum1 += v[jrow]*x[3*idx[jrow]]; 458 sum2 += v[jrow]*x[3*idx[jrow]+1]; 459 sum3 += v[jrow]*x[3*idx[jrow]+2]; 460 jrow++; 461 } 462 y[3*i] += sum1; 463 y[3*i+1] += sum2; 464 y[3*i+2] += sum3; 465 } 466 467 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 468 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 469 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 470 PetscFunctionReturn(0); 471 } 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 474 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 475 { 476 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 477 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 478 const PetscScalar *x,*v; 479 PetscScalar *y,alpha1,alpha2,alpha3; 480 PetscErrorCode ierr; 481 PetscInt n,i; 482 const PetscInt m = b->AIJ->rmap->n,*idx; 483 484 PetscFunctionBegin; 485 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 486 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 487 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 488 for (i=0; i<m; i++) { 489 idx = a->j + a->i[i] ; 490 v = a->a + a->i[i] ; 491 n = a->i[i+1] - a->i[i]; 492 alpha1 = x[3*i]; 493 alpha2 = x[3*i+1]; 494 alpha3 = x[3*i+2]; 495 while (n-->0) { 496 y[3*(*idx)] += alpha1*(*v); 497 y[3*(*idx)+1] += alpha2*(*v); 498 y[3*(*idx)+2] += alpha3*(*v); 499 idx++; v++; 500 } 501 } 502 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 504 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 508 /* ------------------------------------------------------------------------------*/ 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatMult_SeqMAIJ_4" 511 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 512 { 513 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 514 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 515 const PetscScalar *x,*v; 516 PetscScalar *y,sum1, sum2, sum3, sum4; 517 PetscErrorCode ierr; 518 PetscInt nonzerorow=0,n,i,jrow,j; 519 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 520 521 PetscFunctionBegin; 522 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 523 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 524 idx = a->j; 525 v = a->a; 526 ii = a->i; 527 528 for (i=0; i<m; i++) { 529 jrow = ii[i]; 530 n = ii[i+1] - jrow; 531 sum1 = 0.0; 532 sum2 = 0.0; 533 sum3 = 0.0; 534 sum4 = 0.0; 535 nonzerorow += (n>0); 536 for (j=0; j<n; j++) { 537 sum1 += v[jrow]*x[4*idx[jrow]]; 538 sum2 += v[jrow]*x[4*idx[jrow]+1]; 539 sum3 += v[jrow]*x[4*idx[jrow]+2]; 540 sum4 += v[jrow]*x[4*idx[jrow]+3]; 541 jrow++; 542 } 543 y[4*i] = sum1; 544 y[4*i+1] = sum2; 545 y[4*i+2] = sum3; 546 y[4*i+3] = sum4; 547 } 548 549 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 550 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 551 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNCT__ 556 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 557 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 558 { 559 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 560 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 561 const PetscScalar *x,*v; 562 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 563 PetscErrorCode ierr; 564 PetscInt n,i; 565 const PetscInt m = b->AIJ->rmap->n,*idx; 566 567 PetscFunctionBegin; 568 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 569 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 570 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 571 for (i=0; i<m; i++) { 572 idx = a->j + a->i[i] ; 573 v = a->a + a->i[i] ; 574 n = a->i[i+1] - a->i[i]; 575 alpha1 = x[4*i]; 576 alpha2 = x[4*i+1]; 577 alpha3 = x[4*i+2]; 578 alpha4 = x[4*i+3]; 579 while (n-->0) { 580 y[4*(*idx)] += alpha1*(*v); 581 y[4*(*idx)+1] += alpha2*(*v); 582 y[4*(*idx)+2] += alpha3*(*v); 583 y[4*(*idx)+3] += alpha4*(*v); 584 idx++; v++; 585 } 586 } 587 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 588 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 589 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 595 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 598 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 599 const PetscScalar *x,*v; 600 PetscScalar *y,sum1, sum2, sum3, sum4; 601 PetscErrorCode ierr; 602 PetscInt n,i,jrow,j; 603 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 604 605 PetscFunctionBegin; 606 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 607 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 608 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 609 idx = a->j; 610 v = a->a; 611 ii = a->i; 612 613 for (i=0; i<m; i++) { 614 jrow = ii[i]; 615 n = ii[i+1] - jrow; 616 sum1 = 0.0; 617 sum2 = 0.0; 618 sum3 = 0.0; 619 sum4 = 0.0; 620 for (j=0; j<n; j++) { 621 sum1 += v[jrow]*x[4*idx[jrow]]; 622 sum2 += v[jrow]*x[4*idx[jrow]+1]; 623 sum3 += v[jrow]*x[4*idx[jrow]+2]; 624 sum4 += v[jrow]*x[4*idx[jrow]+3]; 625 jrow++; 626 } 627 y[4*i] += sum1; 628 y[4*i+1] += sum2; 629 y[4*i+2] += sum3; 630 y[4*i+3] += sum4; 631 } 632 633 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 634 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 635 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 640 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 641 { 642 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 643 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 644 const PetscScalar *x,*v; 645 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 646 PetscErrorCode ierr; 647 PetscInt n,i; 648 const PetscInt m = b->AIJ->rmap->n,*idx; 649 650 PetscFunctionBegin; 651 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 652 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 653 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 654 655 for (i=0; i<m; i++) { 656 idx = a->j + a->i[i] ; 657 v = a->a + a->i[i] ; 658 n = a->i[i+1] - a->i[i]; 659 alpha1 = x[4*i]; 660 alpha2 = x[4*i+1]; 661 alpha3 = x[4*i+2]; 662 alpha4 = x[4*i+3]; 663 while (n-->0) { 664 y[4*(*idx)] += alpha1*(*v); 665 y[4*(*idx)+1] += alpha2*(*v); 666 y[4*(*idx)+2] += alpha3*(*v); 667 y[4*(*idx)+3] += alpha4*(*v); 668 idx++; v++; 669 } 670 } 671 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 672 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 673 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 674 PetscFunctionReturn(0); 675 } 676 /* ------------------------------------------------------------------------------*/ 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatMult_SeqMAIJ_5" 680 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 681 { 682 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 683 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 684 const PetscScalar *x,*v; 685 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 686 PetscErrorCode ierr; 687 PetscInt nonzerorow=0,n,i,jrow,j; 688 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 689 690 PetscFunctionBegin; 691 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 692 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 693 idx = a->j; 694 v = a->a; 695 ii = a->i; 696 697 for (i=0; i<m; i++) { 698 jrow = ii[i]; 699 n = ii[i+1] - jrow; 700 sum1 = 0.0; 701 sum2 = 0.0; 702 sum3 = 0.0; 703 sum4 = 0.0; 704 sum5 = 0.0; 705 nonzerorow += (n>0); 706 for (j=0; j<n; j++) { 707 sum1 += v[jrow]*x[5*idx[jrow]]; 708 sum2 += v[jrow]*x[5*idx[jrow]+1]; 709 sum3 += v[jrow]*x[5*idx[jrow]+2]; 710 sum4 += v[jrow]*x[5*idx[jrow]+3]; 711 sum5 += v[jrow]*x[5*idx[jrow]+4]; 712 jrow++; 713 } 714 y[5*i] = sum1; 715 y[5*i+1] = sum2; 716 y[5*i+2] = sum3; 717 y[5*i+3] = sum4; 718 y[5*i+4] = sum5; 719 } 720 721 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 722 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 723 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 729 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 730 { 731 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 732 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 733 const PetscScalar *x,*v; 734 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 735 PetscErrorCode ierr; 736 PetscInt n,i; 737 const PetscInt m = b->AIJ->rmap->n,*idx; 738 739 PetscFunctionBegin; 740 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 741 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 742 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 743 744 for (i=0; i<m; i++) { 745 idx = a->j + a->i[i] ; 746 v = a->a + a->i[i] ; 747 n = a->i[i+1] - a->i[i]; 748 alpha1 = x[5*i]; 749 alpha2 = x[5*i+1]; 750 alpha3 = x[5*i+2]; 751 alpha4 = x[5*i+3]; 752 alpha5 = x[5*i+4]; 753 while (n-->0) { 754 y[5*(*idx)] += alpha1*(*v); 755 y[5*(*idx)+1] += alpha2*(*v); 756 y[5*(*idx)+2] += alpha3*(*v); 757 y[5*(*idx)+3] += alpha4*(*v); 758 y[5*(*idx)+4] += alpha5*(*v); 759 idx++; v++; 760 } 761 } 762 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 763 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 764 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 770 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 771 { 772 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 773 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 774 const PetscScalar *x,*v; 775 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 776 PetscErrorCode ierr; 777 PetscInt n,i,jrow,j; 778 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 779 780 PetscFunctionBegin; 781 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 782 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 783 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 784 idx = a->j; 785 v = a->a; 786 ii = a->i; 787 788 for (i=0; i<m; i++) { 789 jrow = ii[i]; 790 n = ii[i+1] - jrow; 791 sum1 = 0.0; 792 sum2 = 0.0; 793 sum3 = 0.0; 794 sum4 = 0.0; 795 sum5 = 0.0; 796 for (j=0; j<n; j++) { 797 sum1 += v[jrow]*x[5*idx[jrow]]; 798 sum2 += v[jrow]*x[5*idx[jrow]+1]; 799 sum3 += v[jrow]*x[5*idx[jrow]+2]; 800 sum4 += v[jrow]*x[5*idx[jrow]+3]; 801 sum5 += v[jrow]*x[5*idx[jrow]+4]; 802 jrow++; 803 } 804 y[5*i] += sum1; 805 y[5*i+1] += sum2; 806 y[5*i+2] += sum3; 807 y[5*i+3] += sum4; 808 y[5*i+4] += sum5; 809 } 810 811 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 812 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 813 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 819 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 820 { 821 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 822 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 823 const PetscScalar *x,*v; 824 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 825 PetscErrorCode ierr; 826 PetscInt n,i; 827 const PetscInt m = b->AIJ->rmap->n,*idx; 828 829 PetscFunctionBegin; 830 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 831 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 832 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 833 834 for (i=0; i<m; i++) { 835 idx = a->j + a->i[i] ; 836 v = a->a + a->i[i] ; 837 n = a->i[i+1] - a->i[i]; 838 alpha1 = x[5*i]; 839 alpha2 = x[5*i+1]; 840 alpha3 = x[5*i+2]; 841 alpha4 = x[5*i+3]; 842 alpha5 = x[5*i+4]; 843 while (n-->0) { 844 y[5*(*idx)] += alpha1*(*v); 845 y[5*(*idx)+1] += alpha2*(*v); 846 y[5*(*idx)+2] += alpha3*(*v); 847 y[5*(*idx)+3] += alpha4*(*v); 848 y[5*(*idx)+4] += alpha5*(*v); 849 idx++; v++; 850 } 851 } 852 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 853 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 854 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 /* ------------------------------------------------------------------------------*/ 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatMult_SeqMAIJ_6" 861 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 862 { 863 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 864 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 865 const PetscScalar *x,*v; 866 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 867 PetscErrorCode ierr; 868 PetscInt nonzerorow=0,n,i,jrow,j; 869 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 870 871 PetscFunctionBegin; 872 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 873 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 874 idx = a->j; 875 v = a->a; 876 ii = a->i; 877 878 for (i=0; i<m; i++) { 879 jrow = ii[i]; 880 n = ii[i+1] - jrow; 881 sum1 = 0.0; 882 sum2 = 0.0; 883 sum3 = 0.0; 884 sum4 = 0.0; 885 sum5 = 0.0; 886 sum6 = 0.0; 887 nonzerorow += (n>0); 888 for (j=0; j<n; j++) { 889 sum1 += v[jrow]*x[6*idx[jrow]]; 890 sum2 += v[jrow]*x[6*idx[jrow]+1]; 891 sum3 += v[jrow]*x[6*idx[jrow]+2]; 892 sum4 += v[jrow]*x[6*idx[jrow]+3]; 893 sum5 += v[jrow]*x[6*idx[jrow]+4]; 894 sum6 += v[jrow]*x[6*idx[jrow]+5]; 895 jrow++; 896 } 897 y[6*i] = sum1; 898 y[6*i+1] = sum2; 899 y[6*i+2] = sum3; 900 y[6*i+3] = sum4; 901 y[6*i+4] = sum5; 902 y[6*i+5] = sum6; 903 } 904 905 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 906 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 907 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 913 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 914 { 915 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 916 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 917 const PetscScalar *x,*v; 918 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 919 PetscErrorCode ierr; 920 PetscInt n,i; 921 const PetscInt m = b->AIJ->rmap->n,*idx; 922 923 PetscFunctionBegin; 924 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 925 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 926 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 927 928 for (i=0; i<m; i++) { 929 idx = a->j + a->i[i] ; 930 v = a->a + a->i[i] ; 931 n = a->i[i+1] - a->i[i]; 932 alpha1 = x[6*i]; 933 alpha2 = x[6*i+1]; 934 alpha3 = x[6*i+2]; 935 alpha4 = x[6*i+3]; 936 alpha5 = x[6*i+4]; 937 alpha6 = x[6*i+5]; 938 while (n-->0) { 939 y[6*(*idx)] += alpha1*(*v); 940 y[6*(*idx)+1] += alpha2*(*v); 941 y[6*(*idx)+2] += alpha3*(*v); 942 y[6*(*idx)+3] += alpha4*(*v); 943 y[6*(*idx)+4] += alpha5*(*v); 944 y[6*(*idx)+5] += alpha6*(*v); 945 idx++; v++; 946 } 947 } 948 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 949 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 950 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 956 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 957 { 958 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 959 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 960 const PetscScalar *x,*v; 961 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 962 PetscErrorCode ierr; 963 PetscInt n,i,jrow,j; 964 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 965 966 PetscFunctionBegin; 967 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 968 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 969 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 970 idx = a->j; 971 v = a->a; 972 ii = a->i; 973 974 for (i=0; i<m; i++) { 975 jrow = ii[i]; 976 n = ii[i+1] - jrow; 977 sum1 = 0.0; 978 sum2 = 0.0; 979 sum3 = 0.0; 980 sum4 = 0.0; 981 sum5 = 0.0; 982 sum6 = 0.0; 983 for (j=0; j<n; j++) { 984 sum1 += v[jrow]*x[6*idx[jrow]]; 985 sum2 += v[jrow]*x[6*idx[jrow]+1]; 986 sum3 += v[jrow]*x[6*idx[jrow]+2]; 987 sum4 += v[jrow]*x[6*idx[jrow]+3]; 988 sum5 += v[jrow]*x[6*idx[jrow]+4]; 989 sum6 += v[jrow]*x[6*idx[jrow]+5]; 990 jrow++; 991 } 992 y[6*i] += sum1; 993 y[6*i+1] += sum2; 994 y[6*i+2] += sum3; 995 y[6*i+3] += sum4; 996 y[6*i+4] += sum5; 997 y[6*i+5] += sum6; 998 } 999 1000 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1001 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1002 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1008 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 1009 { 1010 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1011 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1012 const PetscScalar *x,*v; 1013 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1014 PetscErrorCode ierr; 1015 PetscInt n,i; 1016 const PetscInt m = b->AIJ->rmap->n,*idx; 1017 1018 PetscFunctionBegin; 1019 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1020 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1021 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1022 1023 for (i=0; i<m; i++) { 1024 idx = a->j + a->i[i] ; 1025 v = a->a + a->i[i] ; 1026 n = a->i[i+1] - a->i[i]; 1027 alpha1 = x[6*i]; 1028 alpha2 = x[6*i+1]; 1029 alpha3 = x[6*i+2]; 1030 alpha4 = x[6*i+3]; 1031 alpha5 = x[6*i+4]; 1032 alpha6 = x[6*i+5]; 1033 while (n-->0) { 1034 y[6*(*idx)] += alpha1*(*v); 1035 y[6*(*idx)+1] += alpha2*(*v); 1036 y[6*(*idx)+2] += alpha3*(*v); 1037 y[6*(*idx)+3] += alpha4*(*v); 1038 y[6*(*idx)+4] += alpha5*(*v); 1039 y[6*(*idx)+5] += alpha6*(*v); 1040 idx++; v++; 1041 } 1042 } 1043 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1044 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1045 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 /* ------------------------------------------------------------------------------*/ 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1052 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1053 { 1054 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1055 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1056 const PetscScalar *x,*v; 1057 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1058 PetscErrorCode ierr; 1059 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1060 PetscInt nonzerorow=0,n,i,jrow,j; 1061 1062 PetscFunctionBegin; 1063 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1064 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1065 idx = a->j; 1066 v = a->a; 1067 ii = a->i; 1068 1069 for (i=0; i<m; i++) { 1070 jrow = ii[i]; 1071 n = ii[i+1] - jrow; 1072 sum1 = 0.0; 1073 sum2 = 0.0; 1074 sum3 = 0.0; 1075 sum4 = 0.0; 1076 sum5 = 0.0; 1077 sum6 = 0.0; 1078 sum7 = 0.0; 1079 nonzerorow += (n>0); 1080 for (j=0; j<n; j++) { 1081 sum1 += v[jrow]*x[7*idx[jrow]]; 1082 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1083 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1084 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1085 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1086 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1087 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1088 jrow++; 1089 } 1090 y[7*i] = sum1; 1091 y[7*i+1] = sum2; 1092 y[7*i+2] = sum3; 1093 y[7*i+3] = sum4; 1094 y[7*i+4] = sum5; 1095 y[7*i+5] = sum6; 1096 y[7*i+6] = sum7; 1097 } 1098 1099 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1100 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1101 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1107 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1108 { 1109 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1111 const PetscScalar *x,*v; 1112 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1113 PetscErrorCode ierr; 1114 const PetscInt m = b->AIJ->rmap->n,*idx; 1115 PetscInt n,i; 1116 1117 PetscFunctionBegin; 1118 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1119 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1120 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1121 1122 for (i=0; i<m; i++) { 1123 idx = a->j + a->i[i] ; 1124 v = a->a + a->i[i] ; 1125 n = a->i[i+1] - a->i[i]; 1126 alpha1 = x[7*i]; 1127 alpha2 = x[7*i+1]; 1128 alpha3 = x[7*i+2]; 1129 alpha4 = x[7*i+3]; 1130 alpha5 = x[7*i+4]; 1131 alpha6 = x[7*i+5]; 1132 alpha7 = x[7*i+6]; 1133 while (n-->0) { 1134 y[7*(*idx)] += alpha1*(*v); 1135 y[7*(*idx)+1] += alpha2*(*v); 1136 y[7*(*idx)+2] += alpha3*(*v); 1137 y[7*(*idx)+3] += alpha4*(*v); 1138 y[7*(*idx)+4] += alpha5*(*v); 1139 y[7*(*idx)+5] += alpha6*(*v); 1140 y[7*(*idx)+6] += alpha7*(*v); 1141 idx++; v++; 1142 } 1143 } 1144 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1145 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1146 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1152 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1153 { 1154 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1155 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1156 const PetscScalar *x,*v; 1157 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1158 PetscErrorCode ierr; 1159 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1160 PetscInt n,i,jrow,j; 1161 1162 PetscFunctionBegin; 1163 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1164 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1165 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1166 idx = a->j; 1167 v = a->a; 1168 ii = a->i; 1169 1170 for (i=0; i<m; i++) { 1171 jrow = ii[i]; 1172 n = ii[i+1] - jrow; 1173 sum1 = 0.0; 1174 sum2 = 0.0; 1175 sum3 = 0.0; 1176 sum4 = 0.0; 1177 sum5 = 0.0; 1178 sum6 = 0.0; 1179 sum7 = 0.0; 1180 for (j=0; j<n; j++) { 1181 sum1 += v[jrow]*x[7*idx[jrow]]; 1182 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1183 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1184 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1185 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1186 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1187 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1188 jrow++; 1189 } 1190 y[7*i] += sum1; 1191 y[7*i+1] += sum2; 1192 y[7*i+2] += sum3; 1193 y[7*i+3] += sum4; 1194 y[7*i+4] += sum5; 1195 y[7*i+5] += sum6; 1196 y[7*i+6] += sum7; 1197 } 1198 1199 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1200 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1201 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1207 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1208 { 1209 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1211 const PetscScalar *x,*v; 1212 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1213 PetscErrorCode ierr; 1214 const PetscInt m = b->AIJ->rmap->n,*idx; 1215 PetscInt n,i; 1216 1217 PetscFunctionBegin; 1218 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1219 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1220 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1221 for (i=0; i<m; i++) { 1222 idx = a->j + a->i[i] ; 1223 v = a->a + a->i[i] ; 1224 n = a->i[i+1] - a->i[i]; 1225 alpha1 = x[7*i]; 1226 alpha2 = x[7*i+1]; 1227 alpha3 = x[7*i+2]; 1228 alpha4 = x[7*i+3]; 1229 alpha5 = x[7*i+4]; 1230 alpha6 = x[7*i+5]; 1231 alpha7 = x[7*i+6]; 1232 while (n-->0) { 1233 y[7*(*idx)] += alpha1*(*v); 1234 y[7*(*idx)+1] += alpha2*(*v); 1235 y[7*(*idx)+2] += alpha3*(*v); 1236 y[7*(*idx)+3] += alpha4*(*v); 1237 y[7*(*idx)+4] += alpha5*(*v); 1238 y[7*(*idx)+5] += alpha6*(*v); 1239 y[7*(*idx)+6] += alpha7*(*v); 1240 idx++; v++; 1241 } 1242 } 1243 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1244 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1245 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1251 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1252 { 1253 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1254 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1255 const PetscScalar *x,*v; 1256 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1257 PetscErrorCode ierr; 1258 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1259 PetscInt nonzerorow=0,n,i,jrow,j; 1260 1261 PetscFunctionBegin; 1262 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1263 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1264 idx = a->j; 1265 v = a->a; 1266 ii = a->i; 1267 1268 for (i=0; i<m; i++) { 1269 jrow = ii[i]; 1270 n = ii[i+1] - jrow; 1271 sum1 = 0.0; 1272 sum2 = 0.0; 1273 sum3 = 0.0; 1274 sum4 = 0.0; 1275 sum5 = 0.0; 1276 sum6 = 0.0; 1277 sum7 = 0.0; 1278 sum8 = 0.0; 1279 nonzerorow += (n>0); 1280 for (j=0; j<n; j++) { 1281 sum1 += v[jrow]*x[8*idx[jrow]]; 1282 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1283 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1284 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1285 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1286 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1287 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1288 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1289 jrow++; 1290 } 1291 y[8*i] = sum1; 1292 y[8*i+1] = sum2; 1293 y[8*i+2] = sum3; 1294 y[8*i+3] = sum4; 1295 y[8*i+4] = sum5; 1296 y[8*i+5] = sum6; 1297 y[8*i+6] = sum7; 1298 y[8*i+7] = sum8; 1299 } 1300 1301 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1302 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1303 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1309 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1310 { 1311 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1312 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1313 const PetscScalar *x,*v; 1314 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1315 PetscErrorCode ierr; 1316 const PetscInt m = b->AIJ->rmap->n,*idx; 1317 PetscInt n,i; 1318 1319 PetscFunctionBegin; 1320 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1321 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1322 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1323 1324 for (i=0; i<m; i++) { 1325 idx = a->j + a->i[i] ; 1326 v = a->a + a->i[i] ; 1327 n = a->i[i+1] - a->i[i]; 1328 alpha1 = x[8*i]; 1329 alpha2 = x[8*i+1]; 1330 alpha3 = x[8*i+2]; 1331 alpha4 = x[8*i+3]; 1332 alpha5 = x[8*i+4]; 1333 alpha6 = x[8*i+5]; 1334 alpha7 = x[8*i+6]; 1335 alpha8 = x[8*i+7]; 1336 while (n-->0) { 1337 y[8*(*idx)] += alpha1*(*v); 1338 y[8*(*idx)+1] += alpha2*(*v); 1339 y[8*(*idx)+2] += alpha3*(*v); 1340 y[8*(*idx)+3] += alpha4*(*v); 1341 y[8*(*idx)+4] += alpha5*(*v); 1342 y[8*(*idx)+5] += alpha6*(*v); 1343 y[8*(*idx)+6] += alpha7*(*v); 1344 y[8*(*idx)+7] += alpha8*(*v); 1345 idx++; v++; 1346 } 1347 } 1348 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1349 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1350 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1356 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1357 { 1358 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1359 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1360 const PetscScalar *x,*v; 1361 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1362 PetscErrorCode ierr; 1363 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1364 PetscInt n,i,jrow,j; 1365 1366 PetscFunctionBegin; 1367 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1368 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1369 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1370 idx = a->j; 1371 v = a->a; 1372 ii = a->i; 1373 1374 for (i=0; i<m; i++) { 1375 jrow = ii[i]; 1376 n = ii[i+1] - jrow; 1377 sum1 = 0.0; 1378 sum2 = 0.0; 1379 sum3 = 0.0; 1380 sum4 = 0.0; 1381 sum5 = 0.0; 1382 sum6 = 0.0; 1383 sum7 = 0.0; 1384 sum8 = 0.0; 1385 for (j=0; j<n; j++) { 1386 sum1 += v[jrow]*x[8*idx[jrow]]; 1387 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1388 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1389 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1390 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1391 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1392 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1393 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1394 jrow++; 1395 } 1396 y[8*i] += sum1; 1397 y[8*i+1] += sum2; 1398 y[8*i+2] += sum3; 1399 y[8*i+3] += sum4; 1400 y[8*i+4] += sum5; 1401 y[8*i+5] += sum6; 1402 y[8*i+6] += sum7; 1403 y[8*i+7] += sum8; 1404 } 1405 1406 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1407 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1408 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1414 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1415 { 1416 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1417 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1418 const PetscScalar *x,*v; 1419 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1420 PetscErrorCode ierr; 1421 const PetscInt m = b->AIJ->rmap->n,*idx; 1422 PetscInt n,i; 1423 1424 PetscFunctionBegin; 1425 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1426 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1427 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1428 for (i=0; i<m; i++) { 1429 idx = a->j + a->i[i] ; 1430 v = a->a + a->i[i] ; 1431 n = a->i[i+1] - a->i[i]; 1432 alpha1 = x[8*i]; 1433 alpha2 = x[8*i+1]; 1434 alpha3 = x[8*i+2]; 1435 alpha4 = x[8*i+3]; 1436 alpha5 = x[8*i+4]; 1437 alpha6 = x[8*i+5]; 1438 alpha7 = x[8*i+6]; 1439 alpha8 = x[8*i+7]; 1440 while (n-->0) { 1441 y[8*(*idx)] += alpha1*(*v); 1442 y[8*(*idx)+1] += alpha2*(*v); 1443 y[8*(*idx)+2] += alpha3*(*v); 1444 y[8*(*idx)+3] += alpha4*(*v); 1445 y[8*(*idx)+4] += alpha5*(*v); 1446 y[8*(*idx)+5] += alpha6*(*v); 1447 y[8*(*idx)+6] += alpha7*(*v); 1448 y[8*(*idx)+7] += alpha8*(*v); 1449 idx++; v++; 1450 } 1451 } 1452 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1453 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1454 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 /* ------------------------------------------------------------------------------*/ 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1461 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1462 { 1463 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1464 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1465 const PetscScalar *x,*v; 1466 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1467 PetscErrorCode ierr; 1468 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1469 PetscInt nonzerorow=0,n,i,jrow,j; 1470 1471 PetscFunctionBegin; 1472 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1473 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1474 idx = a->j; 1475 v = a->a; 1476 ii = a->i; 1477 1478 for (i=0; i<m; i++) { 1479 jrow = ii[i]; 1480 n = ii[i+1] - jrow; 1481 sum1 = 0.0; 1482 sum2 = 0.0; 1483 sum3 = 0.0; 1484 sum4 = 0.0; 1485 sum5 = 0.0; 1486 sum6 = 0.0; 1487 sum7 = 0.0; 1488 sum8 = 0.0; 1489 sum9 = 0.0; 1490 nonzerorow += (n>0); 1491 for (j=0; j<n; j++) { 1492 sum1 += v[jrow]*x[9*idx[jrow]]; 1493 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1494 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1495 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1496 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1497 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1498 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1499 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1500 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1501 jrow++; 1502 } 1503 y[9*i] = sum1; 1504 y[9*i+1] = sum2; 1505 y[9*i+2] = sum3; 1506 y[9*i+3] = sum4; 1507 y[9*i+4] = sum5; 1508 y[9*i+5] = sum6; 1509 y[9*i+6] = sum7; 1510 y[9*i+7] = sum8; 1511 y[9*i+8] = sum9; 1512 } 1513 1514 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1515 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1516 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 /* ------------------------------------------------------------------------------*/ 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1524 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1525 { 1526 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1527 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1528 const PetscScalar *x,*v; 1529 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1530 PetscErrorCode ierr; 1531 const PetscInt m = b->AIJ->rmap->n,*idx; 1532 PetscInt n,i; 1533 1534 PetscFunctionBegin; 1535 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1536 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1537 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1538 1539 for (i=0; i<m; i++) { 1540 idx = a->j + a->i[i] ; 1541 v = a->a + a->i[i] ; 1542 n = a->i[i+1] - a->i[i]; 1543 alpha1 = x[9*i]; 1544 alpha2 = x[9*i+1]; 1545 alpha3 = x[9*i+2]; 1546 alpha4 = x[9*i+3]; 1547 alpha5 = x[9*i+4]; 1548 alpha6 = x[9*i+5]; 1549 alpha7 = x[9*i+6]; 1550 alpha8 = x[9*i+7]; 1551 alpha9 = x[9*i+8]; 1552 while (n-->0) { 1553 y[9*(*idx)] += alpha1*(*v); 1554 y[9*(*idx)+1] += alpha2*(*v); 1555 y[9*(*idx)+2] += alpha3*(*v); 1556 y[9*(*idx)+3] += alpha4*(*v); 1557 y[9*(*idx)+4] += alpha5*(*v); 1558 y[9*(*idx)+5] += alpha6*(*v); 1559 y[9*(*idx)+6] += alpha7*(*v); 1560 y[9*(*idx)+7] += alpha8*(*v); 1561 y[9*(*idx)+8] += alpha9*(*v); 1562 idx++; v++; 1563 } 1564 } 1565 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1566 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1567 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1573 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1574 { 1575 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1576 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1577 const PetscScalar *x,*v; 1578 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1579 PetscErrorCode ierr; 1580 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1581 PetscInt n,i,jrow,j; 1582 1583 PetscFunctionBegin; 1584 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1585 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1586 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1587 idx = a->j; 1588 v = a->a; 1589 ii = a->i; 1590 1591 for (i=0; i<m; i++) { 1592 jrow = ii[i]; 1593 n = ii[i+1] - jrow; 1594 sum1 = 0.0; 1595 sum2 = 0.0; 1596 sum3 = 0.0; 1597 sum4 = 0.0; 1598 sum5 = 0.0; 1599 sum6 = 0.0; 1600 sum7 = 0.0; 1601 sum8 = 0.0; 1602 sum9 = 0.0; 1603 for (j=0; j<n; j++) { 1604 sum1 += v[jrow]*x[9*idx[jrow]]; 1605 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1606 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1607 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1608 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1609 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1610 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1611 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1612 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1613 jrow++; 1614 } 1615 y[9*i] += sum1; 1616 y[9*i+1] += sum2; 1617 y[9*i+2] += sum3; 1618 y[9*i+3] += sum4; 1619 y[9*i+4] += sum5; 1620 y[9*i+5] += sum6; 1621 y[9*i+6] += sum7; 1622 y[9*i+7] += sum8; 1623 y[9*i+8] += sum9; 1624 } 1625 1626 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1627 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1628 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1634 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1635 { 1636 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1637 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1638 const PetscScalar *x,*v; 1639 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1640 PetscErrorCode ierr; 1641 const PetscInt m = b->AIJ->rmap->n,*idx; 1642 PetscInt n,i; 1643 1644 PetscFunctionBegin; 1645 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1646 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1647 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) { 1649 idx = a->j + a->i[i] ; 1650 v = a->a + a->i[i] ; 1651 n = a->i[i+1] - a->i[i]; 1652 alpha1 = x[9*i]; 1653 alpha2 = x[9*i+1]; 1654 alpha3 = x[9*i+2]; 1655 alpha4 = x[9*i+3]; 1656 alpha5 = x[9*i+4]; 1657 alpha6 = x[9*i+5]; 1658 alpha7 = x[9*i+6]; 1659 alpha8 = x[9*i+7]; 1660 alpha9 = x[9*i+8]; 1661 while (n-->0) { 1662 y[9*(*idx)] += alpha1*(*v); 1663 y[9*(*idx)+1] += alpha2*(*v); 1664 y[9*(*idx)+2] += alpha3*(*v); 1665 y[9*(*idx)+3] += alpha4*(*v); 1666 y[9*(*idx)+4] += alpha5*(*v); 1667 y[9*(*idx)+5] += alpha6*(*v); 1668 y[9*(*idx)+6] += alpha7*(*v); 1669 y[9*(*idx)+7] += alpha8*(*v); 1670 y[9*(*idx)+8] += alpha9*(*v); 1671 idx++; v++; 1672 } 1673 } 1674 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1675 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1676 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1681 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1682 { 1683 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1684 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1685 const PetscScalar *x,*v; 1686 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1687 PetscErrorCode ierr; 1688 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1689 PetscInt nonzerorow=0,n,i,jrow,j; 1690 1691 PetscFunctionBegin; 1692 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1693 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1694 idx = a->j; 1695 v = a->a; 1696 ii = a->i; 1697 1698 for (i=0; i<m; i++) { 1699 jrow = ii[i]; 1700 n = ii[i+1] - jrow; 1701 sum1 = 0.0; 1702 sum2 = 0.0; 1703 sum3 = 0.0; 1704 sum4 = 0.0; 1705 sum5 = 0.0; 1706 sum6 = 0.0; 1707 sum7 = 0.0; 1708 sum8 = 0.0; 1709 sum9 = 0.0; 1710 sum10 = 0.0; 1711 nonzerorow += (n>0); 1712 for (j=0; j<n; j++) { 1713 sum1 += v[jrow]*x[10*idx[jrow]]; 1714 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1715 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1716 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1717 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1718 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1719 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1720 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1721 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1722 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1723 jrow++; 1724 } 1725 y[10*i] = sum1; 1726 y[10*i+1] = sum2; 1727 y[10*i+2] = sum3; 1728 y[10*i+3] = sum4; 1729 y[10*i+4] = sum5; 1730 y[10*i+5] = sum6; 1731 y[10*i+6] = sum7; 1732 y[10*i+7] = sum8; 1733 y[10*i+8] = sum9; 1734 y[10*i+9] = sum10; 1735 } 1736 1737 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1738 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1739 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1745 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1746 { 1747 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1749 const PetscScalar *x,*v; 1750 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1751 PetscErrorCode ierr; 1752 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1753 PetscInt n,i,jrow,j; 1754 1755 PetscFunctionBegin; 1756 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1757 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1758 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1759 idx = a->j; 1760 v = a->a; 1761 ii = a->i; 1762 1763 for (i=0; i<m; i++) { 1764 jrow = ii[i]; 1765 n = ii[i+1] - jrow; 1766 sum1 = 0.0; 1767 sum2 = 0.0; 1768 sum3 = 0.0; 1769 sum4 = 0.0; 1770 sum5 = 0.0; 1771 sum6 = 0.0; 1772 sum7 = 0.0; 1773 sum8 = 0.0; 1774 sum9 = 0.0; 1775 sum10 = 0.0; 1776 for (j=0; j<n; j++) { 1777 sum1 += v[jrow]*x[10*idx[jrow]]; 1778 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1779 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1780 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1781 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1782 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1783 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1784 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1785 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1786 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1787 jrow++; 1788 } 1789 y[10*i] += sum1; 1790 y[10*i+1] += sum2; 1791 y[10*i+2] += sum3; 1792 y[10*i+3] += sum4; 1793 y[10*i+4] += sum5; 1794 y[10*i+5] += sum6; 1795 y[10*i+6] += sum7; 1796 y[10*i+7] += sum8; 1797 y[10*i+8] += sum9; 1798 y[10*i+9] += sum10; 1799 } 1800 1801 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1802 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1803 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1809 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1810 { 1811 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1812 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1813 const PetscScalar *x,*v; 1814 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1815 PetscErrorCode ierr; 1816 const PetscInt m = b->AIJ->rmap->n,*idx; 1817 PetscInt n,i; 1818 1819 PetscFunctionBegin; 1820 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1821 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1822 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1823 1824 for (i=0; i<m; i++) { 1825 idx = a->j + a->i[i] ; 1826 v = a->a + a->i[i] ; 1827 n = a->i[i+1] - a->i[i]; 1828 alpha1 = x[10*i]; 1829 alpha2 = x[10*i+1]; 1830 alpha3 = x[10*i+2]; 1831 alpha4 = x[10*i+3]; 1832 alpha5 = x[10*i+4]; 1833 alpha6 = x[10*i+5]; 1834 alpha7 = x[10*i+6]; 1835 alpha8 = x[10*i+7]; 1836 alpha9 = x[10*i+8]; 1837 alpha10 = x[10*i+9]; 1838 while (n-->0) { 1839 y[10*(*idx)] += alpha1*(*v); 1840 y[10*(*idx)+1] += alpha2*(*v); 1841 y[10*(*idx)+2] += alpha3*(*v); 1842 y[10*(*idx)+3] += alpha4*(*v); 1843 y[10*(*idx)+4] += alpha5*(*v); 1844 y[10*(*idx)+5] += alpha6*(*v); 1845 y[10*(*idx)+6] += alpha7*(*v); 1846 y[10*(*idx)+7] += alpha8*(*v); 1847 y[10*(*idx)+8] += alpha9*(*v); 1848 y[10*(*idx)+9] += alpha10*(*v); 1849 idx++; v++; 1850 } 1851 } 1852 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1853 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1854 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1860 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1861 { 1862 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1863 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1864 const PetscScalar *x,*v; 1865 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1866 PetscErrorCode ierr; 1867 const PetscInt m = b->AIJ->rmap->n,*idx; 1868 PetscInt n,i; 1869 1870 PetscFunctionBegin; 1871 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1872 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1873 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1874 for (i=0; i<m; i++) { 1875 idx = a->j + a->i[i] ; 1876 v = a->a + a->i[i] ; 1877 n = a->i[i+1] - a->i[i]; 1878 alpha1 = x[10*i]; 1879 alpha2 = x[10*i+1]; 1880 alpha3 = x[10*i+2]; 1881 alpha4 = x[10*i+3]; 1882 alpha5 = x[10*i+4]; 1883 alpha6 = x[10*i+5]; 1884 alpha7 = x[10*i+6]; 1885 alpha8 = x[10*i+7]; 1886 alpha9 = x[10*i+8]; 1887 alpha10 = x[10*i+9]; 1888 while (n-->0) { 1889 y[10*(*idx)] += alpha1*(*v); 1890 y[10*(*idx)+1] += alpha2*(*v); 1891 y[10*(*idx)+2] += alpha3*(*v); 1892 y[10*(*idx)+3] += alpha4*(*v); 1893 y[10*(*idx)+4] += alpha5*(*v); 1894 y[10*(*idx)+5] += alpha6*(*v); 1895 y[10*(*idx)+6] += alpha7*(*v); 1896 y[10*(*idx)+7] += alpha8*(*v); 1897 y[10*(*idx)+8] += alpha9*(*v); 1898 y[10*(*idx)+9] += alpha10*(*v); 1899 idx++; v++; 1900 } 1901 } 1902 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1903 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1904 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 1909 /*--------------------------------------------------------------------------------------------*/ 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1912 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1913 { 1914 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1915 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1916 const PetscScalar *x,*v; 1917 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1918 PetscErrorCode ierr; 1919 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1920 PetscInt nonzerorow=0,n,i,jrow,j; 1921 1922 PetscFunctionBegin; 1923 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1924 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1925 idx = a->j; 1926 v = a->a; 1927 ii = a->i; 1928 1929 for (i=0; i<m; i++) { 1930 jrow = ii[i]; 1931 n = ii[i+1] - jrow; 1932 sum1 = 0.0; 1933 sum2 = 0.0; 1934 sum3 = 0.0; 1935 sum4 = 0.0; 1936 sum5 = 0.0; 1937 sum6 = 0.0; 1938 sum7 = 0.0; 1939 sum8 = 0.0; 1940 sum9 = 0.0; 1941 sum10 = 0.0; 1942 sum11 = 0.0; 1943 nonzerorow += (n>0); 1944 for (j=0; j<n; j++) { 1945 sum1 += v[jrow]*x[11*idx[jrow]]; 1946 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1947 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1948 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1949 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1950 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1951 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1952 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1953 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1954 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1955 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1956 jrow++; 1957 } 1958 y[11*i] = sum1; 1959 y[11*i+1] = sum2; 1960 y[11*i+2] = sum3; 1961 y[11*i+3] = sum4; 1962 y[11*i+4] = sum5; 1963 y[11*i+5] = sum6; 1964 y[11*i+6] = sum7; 1965 y[11*i+7] = sum8; 1966 y[11*i+8] = sum9; 1967 y[11*i+9] = sum10; 1968 y[11*i+10] = sum11; 1969 } 1970 1971 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1972 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1973 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1974 PetscFunctionReturn(0); 1975 } 1976 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1979 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1980 { 1981 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1982 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1983 const PetscScalar *x,*v; 1984 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1985 PetscErrorCode ierr; 1986 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1987 PetscInt n,i,jrow,j; 1988 1989 PetscFunctionBegin; 1990 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1991 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1992 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1993 idx = a->j; 1994 v = a->a; 1995 ii = a->i; 1996 1997 for (i=0; i<m; i++) { 1998 jrow = ii[i]; 1999 n = ii[i+1] - jrow; 2000 sum1 = 0.0; 2001 sum2 = 0.0; 2002 sum3 = 0.0; 2003 sum4 = 0.0; 2004 sum5 = 0.0; 2005 sum6 = 0.0; 2006 sum7 = 0.0; 2007 sum8 = 0.0; 2008 sum9 = 0.0; 2009 sum10 = 0.0; 2010 sum11 = 0.0; 2011 for (j=0; j<n; j++) { 2012 sum1 += v[jrow]*x[11*idx[jrow]]; 2013 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2014 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2015 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2016 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2017 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2018 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2019 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2020 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2021 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2022 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2023 jrow++; 2024 } 2025 y[11*i] += sum1; 2026 y[11*i+1] += sum2; 2027 y[11*i+2] += sum3; 2028 y[11*i+3] += sum4; 2029 y[11*i+4] += sum5; 2030 y[11*i+5] += sum6; 2031 y[11*i+6] += sum7; 2032 y[11*i+7] += sum8; 2033 y[11*i+8] += sum9; 2034 y[11*i+9] += sum10; 2035 y[11*i+10] += sum11; 2036 } 2037 2038 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2039 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2040 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2046 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2047 { 2048 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2049 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2050 const PetscScalar *x,*v; 2051 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2052 PetscErrorCode ierr; 2053 const PetscInt m = b->AIJ->rmap->n,*idx; 2054 PetscInt n,i; 2055 2056 PetscFunctionBegin; 2057 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2058 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2059 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2060 2061 for (i=0; i<m; i++) { 2062 idx = a->j + a->i[i] ; 2063 v = a->a + a->i[i] ; 2064 n = a->i[i+1] - a->i[i]; 2065 alpha1 = x[11*i]; 2066 alpha2 = x[11*i+1]; 2067 alpha3 = x[11*i+2]; 2068 alpha4 = x[11*i+3]; 2069 alpha5 = x[11*i+4]; 2070 alpha6 = x[11*i+5]; 2071 alpha7 = x[11*i+6]; 2072 alpha8 = x[11*i+7]; 2073 alpha9 = x[11*i+8]; 2074 alpha10 = x[11*i+9]; 2075 alpha11 = x[11*i+10]; 2076 while (n-->0) { 2077 y[11*(*idx)] += alpha1*(*v); 2078 y[11*(*idx)+1] += alpha2*(*v); 2079 y[11*(*idx)+2] += alpha3*(*v); 2080 y[11*(*idx)+3] += alpha4*(*v); 2081 y[11*(*idx)+4] += alpha5*(*v); 2082 y[11*(*idx)+5] += alpha6*(*v); 2083 y[11*(*idx)+6] += alpha7*(*v); 2084 y[11*(*idx)+7] += alpha8*(*v); 2085 y[11*(*idx)+8] += alpha9*(*v); 2086 y[11*(*idx)+9] += alpha10*(*v); 2087 y[11*(*idx)+10] += alpha11*(*v); 2088 idx++; v++; 2089 } 2090 } 2091 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2092 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2093 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2099 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2100 { 2101 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2102 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2103 const PetscScalar *x,*v; 2104 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2105 PetscErrorCode ierr; 2106 const PetscInt m = b->AIJ->rmap->n,*idx; 2107 PetscInt n,i; 2108 2109 PetscFunctionBegin; 2110 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2111 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2112 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2113 for (i=0; i<m; i++) { 2114 idx = a->j + a->i[i] ; 2115 v = a->a + a->i[i] ; 2116 n = a->i[i+1] - a->i[i]; 2117 alpha1 = x[11*i]; 2118 alpha2 = x[11*i+1]; 2119 alpha3 = x[11*i+2]; 2120 alpha4 = x[11*i+3]; 2121 alpha5 = x[11*i+4]; 2122 alpha6 = x[11*i+5]; 2123 alpha7 = x[11*i+6]; 2124 alpha8 = x[11*i+7]; 2125 alpha9 = x[11*i+8]; 2126 alpha10 = x[11*i+9]; 2127 alpha11 = x[11*i+10]; 2128 while (n-->0) { 2129 y[11*(*idx)] += alpha1*(*v); 2130 y[11*(*idx)+1] += alpha2*(*v); 2131 y[11*(*idx)+2] += alpha3*(*v); 2132 y[11*(*idx)+3] += alpha4*(*v); 2133 y[11*(*idx)+4] += alpha5*(*v); 2134 y[11*(*idx)+5] += alpha6*(*v); 2135 y[11*(*idx)+6] += alpha7*(*v); 2136 y[11*(*idx)+7] += alpha8*(*v); 2137 y[11*(*idx)+8] += alpha9*(*v); 2138 y[11*(*idx)+9] += alpha10*(*v); 2139 y[11*(*idx)+10] += alpha11*(*v); 2140 idx++; v++; 2141 } 2142 } 2143 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2144 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2145 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 2150 /*--------------------------------------------------------------------------------------------*/ 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2153 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2154 { 2155 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2156 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2157 const PetscScalar *x,*v; 2158 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2159 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2160 PetscErrorCode ierr; 2161 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2162 PetscInt nonzerorow=0,n,i,jrow,j; 2163 2164 PetscFunctionBegin; 2165 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2166 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2167 idx = a->j; 2168 v = a->a; 2169 ii = a->i; 2170 2171 for (i=0; i<m; i++) { 2172 jrow = ii[i]; 2173 n = ii[i+1] - jrow; 2174 sum1 = 0.0; 2175 sum2 = 0.0; 2176 sum3 = 0.0; 2177 sum4 = 0.0; 2178 sum5 = 0.0; 2179 sum6 = 0.0; 2180 sum7 = 0.0; 2181 sum8 = 0.0; 2182 sum9 = 0.0; 2183 sum10 = 0.0; 2184 sum11 = 0.0; 2185 sum12 = 0.0; 2186 sum13 = 0.0; 2187 sum14 = 0.0; 2188 sum15 = 0.0; 2189 sum16 = 0.0; 2190 nonzerorow += (n>0); 2191 for (j=0; j<n; j++) { 2192 sum1 += v[jrow]*x[16*idx[jrow]]; 2193 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2194 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2195 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2196 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2197 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2198 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2199 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2200 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2201 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2202 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2203 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2204 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2205 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2206 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2207 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2208 jrow++; 2209 } 2210 y[16*i] = sum1; 2211 y[16*i+1] = sum2; 2212 y[16*i+2] = sum3; 2213 y[16*i+3] = sum4; 2214 y[16*i+4] = sum5; 2215 y[16*i+5] = sum6; 2216 y[16*i+6] = sum7; 2217 y[16*i+7] = sum8; 2218 y[16*i+8] = sum9; 2219 y[16*i+9] = sum10; 2220 y[16*i+10] = sum11; 2221 y[16*i+11] = sum12; 2222 y[16*i+12] = sum13; 2223 y[16*i+13] = sum14; 2224 y[16*i+14] = sum15; 2225 y[16*i+15] = sum16; 2226 } 2227 2228 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2229 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2230 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2236 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2237 { 2238 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2239 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2240 const PetscScalar *x,*v; 2241 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2242 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2243 PetscErrorCode ierr; 2244 const PetscInt m = b->AIJ->rmap->n,*idx; 2245 PetscInt n,i; 2246 2247 PetscFunctionBegin; 2248 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2249 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2251 2252 for (i=0; i<m; i++) { 2253 idx = a->j + a->i[i] ; 2254 v = a->a + a->i[i] ; 2255 n = a->i[i+1] - a->i[i]; 2256 alpha1 = x[16*i]; 2257 alpha2 = x[16*i+1]; 2258 alpha3 = x[16*i+2]; 2259 alpha4 = x[16*i+3]; 2260 alpha5 = x[16*i+4]; 2261 alpha6 = x[16*i+5]; 2262 alpha7 = x[16*i+6]; 2263 alpha8 = x[16*i+7]; 2264 alpha9 = x[16*i+8]; 2265 alpha10 = x[16*i+9]; 2266 alpha11 = x[16*i+10]; 2267 alpha12 = x[16*i+11]; 2268 alpha13 = x[16*i+12]; 2269 alpha14 = x[16*i+13]; 2270 alpha15 = x[16*i+14]; 2271 alpha16 = x[16*i+15]; 2272 while (n-->0) { 2273 y[16*(*idx)] += alpha1*(*v); 2274 y[16*(*idx)+1] += alpha2*(*v); 2275 y[16*(*idx)+2] += alpha3*(*v); 2276 y[16*(*idx)+3] += alpha4*(*v); 2277 y[16*(*idx)+4] += alpha5*(*v); 2278 y[16*(*idx)+5] += alpha6*(*v); 2279 y[16*(*idx)+6] += alpha7*(*v); 2280 y[16*(*idx)+7] += alpha8*(*v); 2281 y[16*(*idx)+8] += alpha9*(*v); 2282 y[16*(*idx)+9] += alpha10*(*v); 2283 y[16*(*idx)+10] += alpha11*(*v); 2284 y[16*(*idx)+11] += alpha12*(*v); 2285 y[16*(*idx)+12] += alpha13*(*v); 2286 y[16*(*idx)+13] += alpha14*(*v); 2287 y[16*(*idx)+14] += alpha15*(*v); 2288 y[16*(*idx)+15] += alpha16*(*v); 2289 idx++; v++; 2290 } 2291 } 2292 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2293 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2294 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2295 PetscFunctionReturn(0); 2296 } 2297 2298 #undef __FUNCT__ 2299 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2300 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2301 { 2302 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2303 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2304 const PetscScalar *x,*v; 2305 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2306 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2307 PetscErrorCode ierr; 2308 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2309 PetscInt n,i,jrow,j; 2310 2311 PetscFunctionBegin; 2312 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2313 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2314 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2315 idx = a->j; 2316 v = a->a; 2317 ii = a->i; 2318 2319 for (i=0; i<m; i++) { 2320 jrow = ii[i]; 2321 n = ii[i+1] - jrow; 2322 sum1 = 0.0; 2323 sum2 = 0.0; 2324 sum3 = 0.0; 2325 sum4 = 0.0; 2326 sum5 = 0.0; 2327 sum6 = 0.0; 2328 sum7 = 0.0; 2329 sum8 = 0.0; 2330 sum9 = 0.0; 2331 sum10 = 0.0; 2332 sum11 = 0.0; 2333 sum12 = 0.0; 2334 sum13 = 0.0; 2335 sum14 = 0.0; 2336 sum15 = 0.0; 2337 sum16 = 0.0; 2338 for (j=0; j<n; j++) { 2339 sum1 += v[jrow]*x[16*idx[jrow]]; 2340 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2341 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2342 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2343 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2344 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2345 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2346 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2347 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2348 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2349 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2350 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2351 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2352 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2353 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2354 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2355 jrow++; 2356 } 2357 y[16*i] += sum1; 2358 y[16*i+1] += sum2; 2359 y[16*i+2] += sum3; 2360 y[16*i+3] += sum4; 2361 y[16*i+4] += sum5; 2362 y[16*i+5] += sum6; 2363 y[16*i+6] += sum7; 2364 y[16*i+7] += sum8; 2365 y[16*i+8] += sum9; 2366 y[16*i+9] += sum10; 2367 y[16*i+10] += sum11; 2368 y[16*i+11] += sum12; 2369 y[16*i+12] += sum13; 2370 y[16*i+13] += sum14; 2371 y[16*i+14] += sum15; 2372 y[16*i+15] += sum16; 2373 } 2374 2375 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2376 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2377 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2383 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2384 { 2385 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2386 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2387 const PetscScalar *x,*v; 2388 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2389 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2390 PetscErrorCode ierr; 2391 const PetscInt m = b->AIJ->rmap->n,*idx; 2392 PetscInt n,i; 2393 2394 PetscFunctionBegin; 2395 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2396 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2397 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2398 for (i=0; i<m; i++) { 2399 idx = a->j + a->i[i] ; 2400 v = a->a + a->i[i] ; 2401 n = a->i[i+1] - a->i[i]; 2402 alpha1 = x[16*i]; 2403 alpha2 = x[16*i+1]; 2404 alpha3 = x[16*i+2]; 2405 alpha4 = x[16*i+3]; 2406 alpha5 = x[16*i+4]; 2407 alpha6 = x[16*i+5]; 2408 alpha7 = x[16*i+6]; 2409 alpha8 = x[16*i+7]; 2410 alpha9 = x[16*i+8]; 2411 alpha10 = x[16*i+9]; 2412 alpha11 = x[16*i+10]; 2413 alpha12 = x[16*i+11]; 2414 alpha13 = x[16*i+12]; 2415 alpha14 = x[16*i+13]; 2416 alpha15 = x[16*i+14]; 2417 alpha16 = x[16*i+15]; 2418 while (n-->0) { 2419 y[16*(*idx)] += alpha1*(*v); 2420 y[16*(*idx)+1] += alpha2*(*v); 2421 y[16*(*idx)+2] += alpha3*(*v); 2422 y[16*(*idx)+3] += alpha4*(*v); 2423 y[16*(*idx)+4] += alpha5*(*v); 2424 y[16*(*idx)+5] += alpha6*(*v); 2425 y[16*(*idx)+6] += alpha7*(*v); 2426 y[16*(*idx)+7] += alpha8*(*v); 2427 y[16*(*idx)+8] += alpha9*(*v); 2428 y[16*(*idx)+9] += alpha10*(*v); 2429 y[16*(*idx)+10] += alpha11*(*v); 2430 y[16*(*idx)+11] += alpha12*(*v); 2431 y[16*(*idx)+12] += alpha13*(*v); 2432 y[16*(*idx)+13] += alpha14*(*v); 2433 y[16*(*idx)+14] += alpha15*(*v); 2434 y[16*(*idx)+15] += alpha16*(*v); 2435 idx++; v++; 2436 } 2437 } 2438 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2439 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2440 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2441 PetscFunctionReturn(0); 2442 } 2443 2444 /*--------------------------------------------------------------------------------------------*/ 2445 #undef __FUNCT__ 2446 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2447 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2448 { 2449 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2450 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2451 const PetscScalar *x,*v; 2452 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2453 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2454 PetscErrorCode ierr; 2455 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2456 PetscInt nonzerorow=0,n,i,jrow,j; 2457 2458 PetscFunctionBegin; 2459 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2460 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2461 idx = a->j; 2462 v = a->a; 2463 ii = a->i; 2464 2465 for (i=0; i<m; i++) { 2466 jrow = ii[i]; 2467 n = ii[i+1] - jrow; 2468 sum1 = 0.0; 2469 sum2 = 0.0; 2470 sum3 = 0.0; 2471 sum4 = 0.0; 2472 sum5 = 0.0; 2473 sum6 = 0.0; 2474 sum7 = 0.0; 2475 sum8 = 0.0; 2476 sum9 = 0.0; 2477 sum10 = 0.0; 2478 sum11 = 0.0; 2479 sum12 = 0.0; 2480 sum13 = 0.0; 2481 sum14 = 0.0; 2482 sum15 = 0.0; 2483 sum16 = 0.0; 2484 sum17 = 0.0; 2485 sum18 = 0.0; 2486 nonzerorow += (n>0); 2487 for (j=0; j<n; j++) { 2488 sum1 += v[jrow]*x[18*idx[jrow]]; 2489 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2490 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2491 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2492 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2493 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2494 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2495 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2496 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2497 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2498 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2499 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2500 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2501 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2502 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2503 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2504 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2505 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2506 jrow++; 2507 } 2508 y[18*i] = sum1; 2509 y[18*i+1] = sum2; 2510 y[18*i+2] = sum3; 2511 y[18*i+3] = sum4; 2512 y[18*i+4] = sum5; 2513 y[18*i+5] = sum6; 2514 y[18*i+6] = sum7; 2515 y[18*i+7] = sum8; 2516 y[18*i+8] = sum9; 2517 y[18*i+9] = sum10; 2518 y[18*i+10] = sum11; 2519 y[18*i+11] = sum12; 2520 y[18*i+12] = sum13; 2521 y[18*i+13] = sum14; 2522 y[18*i+14] = sum15; 2523 y[18*i+15] = sum16; 2524 y[18*i+16] = sum17; 2525 y[18*i+17] = sum18; 2526 } 2527 2528 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2529 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2530 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 #undef __FUNCT__ 2535 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2536 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2537 { 2538 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2540 const PetscScalar *x,*v; 2541 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2542 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2543 PetscErrorCode ierr; 2544 const PetscInt m = b->AIJ->rmap->n,*idx; 2545 PetscInt n,i; 2546 2547 PetscFunctionBegin; 2548 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2549 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2550 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2551 2552 for (i=0; i<m; i++) { 2553 idx = a->j + a->i[i] ; 2554 v = a->a + a->i[i] ; 2555 n = a->i[i+1] - a->i[i]; 2556 alpha1 = x[18*i]; 2557 alpha2 = x[18*i+1]; 2558 alpha3 = x[18*i+2]; 2559 alpha4 = x[18*i+3]; 2560 alpha5 = x[18*i+4]; 2561 alpha6 = x[18*i+5]; 2562 alpha7 = x[18*i+6]; 2563 alpha8 = x[18*i+7]; 2564 alpha9 = x[18*i+8]; 2565 alpha10 = x[18*i+9]; 2566 alpha11 = x[18*i+10]; 2567 alpha12 = x[18*i+11]; 2568 alpha13 = x[18*i+12]; 2569 alpha14 = x[18*i+13]; 2570 alpha15 = x[18*i+14]; 2571 alpha16 = x[18*i+15]; 2572 alpha17 = x[18*i+16]; 2573 alpha18 = x[18*i+17]; 2574 while (n-->0) { 2575 y[18*(*idx)] += alpha1*(*v); 2576 y[18*(*idx)+1] += alpha2*(*v); 2577 y[18*(*idx)+2] += alpha3*(*v); 2578 y[18*(*idx)+3] += alpha4*(*v); 2579 y[18*(*idx)+4] += alpha5*(*v); 2580 y[18*(*idx)+5] += alpha6*(*v); 2581 y[18*(*idx)+6] += alpha7*(*v); 2582 y[18*(*idx)+7] += alpha8*(*v); 2583 y[18*(*idx)+8] += alpha9*(*v); 2584 y[18*(*idx)+9] += alpha10*(*v); 2585 y[18*(*idx)+10] += alpha11*(*v); 2586 y[18*(*idx)+11] += alpha12*(*v); 2587 y[18*(*idx)+12] += alpha13*(*v); 2588 y[18*(*idx)+13] += alpha14*(*v); 2589 y[18*(*idx)+14] += alpha15*(*v); 2590 y[18*(*idx)+15] += alpha16*(*v); 2591 y[18*(*idx)+16] += alpha17*(*v); 2592 y[18*(*idx)+17] += alpha18*(*v); 2593 idx++; v++; 2594 } 2595 } 2596 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2597 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2598 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2604 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2605 { 2606 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2607 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2608 const PetscScalar *x,*v; 2609 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2610 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2611 PetscErrorCode ierr; 2612 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2613 PetscInt n,i,jrow,j; 2614 2615 PetscFunctionBegin; 2616 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2617 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2618 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2619 idx = a->j; 2620 v = a->a; 2621 ii = a->i; 2622 2623 for (i=0; i<m; i++) { 2624 jrow = ii[i]; 2625 n = ii[i+1] - jrow; 2626 sum1 = 0.0; 2627 sum2 = 0.0; 2628 sum3 = 0.0; 2629 sum4 = 0.0; 2630 sum5 = 0.0; 2631 sum6 = 0.0; 2632 sum7 = 0.0; 2633 sum8 = 0.0; 2634 sum9 = 0.0; 2635 sum10 = 0.0; 2636 sum11 = 0.0; 2637 sum12 = 0.0; 2638 sum13 = 0.0; 2639 sum14 = 0.0; 2640 sum15 = 0.0; 2641 sum16 = 0.0; 2642 sum17 = 0.0; 2643 sum18 = 0.0; 2644 for (j=0; j<n; j++) { 2645 sum1 += v[jrow]*x[18*idx[jrow]]; 2646 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2647 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2648 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2649 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2650 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2651 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2652 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2653 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2654 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2655 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2656 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2657 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2658 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2659 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2660 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2661 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2662 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2663 jrow++; 2664 } 2665 y[18*i] += sum1; 2666 y[18*i+1] += sum2; 2667 y[18*i+2] += sum3; 2668 y[18*i+3] += sum4; 2669 y[18*i+4] += sum5; 2670 y[18*i+5] += sum6; 2671 y[18*i+6] += sum7; 2672 y[18*i+7] += sum8; 2673 y[18*i+8] += sum9; 2674 y[18*i+9] += sum10; 2675 y[18*i+10] += sum11; 2676 y[18*i+11] += sum12; 2677 y[18*i+12] += sum13; 2678 y[18*i+13] += sum14; 2679 y[18*i+14] += sum15; 2680 y[18*i+15] += sum16; 2681 y[18*i+16] += sum17; 2682 y[18*i+17] += sum18; 2683 } 2684 2685 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2686 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2687 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2688 PetscFunctionReturn(0); 2689 } 2690 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2693 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2694 { 2695 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2696 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2697 const PetscScalar *x,*v; 2698 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2699 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2700 PetscErrorCode ierr; 2701 const PetscInt m = b->AIJ->rmap->n,*idx; 2702 PetscInt n,i; 2703 2704 PetscFunctionBegin; 2705 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2706 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2707 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2708 for (i=0; i<m; i++) { 2709 idx = a->j + a->i[i] ; 2710 v = a->a + a->i[i] ; 2711 n = a->i[i+1] - a->i[i]; 2712 alpha1 = x[18*i]; 2713 alpha2 = x[18*i+1]; 2714 alpha3 = x[18*i+2]; 2715 alpha4 = x[18*i+3]; 2716 alpha5 = x[18*i+4]; 2717 alpha6 = x[18*i+5]; 2718 alpha7 = x[18*i+6]; 2719 alpha8 = x[18*i+7]; 2720 alpha9 = x[18*i+8]; 2721 alpha10 = x[18*i+9]; 2722 alpha11 = x[18*i+10]; 2723 alpha12 = x[18*i+11]; 2724 alpha13 = x[18*i+12]; 2725 alpha14 = x[18*i+13]; 2726 alpha15 = x[18*i+14]; 2727 alpha16 = x[18*i+15]; 2728 alpha17 = x[18*i+16]; 2729 alpha18 = x[18*i+17]; 2730 while (n-->0) { 2731 y[18*(*idx)] += alpha1*(*v); 2732 y[18*(*idx)+1] += alpha2*(*v); 2733 y[18*(*idx)+2] += alpha3*(*v); 2734 y[18*(*idx)+3] += alpha4*(*v); 2735 y[18*(*idx)+4] += alpha5*(*v); 2736 y[18*(*idx)+5] += alpha6*(*v); 2737 y[18*(*idx)+6] += alpha7*(*v); 2738 y[18*(*idx)+7] += alpha8*(*v); 2739 y[18*(*idx)+8] += alpha9*(*v); 2740 y[18*(*idx)+9] += alpha10*(*v); 2741 y[18*(*idx)+10] += alpha11*(*v); 2742 y[18*(*idx)+11] += alpha12*(*v); 2743 y[18*(*idx)+12] += alpha13*(*v); 2744 y[18*(*idx)+13] += alpha14*(*v); 2745 y[18*(*idx)+14] += alpha15*(*v); 2746 y[18*(*idx)+15] += alpha16*(*v); 2747 y[18*(*idx)+16] += alpha17*(*v); 2748 y[18*(*idx)+17] += alpha18*(*v); 2749 idx++; v++; 2750 } 2751 } 2752 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2753 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2754 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758 #undef __FUNCT__ 2759 #define __FUNCT__ "MatMult_SeqMAIJ_N" 2760 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2761 { 2762 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2764 const PetscScalar *x,*v; 2765 PetscScalar *y,*sums; 2766 PetscErrorCode ierr; 2767 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2768 PetscInt n,i,jrow,j,dof = b->dof,k; 2769 2770 PetscFunctionBegin; 2771 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2772 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2773 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2774 idx = a->j; 2775 v = a->a; 2776 ii = a->i; 2777 2778 for (i=0; i<m; i++) { 2779 jrow = ii[i]; 2780 n = ii[i+1] - jrow; 2781 sums = y + dof*i; 2782 for (j=0; j<n; j++) { 2783 for (k=0; k<dof; k++) { 2784 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2785 } 2786 jrow++; 2787 } 2788 } 2789 2790 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2791 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2792 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2793 PetscFunctionReturn(0); 2794 } 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 2798 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2799 { 2800 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2801 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2802 const PetscScalar *x,*v; 2803 PetscScalar *y,*sums; 2804 PetscErrorCode ierr; 2805 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2806 PetscInt n,i,jrow,j,dof = b->dof,k; 2807 2808 PetscFunctionBegin; 2809 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2810 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2811 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2812 idx = a->j; 2813 v = a->a; 2814 ii = a->i; 2815 2816 for (i=0; i<m; i++) { 2817 jrow = ii[i]; 2818 n = ii[i+1] - jrow; 2819 sums = y + dof*i; 2820 for (j=0; j<n; j++) { 2821 for (k=0; k<dof; k++) { 2822 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2823 } 2824 jrow++; 2825 } 2826 } 2827 2828 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2829 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2830 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 #undef __FUNCT__ 2835 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 2836 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2837 { 2838 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2839 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2840 const PetscScalar *x,*v,*alpha; 2841 PetscScalar *y; 2842 PetscErrorCode ierr; 2843 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2844 PetscInt n,i,k; 2845 2846 PetscFunctionBegin; 2847 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2848 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2849 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2850 for (i=0; i<m; i++) { 2851 idx = a->j + a->i[i] ; 2852 v = a->a + a->i[i] ; 2853 n = a->i[i+1] - a->i[i]; 2854 alpha = x + dof*i; 2855 while (n-->0) { 2856 for (k=0; k<dof; k++) { 2857 y[dof*(*idx)+k] += alpha[k]*(*v); 2858 } 2859 idx++; v++; 2860 } 2861 } 2862 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2863 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2864 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 2870 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2871 { 2872 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2873 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2874 const PetscScalar *x,*v,*alpha; 2875 PetscScalar *y; 2876 PetscErrorCode ierr; 2877 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2878 PetscInt n,i,k; 2879 2880 PetscFunctionBegin; 2881 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2882 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2883 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2884 for (i=0; i<m; i++) { 2885 idx = a->j + a->i[i] ; 2886 v = a->a + a->i[i] ; 2887 n = a->i[i+1] - a->i[i]; 2888 alpha = x + dof*i; 2889 while (n-->0) { 2890 for (k=0; k<dof; k++) { 2891 y[dof*(*idx)+k] += alpha[k]*(*v); 2892 } 2893 idx++; v++; 2894 } 2895 } 2896 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2897 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2898 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2899 PetscFunctionReturn(0); 2900 } 2901 2902 /*===================================================================================*/ 2903 #undef __FUNCT__ 2904 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2905 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2906 { 2907 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2908 PetscErrorCode ierr; 2909 2910 PetscFunctionBegin; 2911 /* start the scatter */ 2912 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2913 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2914 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2915 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2916 PetscFunctionReturn(0); 2917 } 2918 2919 #undef __FUNCT__ 2920 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2921 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2922 { 2923 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2924 PetscErrorCode ierr; 2925 2926 PetscFunctionBegin; 2927 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2928 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2929 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2930 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2931 PetscFunctionReturn(0); 2932 } 2933 2934 #undef __FUNCT__ 2935 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2936 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2937 { 2938 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 /* start the scatter */ 2943 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2944 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2945 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2946 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2947 PetscFunctionReturn(0); 2948 } 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2952 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2953 { 2954 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2955 PetscErrorCode ierr; 2956 2957 PetscFunctionBegin; 2958 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2959 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2960 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2961 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2962 PetscFunctionReturn(0); 2963 } 2964 2965 /* ----------------------------------------------------------------*/ 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2968 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2969 { 2970 /* This routine requires testing -- but it's getting better. */ 2971 PetscErrorCode ierr; 2972 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2973 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2974 Mat P=pp->AIJ; 2975 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2976 PetscInt *pti,*ptj,*ptJ; 2977 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2978 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2979 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2980 MatScalar *ca; 2981 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2982 2983 PetscFunctionBegin; 2984 /* Start timer */ 2985 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2986 2987 /* Get ij structure of P^T */ 2988 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2989 2990 cn = pn*ppdof; 2991 /* Allocate ci array, arrays for fill computation and */ 2992 /* free space for accumulating nonzero column info */ 2993 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2994 ci[0] = 0; 2995 2996 /* Work arrays for rows of P^T*A */ 2997 ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr); 2998 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2999 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 3000 3001 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 3002 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 3003 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3004 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 3005 current_space = free_space; 3006 3007 /* Determine symbolic info for each row of C: */ 3008 for (i=0;i<pn;i++) { 3009 ptnzi = pti[i+1] - pti[i]; 3010 ptJ = ptj + pti[i]; 3011 for (dof=0;dof<ppdof;dof++) { 3012 ptanzi = 0; 3013 /* Determine symbolic row of PtA: */ 3014 for (j=0;j<ptnzi;j++) { 3015 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 3016 arow = ptJ[j]*ppdof + dof; 3017 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 3018 anzj = ai[arow+1] - ai[arow]; 3019 ajj = aj + ai[arow]; 3020 for (k=0;k<anzj;k++) { 3021 if (!ptadenserow[ajj[k]]) { 3022 ptadenserow[ajj[k]] = -1; 3023 ptasparserow[ptanzi++] = ajj[k]; 3024 } 3025 } 3026 } 3027 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 3028 ptaj = ptasparserow; 3029 cnzi = 0; 3030 for (j=0;j<ptanzi;j++) { 3031 /* Get offset within block of P */ 3032 pshift = *ptaj%ppdof; 3033 /* Get block row of P */ 3034 prow = (*ptaj++)/ppdof; /* integer division */ 3035 /* P has same number of nonzeros per row as the compressed form */ 3036 pnzj = pi[prow+1] - pi[prow]; 3037 pjj = pj + pi[prow]; 3038 for (k=0;k<pnzj;k++) { 3039 /* Locations in C are shifted by the offset within the block */ 3040 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3041 if (!denserow[pjj[k]*ppdof+pshift]) { 3042 denserow[pjj[k]*ppdof+pshift] = -1; 3043 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3044 } 3045 } 3046 } 3047 3048 /* sort sparserow */ 3049 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3050 3051 /* If free space is not available, make more free space */ 3052 /* Double the amount of total space in the list */ 3053 if (current_space->local_remaining<cnzi) { 3054 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3055 } 3056 3057 /* Copy data into free space, and zero out denserows */ 3058 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 3059 current_space->array += cnzi; 3060 current_space->local_used += cnzi; 3061 current_space->local_remaining -= cnzi; 3062 3063 for (j=0;j<ptanzi;j++) { 3064 ptadenserow[ptasparserow[j]] = 0; 3065 } 3066 for (j=0;j<cnzi;j++) { 3067 denserow[sparserow[j]] = 0; 3068 } 3069 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3070 /* For now, we will recompute what is needed. */ 3071 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3072 } 3073 } 3074 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3075 /* Allocate space for cj, initialize cj, and */ 3076 /* destroy list of free space and other temporary array(s) */ 3077 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3078 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3079 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3080 3081 /* Allocate space for ca */ 3082 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 3083 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3084 3085 /* put together the new matrix */ 3086 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3087 3088 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3089 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3090 c = (Mat_SeqAIJ *)((*C)->data); 3091 c->free_a = PETSC_TRUE; 3092 c->free_ij = PETSC_TRUE; 3093 c->nonew = 0; 3094 3095 /* Clean up. */ 3096 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3097 3098 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 3099 PetscFunctionReturn(0); 3100 } 3101 3102 #undef __FUNCT__ 3103 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 3104 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3105 { 3106 /* This routine requires testing -- first draft only */ 3107 PetscErrorCode ierr; 3108 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3109 Mat P=pp->AIJ; 3110 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 3111 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 3112 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 3113 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 3114 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3115 const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3116 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3117 const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj; 3118 MatScalar *ca=c->a,*caj,*apa; 3119 3120 PetscFunctionBegin; 3121 /* Allocate temporary array for storage of one row of A*P */ 3122 ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr); 3123 ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr); 3124 ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr); 3125 ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr); 3126 3127 /* Clear old values in C */ 3128 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3129 3130 for (i=0;i<am;i++) { 3131 /* Form sparse row of A*P */ 3132 anzi = ai[i+1] - ai[i]; 3133 apnzj = 0; 3134 for (j=0;j<anzi;j++) { 3135 /* Get offset within block of P */ 3136 pshift = *aj%ppdof; 3137 /* Get block row of P */ 3138 prow = *aj++/ppdof; /* integer division */ 3139 pnzj = pi[prow+1] - pi[prow]; 3140 pjj = pj + pi[prow]; 3141 paj = pa + pi[prow]; 3142 for (k=0;k<pnzj;k++) { 3143 poffset = pjj[k]*ppdof+pshift; 3144 if (!apjdense[poffset]) { 3145 apjdense[poffset] = -1; 3146 apj[apnzj++] = poffset; 3147 } 3148 apa[poffset] += (*aa)*paj[k]; 3149 } 3150 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3151 aa++; 3152 } 3153 3154 /* Sort the j index array for quick sparse axpy. */ 3155 /* Note: a array does not need sorting as it is in dense storage locations. */ 3156 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3157 3158 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3159 prow = i/ppdof; /* integer division */ 3160 pshift = i%ppdof; 3161 poffset = pi[prow]; 3162 pnzi = pi[prow+1] - poffset; 3163 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3164 pJ = pj+poffset; 3165 pA = pa+poffset; 3166 for (j=0;j<pnzi;j++) { 3167 crow = (*pJ)*ppdof+pshift; 3168 cjj = cj + ci[crow]; 3169 caj = ca + ci[crow]; 3170 pJ++; 3171 /* Perform sparse axpy operation. Note cjj includes apj. */ 3172 for (k=0,nextap=0;nextap<apnzj;k++) { 3173 if (cjj[k]==apj[nextap]) { 3174 caj[k] += (*pA)*apa[apj[nextap++]]; 3175 } 3176 } 3177 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3178 pA++; 3179 } 3180 3181 /* Zero the current row info for A*P */ 3182 for (j=0;j<apnzj;j++) { 3183 apa[apj[j]] = 0.; 3184 apjdense[apj[j]] = 0; 3185 } 3186 } 3187 3188 /* Assemble the final matrix and clean up */ 3189 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3190 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3191 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3192 PetscFunctionReturn(0); 3193 } 3194 3195 #undef __FUNCT__ 3196 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3197 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3198 { 3199 PetscErrorCode ierr; 3200 3201 PetscFunctionBegin; 3202 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3203 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 3204 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3205 PetscFunctionReturn(0); 3206 } 3207 3208 #undef __FUNCT__ 3209 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3210 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3211 { 3212 PetscFunctionBegin; 3213 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3214 PetscFunctionReturn(0); 3215 } 3216 3217 EXTERN_C_BEGIN 3218 #undef __FUNCT__ 3219 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3220 PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3221 { 3222 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3223 Mat a = b->AIJ,B; 3224 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3225 PetscErrorCode ierr; 3226 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3227 PetscInt *cols; 3228 PetscScalar *vals; 3229 3230 PetscFunctionBegin; 3231 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3232 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 3233 for (i=0; i<m; i++) { 3234 nmax = PetscMax(nmax,aij->ilen[i]); 3235 for (j=0; j<dof; j++) { 3236 ilen[dof*i+j] = aij->ilen[i]; 3237 } 3238 } 3239 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3240 ierr = PetscFree(ilen);CHKERRQ(ierr); 3241 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 3242 ii = 0; 3243 for (i=0; i<m; i++) { 3244 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3245 for (j=0; j<dof; j++) { 3246 for (k=0; k<ncols; k++) { 3247 icols[k] = dof*cols[k]+j; 3248 } 3249 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3250 ii++; 3251 } 3252 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3253 } 3254 ierr = PetscFree(icols);CHKERRQ(ierr); 3255 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3256 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3257 3258 if (reuse == MAT_REUSE_MATRIX) { 3259 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3260 } else { 3261 *newmat = B; 3262 } 3263 PetscFunctionReturn(0); 3264 } 3265 EXTERN_C_END 3266 3267 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3268 3269 EXTERN_C_BEGIN 3270 #undef __FUNCT__ 3271 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3272 PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3273 { 3274 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3275 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3276 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3277 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3278 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3279 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3280 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3281 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 3282 PetscInt rstart,cstart,*garray,ii,k; 3283 PetscErrorCode ierr; 3284 PetscScalar *vals,*ovals; 3285 3286 PetscFunctionBegin; 3287 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3288 for (i=0; i<A->rmap->n/dof; i++) { 3289 nmax = PetscMax(nmax,AIJ->ilen[i]); 3290 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3291 for (j=0; j<dof; j++) { 3292 dnz[dof*i+j] = AIJ->ilen[i]; 3293 onz[dof*i+j] = OAIJ->ilen[i]; 3294 } 3295 } 3296 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3297 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3298 3299 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3300 rstart = dof*maij->A->rmap->rstart; 3301 cstart = dof*maij->A->cmap->rstart; 3302 garray = mpiaij->garray; 3303 3304 ii = rstart; 3305 for (i=0; i<A->rmap->n/dof; i++) { 3306 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3307 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3308 for (j=0; j<dof; j++) { 3309 for (k=0; k<ncols; k++) { 3310 icols[k] = cstart + dof*cols[k]+j; 3311 } 3312 for (k=0; k<oncols; k++) { 3313 oicols[k] = dof*garray[ocols[k]]+j; 3314 } 3315 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3316 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3317 ii++; 3318 } 3319 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3320 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3321 } 3322 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3323 3324 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3325 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 3327 if (reuse == MAT_REUSE_MATRIX) { 3328 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3329 ((PetscObject)A)->refct = 1; 3330 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3331 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3332 } else { 3333 *newmat = B; 3334 } 3335 PetscFunctionReturn(0); 3336 } 3337 EXTERN_C_END 3338 3339 #undef __FUNCT__ 3340 #define __FUNCT__ "MatGetSubMatrix_MAIJ" 3341 PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3342 { 3343 PetscErrorCode ierr; 3344 Mat A; 3345 3346 PetscFunctionBegin; 3347 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3348 ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3349 ierr = MatDestroy(&A);CHKERRQ(ierr); 3350 PetscFunctionReturn(0); 3351 } 3352 3353 /* ---------------------------------------------------------------------------------- */ 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatCreateMAIJ" 3356 /*@C 3357 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3358 operations for multicomponent problems. It interpolates each component the same 3359 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3360 and MATMPIAIJ for distributed matrices. 3361 3362 Collective 3363 3364 Input Parameters: 3365 + A - the AIJ matrix describing the action on blocks 3366 - dof - the block size (number of components per node) 3367 3368 Output Parameter: 3369 . maij - the new MAIJ matrix 3370 3371 Operations provided: 3372 + MatMult 3373 . MatMultTranspose 3374 . MatMultAdd 3375 . MatMultTransposeAdd 3376 - MatView 3377 3378 Level: advanced 3379 3380 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3381 @*/ 3382 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3383 { 3384 PetscErrorCode ierr; 3385 PetscMPIInt size; 3386 PetscInt n; 3387 Mat B; 3388 3389 PetscFunctionBegin; 3390 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3391 3392 if (dof == 1) { 3393 *maij = A; 3394 } else { 3395 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3396 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3397 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3398 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3399 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3400 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3401 B->assembled = PETSC_TRUE; 3402 3403 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3404 if (size == 1) { 3405 Mat_SeqMAIJ *b; 3406 3407 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3408 B->ops->setup = PETSC_NULL; 3409 B->ops->destroy = MatDestroy_SeqMAIJ; 3410 B->ops->view = MatView_SeqMAIJ; 3411 b = (Mat_SeqMAIJ*)B->data; 3412 b->dof = dof; 3413 b->AIJ = A; 3414 if (dof == 2) { 3415 B->ops->mult = MatMult_SeqMAIJ_2; 3416 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3417 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3418 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3419 } else if (dof == 3) { 3420 B->ops->mult = MatMult_SeqMAIJ_3; 3421 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3422 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3423 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3424 } else if (dof == 4) { 3425 B->ops->mult = MatMult_SeqMAIJ_4; 3426 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3427 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3428 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3429 } else if (dof == 5) { 3430 B->ops->mult = MatMult_SeqMAIJ_5; 3431 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3432 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3433 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3434 } else if (dof == 6) { 3435 B->ops->mult = MatMult_SeqMAIJ_6; 3436 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3437 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3438 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3439 } else if (dof == 7) { 3440 B->ops->mult = MatMult_SeqMAIJ_7; 3441 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3442 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3443 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3444 } else if (dof == 8) { 3445 B->ops->mult = MatMult_SeqMAIJ_8; 3446 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3447 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3448 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3449 } else if (dof == 9) { 3450 B->ops->mult = MatMult_SeqMAIJ_9; 3451 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3452 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3453 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3454 } else if (dof == 10) { 3455 B->ops->mult = MatMult_SeqMAIJ_10; 3456 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3457 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3458 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3459 } else if (dof == 11) { 3460 B->ops->mult = MatMult_SeqMAIJ_11; 3461 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3462 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3463 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3464 } else if (dof == 16) { 3465 B->ops->mult = MatMult_SeqMAIJ_16; 3466 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3467 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3468 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3469 } else if (dof == 18) { 3470 B->ops->mult = MatMult_SeqMAIJ_18; 3471 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3472 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3473 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3474 } else { 3475 B->ops->mult = MatMult_SeqMAIJ_N; 3476 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3477 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3478 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3479 } 3480 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 3481 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3482 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3483 } else { 3484 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3485 Mat_MPIMAIJ *b; 3486 IS from,to; 3487 Vec gvec; 3488 3489 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3490 B->ops->setup = PETSC_NULL; 3491 B->ops->destroy = MatDestroy_MPIMAIJ; 3492 B->ops->view = MatView_MPIMAIJ; 3493 b = (Mat_MPIMAIJ*)B->data; 3494 b->dof = dof; 3495 b->A = A; 3496 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3497 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3498 3499 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3500 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3501 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3502 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3503 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3504 3505 /* create two temporary Index sets for build scatter gather */ 3506 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3507 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3508 3509 /* create temporary global vector to generate scatter context */ 3510 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3511 3512 /* generate the scatter context */ 3513 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3514 3515 ierr = ISDestroy(&from);CHKERRQ(ierr); 3516 ierr = ISDestroy(&to);CHKERRQ(ierr); 3517 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3518 3519 B->ops->mult = MatMult_MPIMAIJ_dof; 3520 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3521 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3522 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3523 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3524 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3525 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3526 } 3527 B->ops->getsubmatrix = MatGetSubMatrix_MAIJ; 3528 ierr = MatSetUp(B);CHKERRQ(ierr); 3529 *maij = B; 3530 ierr = MatView_Private(B);CHKERRQ(ierr); 3531 } 3532 PetscFunctionReturn(0); 3533 } 3534