1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/ 21 #include "../src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 /*@C 27 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 28 29 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 30 31 Input Parameter: 32 . A - the MAIJ matrix 33 34 Output Parameter: 35 . B - the AIJ matrix 36 37 Level: advanced 38 39 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 40 41 .seealso: MatCreateMAIJ() 42 @*/ 43 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 44 { 45 PetscErrorCode ierr; 46 PetscTruth ismpimaij,isseqmaij; 47 48 PetscFunctionBegin; 49 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 50 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 51 if (ismpimaij) { 52 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 53 54 *B = b->A; 55 } else if (isseqmaij) { 56 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 57 58 *B = b->AIJ; 59 } else { 60 *B = A; 61 } 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatMAIJRedimension" 67 /*@C 68 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 69 70 Collective 71 72 Input Parameter: 73 + A - the MAIJ matrix 74 - dof - the block size for the new matrix 75 76 Output Parameter: 77 . B - the new MAIJ matrix 78 79 Level: advanced 80 81 .seealso: MatCreateMAIJ() 82 @*/ 83 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 84 { 85 PetscErrorCode ierr; 86 Mat Aij; 87 88 PetscFunctionBegin; 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 if (b->AIJ) { 103 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 104 } 105 ierr = PetscFree(b);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatView_SeqMAIJ" 111 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 112 { 113 PetscErrorCode ierr; 114 Mat B; 115 116 PetscFunctionBegin; 117 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 118 ierr = MatView(B,viewer);CHKERRQ(ierr); 119 ierr = MatDestroy(B);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatView_MPIMAIJ" 125 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 126 { 127 PetscErrorCode ierr; 128 Mat B; 129 130 PetscFunctionBegin; 131 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 132 ierr = MatView(B,viewer);CHKERRQ(ierr); 133 ierr = MatDestroy(B);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatDestroy_MPIMAIJ" 139 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 140 { 141 PetscErrorCode ierr; 142 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 143 144 PetscFunctionBegin; 145 if (b->AIJ) { 146 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 147 } 148 if (b->OAIJ) { 149 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 150 } 151 if (b->A) { 152 ierr = MatDestroy(b->A);CHKERRQ(ierr); 153 } 154 if (b->ctx) { 155 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 156 } 157 if (b->w) { 158 ierr = VecDestroy(b->w);CHKERRQ(ierr); 159 } 160 ierr = PetscFree(b);CHKERRQ(ierr); 161 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /*MC 166 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 167 multicomponent problems, interpolating or restricting each component the same way independently. 168 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 169 170 Operations provided: 171 . MatMult 172 . MatMultTranspose 173 . MatMultAdd 174 . MatMultTransposeAdd 175 176 Level: advanced 177 178 .seealso: MatCreateSeqDense 179 M*/ 180 181 EXTERN_C_BEGIN 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatCreate_MAIJ" 184 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 185 { 186 PetscErrorCode ierr; 187 Mat_MPIMAIJ *b; 188 PetscMPIInt size; 189 190 PetscFunctionBegin; 191 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 192 A->data = (void*)b; 193 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 194 A->mapping = 0; 195 196 b->AIJ = 0; 197 b->dof = 0; 198 b->OAIJ = 0; 199 b->ctx = 0; 200 b->w = 0; 201 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 202 if (size == 1){ 203 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 204 } else { 205 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 206 } 207 PetscFunctionReturn(0); 208 } 209 EXTERN_C_END 210 211 /* --------------------------------------------------------------------------------------*/ 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatMult_SeqMAIJ_2" 214 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 215 { 216 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 217 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 218 PetscScalar *x,*y,*v,sum1, sum2; 219 PetscErrorCode ierr; 220 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 221 PetscInt n,i,jrow,j; 222 223 PetscFunctionBegin; 224 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 225 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 226 idx = a->j; 227 v = a->a; 228 ii = a->i; 229 230 for (i=0; i<m; i++) { 231 jrow = ii[i]; 232 n = ii[i+1] - jrow; 233 sum1 = 0.0; 234 sum2 = 0.0; 235 nonzerorow += (n>0); 236 for (j=0; j<n; j++) { 237 sum1 += v[jrow]*x[2*idx[jrow]]; 238 sum2 += v[jrow]*x[2*idx[jrow]+1]; 239 jrow++; 240 } 241 y[2*i] = sum1; 242 y[2*i+1] = sum2; 243 } 244 245 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 246 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 247 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 248 PetscFunctionReturn(0); 249 } 250 251 #undef __FUNCT__ 252 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 253 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 254 { 255 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 256 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 257 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 258 PetscErrorCode ierr; 259 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 260 261 PetscFunctionBegin; 262 ierr = VecSet(yy,zero);CHKERRQ(ierr); 263 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 264 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 265 266 for (i=0; i<m; i++) { 267 idx = a->j + a->i[i] ; 268 v = a->a + a->i[i] ; 269 n = a->i[i+1] - a->i[i]; 270 alpha1 = x[2*i]; 271 alpha2 = x[2*i+1]; 272 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 273 } 274 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 275 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 276 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 282 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 283 { 284 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 286 PetscScalar *x,*y,*v,sum1, sum2; 287 PetscErrorCode ierr; 288 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 289 PetscInt n,i,jrow,j; 290 291 PetscFunctionBegin; 292 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 293 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 294 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 295 idx = a->j; 296 v = a->a; 297 ii = a->i; 298 299 for (i=0; i<m; i++) { 300 jrow = ii[i]; 301 n = ii[i+1] - jrow; 302 sum1 = 0.0; 303 sum2 = 0.0; 304 for (j=0; j<n; j++) { 305 sum1 += v[jrow]*x[2*idx[jrow]]; 306 sum2 += v[jrow]*x[2*idx[jrow]+1]; 307 jrow++; 308 } 309 y[2*i] += sum1; 310 y[2*i+1] += sum2; 311 } 312 313 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 314 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 315 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 316 PetscFunctionReturn(0); 317 } 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 320 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 321 { 322 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 323 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 324 PetscScalar *x,*y,*v,alpha1,alpha2; 325 PetscErrorCode ierr; 326 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 327 328 PetscFunctionBegin; 329 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 330 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 331 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 idx = a->j + a->i[i] ; 335 v = a->a + a->i[i] ; 336 n = a->i[i+1] - a->i[i]; 337 alpha1 = x[2*i]; 338 alpha2 = x[2*i+1]; 339 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 340 } 341 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 342 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 343 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 /* --------------------------------------------------------------------------------------*/ 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatMult_SeqMAIJ_3" 349 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 350 { 351 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 353 PetscScalar *x,*y,*v,sum1, sum2, sum3; 354 PetscErrorCode ierr; 355 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 356 PetscInt n,i,jrow,j; 357 358 PetscFunctionBegin; 359 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 360 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 361 idx = a->j; 362 v = a->a; 363 ii = a->i; 364 365 for (i=0; i<m; i++) { 366 jrow = ii[i]; 367 n = ii[i+1] - jrow; 368 sum1 = 0.0; 369 sum2 = 0.0; 370 sum3 = 0.0; 371 nonzerorow += (n>0); 372 for (j=0; j<n; j++) { 373 sum1 += v[jrow]*x[3*idx[jrow]]; 374 sum2 += v[jrow]*x[3*idx[jrow]+1]; 375 sum3 += v[jrow]*x[3*idx[jrow]+2]; 376 jrow++; 377 } 378 y[3*i] = sum1; 379 y[3*i+1] = sum2; 380 y[3*i+2] = sum3; 381 } 382 383 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 384 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 385 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 391 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 392 { 393 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 394 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 395 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 396 PetscErrorCode ierr; 397 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 398 399 PetscFunctionBegin; 400 ierr = VecSet(yy,zero);CHKERRQ(ierr); 401 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 402 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 403 404 for (i=0; i<m; i++) { 405 idx = a->j + a->i[i]; 406 v = a->a + a->i[i]; 407 n = a->i[i+1] - a->i[i]; 408 alpha1 = x[3*i]; 409 alpha2 = x[3*i+1]; 410 alpha3 = x[3*i+2]; 411 while (n-->0) { 412 y[3*(*idx)] += alpha1*(*v); 413 y[3*(*idx)+1] += alpha2*(*v); 414 y[3*(*idx)+2] += alpha3*(*v); 415 idx++; v++; 416 } 417 } 418 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 419 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 420 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 426 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 427 { 428 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 430 PetscScalar *x,*y,*v,sum1, sum2, sum3; 431 PetscErrorCode ierr; 432 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 433 PetscInt n,i,jrow,j; 434 435 PetscFunctionBegin; 436 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 437 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 438 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 439 idx = a->j; 440 v = a->a; 441 ii = a->i; 442 443 for (i=0; i<m; i++) { 444 jrow = ii[i]; 445 n = ii[i+1] - jrow; 446 sum1 = 0.0; 447 sum2 = 0.0; 448 sum3 = 0.0; 449 for (j=0; j<n; j++) { 450 sum1 += v[jrow]*x[3*idx[jrow]]; 451 sum2 += v[jrow]*x[3*idx[jrow]+1]; 452 sum3 += v[jrow]*x[3*idx[jrow]+2]; 453 jrow++; 454 } 455 y[3*i] += sum1; 456 y[3*i+1] += sum2; 457 y[3*i+2] += sum3; 458 } 459 460 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 461 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 462 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 #undef __FUNCT__ 466 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 467 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 468 { 469 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 470 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 471 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 472 PetscErrorCode ierr; 473 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 474 475 PetscFunctionBegin; 476 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 477 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 478 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 479 for (i=0; i<m; i++) { 480 idx = a->j + a->i[i] ; 481 v = a->a + a->i[i] ; 482 n = a->i[i+1] - a->i[i]; 483 alpha1 = x[3*i]; 484 alpha2 = x[3*i+1]; 485 alpha3 = x[3*i+2]; 486 while (n-->0) { 487 y[3*(*idx)] += alpha1*(*v); 488 y[3*(*idx)+1] += alpha2*(*v); 489 y[3*(*idx)+2] += alpha3*(*v); 490 idx++; v++; 491 } 492 } 493 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 494 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 495 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 /* ------------------------------------------------------------------------------*/ 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMult_SeqMAIJ_4" 502 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 503 { 504 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 505 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 506 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 507 PetscErrorCode ierr; 508 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 509 PetscInt n,i,jrow,j; 510 511 PetscFunctionBegin; 512 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 513 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 514 idx = a->j; 515 v = a->a; 516 ii = a->i; 517 518 for (i=0; i<m; i++) { 519 jrow = ii[i]; 520 n = ii[i+1] - jrow; 521 sum1 = 0.0; 522 sum2 = 0.0; 523 sum3 = 0.0; 524 sum4 = 0.0; 525 nonzerorow += (n>0); 526 for (j=0; j<n; j++) { 527 sum1 += v[jrow]*x[4*idx[jrow]]; 528 sum2 += v[jrow]*x[4*idx[jrow]+1]; 529 sum3 += v[jrow]*x[4*idx[jrow]+2]; 530 sum4 += v[jrow]*x[4*idx[jrow]+3]; 531 jrow++; 532 } 533 y[4*i] = sum1; 534 y[4*i+1] = sum2; 535 y[4*i+2] = sum3; 536 y[4*i+3] = sum4; 537 } 538 539 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 540 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 541 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 547 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 548 { 549 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 551 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 552 PetscErrorCode ierr; 553 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 554 555 PetscFunctionBegin; 556 ierr = VecSet(yy,zero);CHKERRQ(ierr); 557 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 558 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 559 for (i=0; i<m; i++) { 560 idx = a->j + a->i[i] ; 561 v = a->a + a->i[i] ; 562 n = a->i[i+1] - a->i[i]; 563 alpha1 = x[4*i]; 564 alpha2 = x[4*i+1]; 565 alpha3 = x[4*i+2]; 566 alpha4 = x[4*i+3]; 567 while (n-->0) { 568 y[4*(*idx)] += alpha1*(*v); 569 y[4*(*idx)+1] += alpha2*(*v); 570 y[4*(*idx)+2] += alpha3*(*v); 571 y[4*(*idx)+3] += alpha4*(*v); 572 idx++; v++; 573 } 574 } 575 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 576 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 583 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 584 { 585 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 586 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 587 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 588 PetscErrorCode ierr; 589 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 590 PetscInt n,i,jrow,j; 591 592 PetscFunctionBegin; 593 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 594 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 595 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 596 idx = a->j; 597 v = a->a; 598 ii = a->i; 599 600 for (i=0; i<m; i++) { 601 jrow = ii[i]; 602 n = ii[i+1] - jrow; 603 sum1 = 0.0; 604 sum2 = 0.0; 605 sum3 = 0.0; 606 sum4 = 0.0; 607 for (j=0; j<n; j++) { 608 sum1 += v[jrow]*x[4*idx[jrow]]; 609 sum2 += v[jrow]*x[4*idx[jrow]+1]; 610 sum3 += v[jrow]*x[4*idx[jrow]+2]; 611 sum4 += v[jrow]*x[4*idx[jrow]+3]; 612 jrow++; 613 } 614 y[4*i] += sum1; 615 y[4*i+1] += sum2; 616 y[4*i+2] += sum3; 617 y[4*i+3] += sum4; 618 } 619 620 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 621 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 622 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 627 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 628 { 629 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 630 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 631 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 632 PetscErrorCode ierr; 633 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 634 635 PetscFunctionBegin; 636 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 637 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 638 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 639 640 for (i=0; i<m; i++) { 641 idx = a->j + a->i[i] ; 642 v = a->a + a->i[i] ; 643 n = a->i[i+1] - a->i[i]; 644 alpha1 = x[4*i]; 645 alpha2 = x[4*i+1]; 646 alpha3 = x[4*i+2]; 647 alpha4 = x[4*i+3]; 648 while (n-->0) { 649 y[4*(*idx)] += alpha1*(*v); 650 y[4*(*idx)+1] += alpha2*(*v); 651 y[4*(*idx)+2] += alpha3*(*v); 652 y[4*(*idx)+3] += alpha4*(*v); 653 idx++; v++; 654 } 655 } 656 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 657 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 658 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 /* ------------------------------------------------------------------------------*/ 662 663 #undef __FUNCT__ 664 #define __FUNCT__ "MatMult_SeqMAIJ_5" 665 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 666 { 667 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 668 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 669 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 670 PetscErrorCode ierr; 671 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 672 PetscInt n,i,jrow,j; 673 674 PetscFunctionBegin; 675 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 676 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 677 idx = a->j; 678 v = a->a; 679 ii = a->i; 680 681 for (i=0; i<m; i++) { 682 jrow = ii[i]; 683 n = ii[i+1] - jrow; 684 sum1 = 0.0; 685 sum2 = 0.0; 686 sum3 = 0.0; 687 sum4 = 0.0; 688 sum5 = 0.0; 689 nonzerorow += (n>0); 690 for (j=0; j<n; j++) { 691 sum1 += v[jrow]*x[5*idx[jrow]]; 692 sum2 += v[jrow]*x[5*idx[jrow]+1]; 693 sum3 += v[jrow]*x[5*idx[jrow]+2]; 694 sum4 += v[jrow]*x[5*idx[jrow]+3]; 695 sum5 += v[jrow]*x[5*idx[jrow]+4]; 696 jrow++; 697 } 698 y[5*i] = sum1; 699 y[5*i+1] = sum2; 700 y[5*i+2] = sum3; 701 y[5*i+3] = sum4; 702 y[5*i+4] = sum5; 703 } 704 705 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 706 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 707 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 713 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 714 { 715 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 716 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 717 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 718 PetscErrorCode ierr; 719 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 720 721 PetscFunctionBegin; 722 ierr = VecSet(yy,zero);CHKERRQ(ierr); 723 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 724 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 725 726 for (i=0; i<m; i++) { 727 idx = a->j + a->i[i] ; 728 v = a->a + a->i[i] ; 729 n = a->i[i+1] - a->i[i]; 730 alpha1 = x[5*i]; 731 alpha2 = x[5*i+1]; 732 alpha3 = x[5*i+2]; 733 alpha4 = x[5*i+3]; 734 alpha5 = x[5*i+4]; 735 while (n-->0) { 736 y[5*(*idx)] += alpha1*(*v); 737 y[5*(*idx)+1] += alpha2*(*v); 738 y[5*(*idx)+2] += alpha3*(*v); 739 y[5*(*idx)+3] += alpha4*(*v); 740 y[5*(*idx)+4] += alpha5*(*v); 741 idx++; v++; 742 } 743 } 744 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 745 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 746 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 752 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 753 { 754 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 755 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 756 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 757 PetscErrorCode ierr; 758 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 759 PetscInt n,i,jrow,j; 760 761 PetscFunctionBegin; 762 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 763 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 764 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 765 idx = a->j; 766 v = a->a; 767 ii = a->i; 768 769 for (i=0; i<m; i++) { 770 jrow = ii[i]; 771 n = ii[i+1] - jrow; 772 sum1 = 0.0; 773 sum2 = 0.0; 774 sum3 = 0.0; 775 sum4 = 0.0; 776 sum5 = 0.0; 777 for (j=0; j<n; j++) { 778 sum1 += v[jrow]*x[5*idx[jrow]]; 779 sum2 += v[jrow]*x[5*idx[jrow]+1]; 780 sum3 += v[jrow]*x[5*idx[jrow]+2]; 781 sum4 += v[jrow]*x[5*idx[jrow]+3]; 782 sum5 += v[jrow]*x[5*idx[jrow]+4]; 783 jrow++; 784 } 785 y[5*i] += sum1; 786 y[5*i+1] += sum2; 787 y[5*i+2] += sum3; 788 y[5*i+3] += sum4; 789 y[5*i+4] += sum5; 790 } 791 792 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 793 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 794 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 800 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 801 { 802 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 803 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 804 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 805 PetscErrorCode ierr; 806 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 807 808 PetscFunctionBegin; 809 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 810 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 811 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 812 813 for (i=0; i<m; i++) { 814 idx = a->j + a->i[i] ; 815 v = a->a + a->i[i] ; 816 n = a->i[i+1] - a->i[i]; 817 alpha1 = x[5*i]; 818 alpha2 = x[5*i+1]; 819 alpha3 = x[5*i+2]; 820 alpha4 = x[5*i+3]; 821 alpha5 = x[5*i+4]; 822 while (n-->0) { 823 y[5*(*idx)] += alpha1*(*v); 824 y[5*(*idx)+1] += alpha2*(*v); 825 y[5*(*idx)+2] += alpha3*(*v); 826 y[5*(*idx)+3] += alpha4*(*v); 827 y[5*(*idx)+4] += alpha5*(*v); 828 idx++; v++; 829 } 830 } 831 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 832 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 833 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 834 PetscFunctionReturn(0); 835 } 836 837 /* ------------------------------------------------------------------------------*/ 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMult_SeqMAIJ_6" 840 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 841 { 842 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 843 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 844 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 845 PetscErrorCode ierr; 846 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 847 PetscInt n,i,jrow,j; 848 849 PetscFunctionBegin; 850 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 851 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 852 idx = a->j; 853 v = a->a; 854 ii = a->i; 855 856 for (i=0; i<m; i++) { 857 jrow = ii[i]; 858 n = ii[i+1] - jrow; 859 sum1 = 0.0; 860 sum2 = 0.0; 861 sum3 = 0.0; 862 sum4 = 0.0; 863 sum5 = 0.0; 864 sum6 = 0.0; 865 nonzerorow += (n>0); 866 for (j=0; j<n; j++) { 867 sum1 += v[jrow]*x[6*idx[jrow]]; 868 sum2 += v[jrow]*x[6*idx[jrow]+1]; 869 sum3 += v[jrow]*x[6*idx[jrow]+2]; 870 sum4 += v[jrow]*x[6*idx[jrow]+3]; 871 sum5 += v[jrow]*x[6*idx[jrow]+4]; 872 sum6 += v[jrow]*x[6*idx[jrow]+5]; 873 jrow++; 874 } 875 y[6*i] = sum1; 876 y[6*i+1] = sum2; 877 y[6*i+2] = sum3; 878 y[6*i+3] = sum4; 879 y[6*i+4] = sum5; 880 y[6*i+5] = sum6; 881 } 882 883 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 884 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 885 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 891 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 892 { 893 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 894 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 895 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 896 PetscErrorCode ierr; 897 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 898 899 PetscFunctionBegin; 900 ierr = VecSet(yy,zero);CHKERRQ(ierr); 901 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 902 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 903 904 for (i=0; i<m; i++) { 905 idx = a->j + a->i[i] ; 906 v = a->a + a->i[i] ; 907 n = a->i[i+1] - a->i[i]; 908 alpha1 = x[6*i]; 909 alpha2 = x[6*i+1]; 910 alpha3 = x[6*i+2]; 911 alpha4 = x[6*i+3]; 912 alpha5 = x[6*i+4]; 913 alpha6 = x[6*i+5]; 914 while (n-->0) { 915 y[6*(*idx)] += alpha1*(*v); 916 y[6*(*idx)+1] += alpha2*(*v); 917 y[6*(*idx)+2] += alpha3*(*v); 918 y[6*(*idx)+3] += alpha4*(*v); 919 y[6*(*idx)+4] += alpha5*(*v); 920 y[6*(*idx)+5] += alpha6*(*v); 921 idx++; v++; 922 } 923 } 924 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 925 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 926 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 927 PetscFunctionReturn(0); 928 } 929 930 #undef __FUNCT__ 931 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 932 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 933 { 934 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 935 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 936 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 937 PetscErrorCode ierr; 938 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 939 PetscInt n,i,jrow,j; 940 941 PetscFunctionBegin; 942 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 943 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 944 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 945 idx = a->j; 946 v = a->a; 947 ii = a->i; 948 949 for (i=0; i<m; i++) { 950 jrow = ii[i]; 951 n = ii[i+1] - jrow; 952 sum1 = 0.0; 953 sum2 = 0.0; 954 sum3 = 0.0; 955 sum4 = 0.0; 956 sum5 = 0.0; 957 sum6 = 0.0; 958 for (j=0; j<n; j++) { 959 sum1 += v[jrow]*x[6*idx[jrow]]; 960 sum2 += v[jrow]*x[6*idx[jrow]+1]; 961 sum3 += v[jrow]*x[6*idx[jrow]+2]; 962 sum4 += v[jrow]*x[6*idx[jrow]+3]; 963 sum5 += v[jrow]*x[6*idx[jrow]+4]; 964 sum6 += v[jrow]*x[6*idx[jrow]+5]; 965 jrow++; 966 } 967 y[6*i] += sum1; 968 y[6*i+1] += sum2; 969 y[6*i+2] += sum3; 970 y[6*i+3] += sum4; 971 y[6*i+4] += sum5; 972 y[6*i+5] += sum6; 973 } 974 975 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 976 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 977 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 983 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 986 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 987 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 988 PetscErrorCode ierr; 989 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 990 991 PetscFunctionBegin; 992 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 993 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 994 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 995 996 for (i=0; i<m; i++) { 997 idx = a->j + a->i[i] ; 998 v = a->a + a->i[i] ; 999 n = a->i[i+1] - a->i[i]; 1000 alpha1 = x[6*i]; 1001 alpha2 = x[6*i+1]; 1002 alpha3 = x[6*i+2]; 1003 alpha4 = x[6*i+3]; 1004 alpha5 = x[6*i+4]; 1005 alpha6 = x[6*i+5]; 1006 while (n-->0) { 1007 y[6*(*idx)] += alpha1*(*v); 1008 y[6*(*idx)+1] += alpha2*(*v); 1009 y[6*(*idx)+2] += alpha3*(*v); 1010 y[6*(*idx)+3] += alpha4*(*v); 1011 y[6*(*idx)+4] += alpha5*(*v); 1012 y[6*(*idx)+5] += alpha6*(*v); 1013 idx++; v++; 1014 } 1015 } 1016 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1017 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1018 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 /* ------------------------------------------------------------------------------*/ 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1025 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1026 { 1027 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1028 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1029 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1030 PetscErrorCode ierr; 1031 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1032 PetscInt n,i,jrow,j; 1033 1034 PetscFunctionBegin; 1035 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1036 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1037 idx = a->j; 1038 v = a->a; 1039 ii = a->i; 1040 1041 for (i=0; i<m; i++) { 1042 jrow = ii[i]; 1043 n = ii[i+1] - jrow; 1044 sum1 = 0.0; 1045 sum2 = 0.0; 1046 sum3 = 0.0; 1047 sum4 = 0.0; 1048 sum5 = 0.0; 1049 sum6 = 0.0; 1050 sum7 = 0.0; 1051 nonzerorow += (n>0); 1052 for (j=0; j<n; j++) { 1053 sum1 += v[jrow]*x[7*idx[jrow]]; 1054 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1055 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1056 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1057 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1058 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1059 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1060 jrow++; 1061 } 1062 y[7*i] = sum1; 1063 y[7*i+1] = sum2; 1064 y[7*i+2] = sum3; 1065 y[7*i+3] = sum4; 1066 y[7*i+4] = sum5; 1067 y[7*i+5] = sum6; 1068 y[7*i+6] = sum7; 1069 } 1070 1071 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1079 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1080 { 1081 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1082 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1083 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1084 PetscErrorCode ierr; 1085 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1086 1087 PetscFunctionBegin; 1088 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1089 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1090 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1091 1092 for (i=0; i<m; i++) { 1093 idx = a->j + a->i[i] ; 1094 v = a->a + a->i[i] ; 1095 n = a->i[i+1] - a->i[i]; 1096 alpha1 = x[7*i]; 1097 alpha2 = x[7*i+1]; 1098 alpha3 = x[7*i+2]; 1099 alpha4 = x[7*i+3]; 1100 alpha5 = x[7*i+4]; 1101 alpha6 = x[7*i+5]; 1102 alpha7 = x[7*i+6]; 1103 while (n-->0) { 1104 y[7*(*idx)] += alpha1*(*v); 1105 y[7*(*idx)+1] += alpha2*(*v); 1106 y[7*(*idx)+2] += alpha3*(*v); 1107 y[7*(*idx)+3] += alpha4*(*v); 1108 y[7*(*idx)+4] += alpha5*(*v); 1109 y[7*(*idx)+5] += alpha6*(*v); 1110 y[7*(*idx)+6] += alpha7*(*v); 1111 idx++; v++; 1112 } 1113 } 1114 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1115 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1116 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1122 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1123 { 1124 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1126 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1127 PetscErrorCode ierr; 1128 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1129 PetscInt n,i,jrow,j; 1130 1131 PetscFunctionBegin; 1132 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1133 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1134 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1135 idx = a->j; 1136 v = a->a; 1137 ii = a->i; 1138 1139 for (i=0; i<m; i++) { 1140 jrow = ii[i]; 1141 n = ii[i+1] - jrow; 1142 sum1 = 0.0; 1143 sum2 = 0.0; 1144 sum3 = 0.0; 1145 sum4 = 0.0; 1146 sum5 = 0.0; 1147 sum6 = 0.0; 1148 sum7 = 0.0; 1149 for (j=0; j<n; j++) { 1150 sum1 += v[jrow]*x[7*idx[jrow]]; 1151 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1152 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1153 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1154 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1155 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1156 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1157 jrow++; 1158 } 1159 y[7*i] += sum1; 1160 y[7*i+1] += sum2; 1161 y[7*i+2] += sum3; 1162 y[7*i+3] += sum4; 1163 y[7*i+4] += sum5; 1164 y[7*i+5] += sum6; 1165 y[7*i+6] += sum7; 1166 } 1167 1168 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1169 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1170 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1176 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1177 { 1178 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1180 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1181 PetscErrorCode ierr; 1182 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1183 1184 PetscFunctionBegin; 1185 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1186 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1187 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1188 for (i=0; i<m; i++) { 1189 idx = a->j + a->i[i] ; 1190 v = a->a + a->i[i] ; 1191 n = a->i[i+1] - a->i[i]; 1192 alpha1 = x[7*i]; 1193 alpha2 = x[7*i+1]; 1194 alpha3 = x[7*i+2]; 1195 alpha4 = x[7*i+3]; 1196 alpha5 = x[7*i+4]; 1197 alpha6 = x[7*i+5]; 1198 alpha7 = x[7*i+6]; 1199 while (n-->0) { 1200 y[7*(*idx)] += alpha1*(*v); 1201 y[7*(*idx)+1] += alpha2*(*v); 1202 y[7*(*idx)+2] += alpha3*(*v); 1203 y[7*(*idx)+3] += alpha4*(*v); 1204 y[7*(*idx)+4] += alpha5*(*v); 1205 y[7*(*idx)+5] += alpha6*(*v); 1206 y[7*(*idx)+6] += alpha7*(*v); 1207 idx++; v++; 1208 } 1209 } 1210 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1211 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1212 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1218 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1219 { 1220 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1221 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1222 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1223 PetscErrorCode ierr; 1224 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1225 PetscInt n,i,jrow,j; 1226 1227 PetscFunctionBegin; 1228 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1229 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1230 idx = a->j; 1231 v = a->a; 1232 ii = a->i; 1233 1234 for (i=0; i<m; i++) { 1235 jrow = ii[i]; 1236 n = ii[i+1] - jrow; 1237 sum1 = 0.0; 1238 sum2 = 0.0; 1239 sum3 = 0.0; 1240 sum4 = 0.0; 1241 sum5 = 0.0; 1242 sum6 = 0.0; 1243 sum7 = 0.0; 1244 sum8 = 0.0; 1245 nonzerorow += (n>0); 1246 for (j=0; j<n; j++) { 1247 sum1 += v[jrow]*x[8*idx[jrow]]; 1248 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1249 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1250 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1251 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1252 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1253 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1254 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1255 jrow++; 1256 } 1257 y[8*i] = sum1; 1258 y[8*i+1] = sum2; 1259 y[8*i+2] = sum3; 1260 y[8*i+3] = sum4; 1261 y[8*i+4] = sum5; 1262 y[8*i+5] = sum6; 1263 y[8*i+6] = sum7; 1264 y[8*i+7] = sum8; 1265 } 1266 1267 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1268 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1269 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1275 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1276 { 1277 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1279 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1280 PetscErrorCode ierr; 1281 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1282 1283 PetscFunctionBegin; 1284 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1285 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1286 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1287 1288 for (i=0; i<m; i++) { 1289 idx = a->j + a->i[i] ; 1290 v = a->a + a->i[i] ; 1291 n = a->i[i+1] - a->i[i]; 1292 alpha1 = x[8*i]; 1293 alpha2 = x[8*i+1]; 1294 alpha3 = x[8*i+2]; 1295 alpha4 = x[8*i+3]; 1296 alpha5 = x[8*i+4]; 1297 alpha6 = x[8*i+5]; 1298 alpha7 = x[8*i+6]; 1299 alpha8 = x[8*i+7]; 1300 while (n-->0) { 1301 y[8*(*idx)] += alpha1*(*v); 1302 y[8*(*idx)+1] += alpha2*(*v); 1303 y[8*(*idx)+2] += alpha3*(*v); 1304 y[8*(*idx)+3] += alpha4*(*v); 1305 y[8*(*idx)+4] += alpha5*(*v); 1306 y[8*(*idx)+5] += alpha6*(*v); 1307 y[8*(*idx)+6] += alpha7*(*v); 1308 y[8*(*idx)+7] += alpha8*(*v); 1309 idx++; v++; 1310 } 1311 } 1312 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1314 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1320 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1321 { 1322 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1323 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1324 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1325 PetscErrorCode ierr; 1326 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1327 PetscInt n,i,jrow,j; 1328 1329 PetscFunctionBegin; 1330 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1331 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1332 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1333 idx = a->j; 1334 v = a->a; 1335 ii = a->i; 1336 1337 for (i=0; i<m; i++) { 1338 jrow = ii[i]; 1339 n = ii[i+1] - jrow; 1340 sum1 = 0.0; 1341 sum2 = 0.0; 1342 sum3 = 0.0; 1343 sum4 = 0.0; 1344 sum5 = 0.0; 1345 sum6 = 0.0; 1346 sum7 = 0.0; 1347 sum8 = 0.0; 1348 for (j=0; j<n; j++) { 1349 sum1 += v[jrow]*x[8*idx[jrow]]; 1350 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1351 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1352 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1353 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1354 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1355 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1356 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1357 jrow++; 1358 } 1359 y[8*i] += sum1; 1360 y[8*i+1] += sum2; 1361 y[8*i+2] += sum3; 1362 y[8*i+3] += sum4; 1363 y[8*i+4] += sum5; 1364 y[8*i+5] += sum6; 1365 y[8*i+6] += sum7; 1366 y[8*i+7] += sum8; 1367 } 1368 1369 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1370 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1371 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1377 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1378 { 1379 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1380 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1381 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1382 PetscErrorCode ierr; 1383 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1384 1385 PetscFunctionBegin; 1386 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1387 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1388 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1389 for (i=0; i<m; i++) { 1390 idx = a->j + a->i[i] ; 1391 v = a->a + a->i[i] ; 1392 n = a->i[i+1] - a->i[i]; 1393 alpha1 = x[8*i]; 1394 alpha2 = x[8*i+1]; 1395 alpha3 = x[8*i+2]; 1396 alpha4 = x[8*i+3]; 1397 alpha5 = x[8*i+4]; 1398 alpha6 = x[8*i+5]; 1399 alpha7 = x[8*i+6]; 1400 alpha8 = x[8*i+7]; 1401 while (n-->0) { 1402 y[8*(*idx)] += alpha1*(*v); 1403 y[8*(*idx)+1] += alpha2*(*v); 1404 y[8*(*idx)+2] += alpha3*(*v); 1405 y[8*(*idx)+3] += alpha4*(*v); 1406 y[8*(*idx)+4] += alpha5*(*v); 1407 y[8*(*idx)+5] += alpha6*(*v); 1408 y[8*(*idx)+6] += alpha7*(*v); 1409 y[8*(*idx)+7] += alpha8*(*v); 1410 idx++; v++; 1411 } 1412 } 1413 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1414 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1415 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 /* ------------------------------------------------------------------------------*/ 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1422 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1423 { 1424 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1425 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1426 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1427 PetscErrorCode ierr; 1428 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1429 PetscInt n,i,jrow,j; 1430 1431 PetscFunctionBegin; 1432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1433 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1434 idx = a->j; 1435 v = a->a; 1436 ii = a->i; 1437 1438 for (i=0; i<m; i++) { 1439 jrow = ii[i]; 1440 n = ii[i+1] - jrow; 1441 sum1 = 0.0; 1442 sum2 = 0.0; 1443 sum3 = 0.0; 1444 sum4 = 0.0; 1445 sum5 = 0.0; 1446 sum6 = 0.0; 1447 sum7 = 0.0; 1448 sum8 = 0.0; 1449 sum9 = 0.0; 1450 nonzerorow += (n>0); 1451 for (j=0; j<n; j++) { 1452 sum1 += v[jrow]*x[9*idx[jrow]]; 1453 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1454 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1455 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1456 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1457 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1458 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1459 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1460 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1461 jrow++; 1462 } 1463 y[9*i] = sum1; 1464 y[9*i+1] = sum2; 1465 y[9*i+2] = sum3; 1466 y[9*i+3] = sum4; 1467 y[9*i+4] = sum5; 1468 y[9*i+5] = sum6; 1469 y[9*i+6] = sum7; 1470 y[9*i+7] = sum8; 1471 y[9*i+8] = sum9; 1472 } 1473 1474 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1475 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1476 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 /* ------------------------------------------------------------------------------*/ 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1484 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1485 { 1486 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1488 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1489 PetscErrorCode ierr; 1490 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1491 1492 PetscFunctionBegin; 1493 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1494 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1495 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1496 1497 for (i=0; i<m; i++) { 1498 idx = a->j + a->i[i] ; 1499 v = a->a + a->i[i] ; 1500 n = a->i[i+1] - a->i[i]; 1501 alpha1 = x[9*i]; 1502 alpha2 = x[9*i+1]; 1503 alpha3 = x[9*i+2]; 1504 alpha4 = x[9*i+3]; 1505 alpha5 = x[9*i+4]; 1506 alpha6 = x[9*i+5]; 1507 alpha7 = x[9*i+6]; 1508 alpha8 = x[9*i+7]; 1509 alpha9 = x[9*i+8]; 1510 while (n-->0) { 1511 y[9*(*idx)] += alpha1*(*v); 1512 y[9*(*idx)+1] += alpha2*(*v); 1513 y[9*(*idx)+2] += alpha3*(*v); 1514 y[9*(*idx)+3] += alpha4*(*v); 1515 y[9*(*idx)+4] += alpha5*(*v); 1516 y[9*(*idx)+5] += alpha6*(*v); 1517 y[9*(*idx)+6] += alpha7*(*v); 1518 y[9*(*idx)+7] += alpha8*(*v); 1519 y[9*(*idx)+8] += alpha9*(*v); 1520 idx++; v++; 1521 } 1522 } 1523 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1524 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1525 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1531 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1532 { 1533 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1535 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1536 PetscErrorCode ierr; 1537 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1538 PetscInt n,i,jrow,j; 1539 1540 PetscFunctionBegin; 1541 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1542 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1543 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1544 idx = a->j; 1545 v = a->a; 1546 ii = a->i; 1547 1548 for (i=0; i<m; i++) { 1549 jrow = ii[i]; 1550 n = ii[i+1] - jrow; 1551 sum1 = 0.0; 1552 sum2 = 0.0; 1553 sum3 = 0.0; 1554 sum4 = 0.0; 1555 sum5 = 0.0; 1556 sum6 = 0.0; 1557 sum7 = 0.0; 1558 sum8 = 0.0; 1559 sum9 = 0.0; 1560 for (j=0; j<n; j++) { 1561 sum1 += v[jrow]*x[9*idx[jrow]]; 1562 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1563 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1564 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1565 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1566 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1567 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1568 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1569 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1570 jrow++; 1571 } 1572 y[9*i] += sum1; 1573 y[9*i+1] += sum2; 1574 y[9*i+2] += sum3; 1575 y[9*i+3] += sum4; 1576 y[9*i+4] += sum5; 1577 y[9*i+5] += sum6; 1578 y[9*i+6] += sum7; 1579 y[9*i+7] += sum8; 1580 y[9*i+8] += sum9; 1581 } 1582 1583 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1584 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1585 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1591 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1592 { 1593 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1595 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1596 PetscErrorCode ierr; 1597 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1598 1599 PetscFunctionBegin; 1600 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1601 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1602 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1603 for (i=0; i<m; i++) { 1604 idx = a->j + a->i[i] ; 1605 v = a->a + a->i[i] ; 1606 n = a->i[i+1] - a->i[i]; 1607 alpha1 = x[9*i]; 1608 alpha2 = x[9*i+1]; 1609 alpha3 = x[9*i+2]; 1610 alpha4 = x[9*i+3]; 1611 alpha5 = x[9*i+4]; 1612 alpha6 = x[9*i+5]; 1613 alpha7 = x[9*i+6]; 1614 alpha8 = x[9*i+7]; 1615 alpha9 = x[9*i+8]; 1616 while (n-->0) { 1617 y[9*(*idx)] += alpha1*(*v); 1618 y[9*(*idx)+1] += alpha2*(*v); 1619 y[9*(*idx)+2] += alpha3*(*v); 1620 y[9*(*idx)+3] += alpha4*(*v); 1621 y[9*(*idx)+4] += alpha5*(*v); 1622 y[9*(*idx)+5] += alpha6*(*v); 1623 y[9*(*idx)+6] += alpha7*(*v); 1624 y[9*(*idx)+7] += alpha8*(*v); 1625 y[9*(*idx)+8] += alpha9*(*v); 1626 idx++; v++; 1627 } 1628 } 1629 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1630 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1631 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1636 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1637 { 1638 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1639 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1640 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1641 PetscErrorCode ierr; 1642 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1643 PetscInt n,i,jrow,j; 1644 1645 PetscFunctionBegin; 1646 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1647 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1648 idx = a->j; 1649 v = a->a; 1650 ii = a->i; 1651 1652 for (i=0; i<m; i++) { 1653 jrow = ii[i]; 1654 n = ii[i+1] - jrow; 1655 sum1 = 0.0; 1656 sum2 = 0.0; 1657 sum3 = 0.0; 1658 sum4 = 0.0; 1659 sum5 = 0.0; 1660 sum6 = 0.0; 1661 sum7 = 0.0; 1662 sum8 = 0.0; 1663 sum9 = 0.0; 1664 sum10 = 0.0; 1665 nonzerorow += (n>0); 1666 for (j=0; j<n; j++) { 1667 sum1 += v[jrow]*x[10*idx[jrow]]; 1668 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1669 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1670 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1671 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1672 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1673 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1674 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1675 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1676 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1677 jrow++; 1678 } 1679 y[10*i] = sum1; 1680 y[10*i+1] = sum2; 1681 y[10*i+2] = sum3; 1682 y[10*i+3] = sum4; 1683 y[10*i+4] = sum5; 1684 y[10*i+5] = sum6; 1685 y[10*i+6] = sum7; 1686 y[10*i+7] = sum8; 1687 y[10*i+8] = sum9; 1688 y[10*i+9] = sum10; 1689 } 1690 1691 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1693 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1699 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1700 { 1701 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1702 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1703 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1704 PetscErrorCode ierr; 1705 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1706 PetscInt n,i,jrow,j; 1707 1708 PetscFunctionBegin; 1709 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1710 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1711 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1712 idx = a->j; 1713 v = a->a; 1714 ii = a->i; 1715 1716 for (i=0; i<m; i++) { 1717 jrow = ii[i]; 1718 n = ii[i+1] - jrow; 1719 sum1 = 0.0; 1720 sum2 = 0.0; 1721 sum3 = 0.0; 1722 sum4 = 0.0; 1723 sum5 = 0.0; 1724 sum6 = 0.0; 1725 sum7 = 0.0; 1726 sum8 = 0.0; 1727 sum9 = 0.0; 1728 sum10 = 0.0; 1729 for (j=0; j<n; j++) { 1730 sum1 += v[jrow]*x[10*idx[jrow]]; 1731 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1732 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1733 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1734 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1735 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1736 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1737 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1738 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1739 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1740 jrow++; 1741 } 1742 y[10*i] += sum1; 1743 y[10*i+1] += sum2; 1744 y[10*i+2] += sum3; 1745 y[10*i+3] += sum4; 1746 y[10*i+4] += sum5; 1747 y[10*i+5] += sum6; 1748 y[10*i+6] += sum7; 1749 y[10*i+7] += sum8; 1750 y[10*i+8] += sum9; 1751 y[10*i+9] += sum10; 1752 } 1753 1754 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1755 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1756 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1762 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1763 { 1764 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1765 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1766 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1767 PetscErrorCode ierr; 1768 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1769 1770 PetscFunctionBegin; 1771 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1772 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1773 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1774 1775 for (i=0; i<m; i++) { 1776 idx = a->j + a->i[i] ; 1777 v = a->a + a->i[i] ; 1778 n = a->i[i+1] - a->i[i]; 1779 alpha1 = x[10*i]; 1780 alpha2 = x[10*i+1]; 1781 alpha3 = x[10*i+2]; 1782 alpha4 = x[10*i+3]; 1783 alpha5 = x[10*i+4]; 1784 alpha6 = x[10*i+5]; 1785 alpha7 = x[10*i+6]; 1786 alpha8 = x[10*i+7]; 1787 alpha9 = x[10*i+8]; 1788 alpha10 = x[10*i+9]; 1789 while (n-->0) { 1790 y[10*(*idx)] += alpha1*(*v); 1791 y[10*(*idx)+1] += alpha2*(*v); 1792 y[10*(*idx)+2] += alpha3*(*v); 1793 y[10*(*idx)+3] += alpha4*(*v); 1794 y[10*(*idx)+4] += alpha5*(*v); 1795 y[10*(*idx)+5] += alpha6*(*v); 1796 y[10*(*idx)+6] += alpha7*(*v); 1797 y[10*(*idx)+7] += alpha8*(*v); 1798 y[10*(*idx)+8] += alpha9*(*v); 1799 y[10*(*idx)+9] += alpha10*(*v); 1800 idx++; v++; 1801 } 1802 } 1803 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1804 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1805 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1811 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1812 { 1813 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1814 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1815 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1816 PetscErrorCode ierr; 1817 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1818 1819 PetscFunctionBegin; 1820 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1822 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1823 for (i=0; i<m; i++) { 1824 idx = a->j + a->i[i] ; 1825 v = a->a + a->i[i] ; 1826 n = a->i[i+1] - a->i[i]; 1827 alpha1 = x[10*i]; 1828 alpha2 = x[10*i+1]; 1829 alpha3 = x[10*i+2]; 1830 alpha4 = x[10*i+3]; 1831 alpha5 = x[10*i+4]; 1832 alpha6 = x[10*i+5]; 1833 alpha7 = x[10*i+6]; 1834 alpha8 = x[10*i+7]; 1835 alpha9 = x[10*i+8]; 1836 alpha10 = x[10*i+9]; 1837 while (n-->0) { 1838 y[10*(*idx)] += alpha1*(*v); 1839 y[10*(*idx)+1] += alpha2*(*v); 1840 y[10*(*idx)+2] += alpha3*(*v); 1841 y[10*(*idx)+3] += alpha4*(*v); 1842 y[10*(*idx)+4] += alpha5*(*v); 1843 y[10*(*idx)+5] += alpha6*(*v); 1844 y[10*(*idx)+6] += alpha7*(*v); 1845 y[10*(*idx)+7] += alpha8*(*v); 1846 y[10*(*idx)+8] += alpha9*(*v); 1847 y[10*(*idx)+9] += alpha10*(*v); 1848 idx++; v++; 1849 } 1850 } 1851 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1852 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1853 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1854 PetscFunctionReturn(0); 1855 } 1856 1857 1858 /*--------------------------------------------------------------------------------------------*/ 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1861 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1862 { 1863 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1864 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1865 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1866 PetscErrorCode ierr; 1867 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1868 PetscInt n,i,jrow,j; 1869 1870 PetscFunctionBegin; 1871 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1872 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1873 idx = a->j; 1874 v = a->a; 1875 ii = a->i; 1876 1877 for (i=0; i<m; i++) { 1878 jrow = ii[i]; 1879 n = ii[i+1] - jrow; 1880 sum1 = 0.0; 1881 sum2 = 0.0; 1882 sum3 = 0.0; 1883 sum4 = 0.0; 1884 sum5 = 0.0; 1885 sum6 = 0.0; 1886 sum7 = 0.0; 1887 sum8 = 0.0; 1888 sum9 = 0.0; 1889 sum10 = 0.0; 1890 sum11 = 0.0; 1891 nonzerorow += (n>0); 1892 for (j=0; j<n; j++) { 1893 sum1 += v[jrow]*x[11*idx[jrow]]; 1894 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1895 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1896 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1897 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1898 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1899 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1900 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1901 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1902 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1903 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1904 jrow++; 1905 } 1906 y[11*i] = sum1; 1907 y[11*i+1] = sum2; 1908 y[11*i+2] = sum3; 1909 y[11*i+3] = sum4; 1910 y[11*i+4] = sum5; 1911 y[11*i+5] = sum6; 1912 y[11*i+6] = sum7; 1913 y[11*i+7] = sum8; 1914 y[11*i+8] = sum9; 1915 y[11*i+9] = sum10; 1916 y[11*i+10] = sum11; 1917 } 1918 1919 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1920 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1921 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1927 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1928 { 1929 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1930 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1931 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1932 PetscErrorCode ierr; 1933 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1934 PetscInt n,i,jrow,j; 1935 1936 PetscFunctionBegin; 1937 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1938 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1939 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1940 idx = a->j; 1941 v = a->a; 1942 ii = a->i; 1943 1944 for (i=0; i<m; i++) { 1945 jrow = ii[i]; 1946 n = ii[i+1] - jrow; 1947 sum1 = 0.0; 1948 sum2 = 0.0; 1949 sum3 = 0.0; 1950 sum4 = 0.0; 1951 sum5 = 0.0; 1952 sum6 = 0.0; 1953 sum7 = 0.0; 1954 sum8 = 0.0; 1955 sum9 = 0.0; 1956 sum10 = 0.0; 1957 sum11 = 0.0; 1958 for (j=0; j<n; j++) { 1959 sum1 += v[jrow]*x[11*idx[jrow]]; 1960 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1961 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1962 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1963 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1964 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1965 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1966 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1967 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1968 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1969 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1970 jrow++; 1971 } 1972 y[11*i] += sum1; 1973 y[11*i+1] += sum2; 1974 y[11*i+2] += sum3; 1975 y[11*i+3] += sum4; 1976 y[11*i+4] += sum5; 1977 y[11*i+5] += sum6; 1978 y[11*i+6] += sum7; 1979 y[11*i+7] += sum8; 1980 y[11*i+8] += sum9; 1981 y[11*i+9] += sum10; 1982 y[11*i+10] += sum11; 1983 } 1984 1985 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 1986 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1987 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 1993 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1994 { 1995 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1996 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1997 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11,zero = 0.0; 1998 PetscErrorCode ierr; 1999 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2000 2001 PetscFunctionBegin; 2002 ierr = VecSet(yy,zero);CHKERRQ(ierr); 2003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2004 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2005 2006 for (i=0; i<m; i++) { 2007 idx = a->j + a->i[i] ; 2008 v = a->a + a->i[i] ; 2009 n = a->i[i+1] - a->i[i]; 2010 alpha1 = x[11*i]; 2011 alpha2 = x[11*i+1]; 2012 alpha3 = x[11*i+2]; 2013 alpha4 = x[11*i+3]; 2014 alpha5 = x[11*i+4]; 2015 alpha6 = x[11*i+5]; 2016 alpha7 = x[11*i+6]; 2017 alpha8 = x[11*i+7]; 2018 alpha9 = x[11*i+8]; 2019 alpha10 = x[11*i+9]; 2020 alpha11 = x[11*i+10]; 2021 while (n-->0) { 2022 y[11*(*idx)] += alpha1*(*v); 2023 y[11*(*idx)+1] += alpha2*(*v); 2024 y[11*(*idx)+2] += alpha3*(*v); 2025 y[11*(*idx)+3] += alpha4*(*v); 2026 y[11*(*idx)+4] += alpha5*(*v); 2027 y[11*(*idx)+5] += alpha6*(*v); 2028 y[11*(*idx)+6] += alpha7*(*v); 2029 y[11*(*idx)+7] += alpha8*(*v); 2030 y[11*(*idx)+8] += alpha9*(*v); 2031 y[11*(*idx)+9] += alpha10*(*v); 2032 y[11*(*idx)+10] += alpha11*(*v); 2033 idx++; v++; 2034 } 2035 } 2036 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2037 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2038 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2044 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2045 { 2046 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2047 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2048 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2049 PetscErrorCode ierr; 2050 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2051 2052 PetscFunctionBegin; 2053 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2054 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2055 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2056 for (i=0; i<m; i++) { 2057 idx = a->j + a->i[i] ; 2058 v = a->a + a->i[i] ; 2059 n = a->i[i+1] - a->i[i]; 2060 alpha1 = x[11*i]; 2061 alpha2 = x[11*i+1]; 2062 alpha3 = x[11*i+2]; 2063 alpha4 = x[11*i+3]; 2064 alpha5 = x[11*i+4]; 2065 alpha6 = x[11*i+5]; 2066 alpha7 = x[11*i+6]; 2067 alpha8 = x[11*i+7]; 2068 alpha9 = x[11*i+8]; 2069 alpha10 = x[11*i+9]; 2070 alpha11 = x[11*i+10]; 2071 while (n-->0) { 2072 y[11*(*idx)] += alpha1*(*v); 2073 y[11*(*idx)+1] += alpha2*(*v); 2074 y[11*(*idx)+2] += alpha3*(*v); 2075 y[11*(*idx)+3] += alpha4*(*v); 2076 y[11*(*idx)+4] += alpha5*(*v); 2077 y[11*(*idx)+5] += alpha6*(*v); 2078 y[11*(*idx)+6] += alpha7*(*v); 2079 y[11*(*idx)+7] += alpha8*(*v); 2080 y[11*(*idx)+8] += alpha9*(*v); 2081 y[11*(*idx)+9] += alpha10*(*v); 2082 y[11*(*idx)+10] += alpha11*(*v); 2083 idx++; v++; 2084 } 2085 } 2086 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2087 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2088 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2089 PetscFunctionReturn(0); 2090 } 2091 2092 2093 /*--------------------------------------------------------------------------------------------*/ 2094 #undef __FUNCT__ 2095 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2096 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2097 { 2098 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2099 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2100 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2101 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2102 PetscErrorCode ierr; 2103 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 2104 PetscInt n,i,jrow,j; 2105 2106 PetscFunctionBegin; 2107 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2108 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2109 idx = a->j; 2110 v = a->a; 2111 ii = a->i; 2112 2113 for (i=0; i<m; i++) { 2114 jrow = ii[i]; 2115 n = ii[i+1] - jrow; 2116 sum1 = 0.0; 2117 sum2 = 0.0; 2118 sum3 = 0.0; 2119 sum4 = 0.0; 2120 sum5 = 0.0; 2121 sum6 = 0.0; 2122 sum7 = 0.0; 2123 sum8 = 0.0; 2124 sum9 = 0.0; 2125 sum10 = 0.0; 2126 sum11 = 0.0; 2127 sum12 = 0.0; 2128 sum13 = 0.0; 2129 sum14 = 0.0; 2130 sum15 = 0.0; 2131 sum16 = 0.0; 2132 nonzerorow += (n>0); 2133 for (j=0; j<n; j++) { 2134 sum1 += v[jrow]*x[16*idx[jrow]]; 2135 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2136 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2137 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2138 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2139 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2140 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2141 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2142 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2143 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2144 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2145 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2146 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2147 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2148 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2149 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2150 jrow++; 2151 } 2152 y[16*i] = sum1; 2153 y[16*i+1] = sum2; 2154 y[16*i+2] = sum3; 2155 y[16*i+3] = sum4; 2156 y[16*i+4] = sum5; 2157 y[16*i+5] = sum6; 2158 y[16*i+6] = sum7; 2159 y[16*i+7] = sum8; 2160 y[16*i+8] = sum9; 2161 y[16*i+9] = sum10; 2162 y[16*i+10] = sum11; 2163 y[16*i+11] = sum12; 2164 y[16*i+12] = sum13; 2165 y[16*i+13] = sum14; 2166 y[16*i+14] = sum15; 2167 y[16*i+15] = sum16; 2168 } 2169 2170 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2171 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2172 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2173 PetscFunctionReturn(0); 2174 } 2175 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2178 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2179 { 2180 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2181 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2182 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 2183 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2184 PetscErrorCode ierr; 2185 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2186 2187 PetscFunctionBegin; 2188 ierr = VecSet(yy,zero);CHKERRQ(ierr); 2189 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2190 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2191 2192 for (i=0; i<m; i++) { 2193 idx = a->j + a->i[i] ; 2194 v = a->a + a->i[i] ; 2195 n = a->i[i+1] - a->i[i]; 2196 alpha1 = x[16*i]; 2197 alpha2 = x[16*i+1]; 2198 alpha3 = x[16*i+2]; 2199 alpha4 = x[16*i+3]; 2200 alpha5 = x[16*i+4]; 2201 alpha6 = x[16*i+5]; 2202 alpha7 = x[16*i+6]; 2203 alpha8 = x[16*i+7]; 2204 alpha9 = x[16*i+8]; 2205 alpha10 = x[16*i+9]; 2206 alpha11 = x[16*i+10]; 2207 alpha12 = x[16*i+11]; 2208 alpha13 = x[16*i+12]; 2209 alpha14 = x[16*i+13]; 2210 alpha15 = x[16*i+14]; 2211 alpha16 = x[16*i+15]; 2212 while (n-->0) { 2213 y[16*(*idx)] += alpha1*(*v); 2214 y[16*(*idx)+1] += alpha2*(*v); 2215 y[16*(*idx)+2] += alpha3*(*v); 2216 y[16*(*idx)+3] += alpha4*(*v); 2217 y[16*(*idx)+4] += alpha5*(*v); 2218 y[16*(*idx)+5] += alpha6*(*v); 2219 y[16*(*idx)+6] += alpha7*(*v); 2220 y[16*(*idx)+7] += alpha8*(*v); 2221 y[16*(*idx)+8] += alpha9*(*v); 2222 y[16*(*idx)+9] += alpha10*(*v); 2223 y[16*(*idx)+10] += alpha11*(*v); 2224 y[16*(*idx)+11] += alpha12*(*v); 2225 y[16*(*idx)+12] += alpha13*(*v); 2226 y[16*(*idx)+13] += alpha14*(*v); 2227 y[16*(*idx)+14] += alpha15*(*v); 2228 y[16*(*idx)+15] += alpha16*(*v); 2229 idx++; v++; 2230 } 2231 } 2232 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2233 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2234 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2240 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2241 { 2242 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2243 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2244 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2245 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2246 PetscErrorCode ierr; 2247 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2248 PetscInt n,i,jrow,j; 2249 2250 PetscFunctionBegin; 2251 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2252 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2253 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2254 idx = a->j; 2255 v = a->a; 2256 ii = a->i; 2257 2258 for (i=0; i<m; i++) { 2259 jrow = ii[i]; 2260 n = ii[i+1] - jrow; 2261 sum1 = 0.0; 2262 sum2 = 0.0; 2263 sum3 = 0.0; 2264 sum4 = 0.0; 2265 sum5 = 0.0; 2266 sum6 = 0.0; 2267 sum7 = 0.0; 2268 sum8 = 0.0; 2269 sum9 = 0.0; 2270 sum10 = 0.0; 2271 sum11 = 0.0; 2272 sum12 = 0.0; 2273 sum13 = 0.0; 2274 sum14 = 0.0; 2275 sum15 = 0.0; 2276 sum16 = 0.0; 2277 for (j=0; j<n; j++) { 2278 sum1 += v[jrow]*x[16*idx[jrow]]; 2279 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2280 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2281 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2282 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2283 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2284 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2285 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2286 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2287 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2288 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2289 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2290 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2291 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2292 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2293 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2294 jrow++; 2295 } 2296 y[16*i] += sum1; 2297 y[16*i+1] += sum2; 2298 y[16*i+2] += sum3; 2299 y[16*i+3] += sum4; 2300 y[16*i+4] += sum5; 2301 y[16*i+5] += sum6; 2302 y[16*i+6] += sum7; 2303 y[16*i+7] += sum8; 2304 y[16*i+8] += sum9; 2305 y[16*i+9] += sum10; 2306 y[16*i+10] += sum11; 2307 y[16*i+11] += sum12; 2308 y[16*i+12] += sum13; 2309 y[16*i+13] += sum14; 2310 y[16*i+14] += sum15; 2311 y[16*i+15] += sum16; 2312 } 2313 2314 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2315 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2316 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2322 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2323 { 2324 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2325 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2326 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2327 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2328 PetscErrorCode ierr; 2329 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2330 2331 PetscFunctionBegin; 2332 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2333 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2334 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2335 for (i=0; i<m; i++) { 2336 idx = a->j + a->i[i] ; 2337 v = a->a + a->i[i] ; 2338 n = a->i[i+1] - a->i[i]; 2339 alpha1 = x[16*i]; 2340 alpha2 = x[16*i+1]; 2341 alpha3 = x[16*i+2]; 2342 alpha4 = x[16*i+3]; 2343 alpha5 = x[16*i+4]; 2344 alpha6 = x[16*i+5]; 2345 alpha7 = x[16*i+6]; 2346 alpha8 = x[16*i+7]; 2347 alpha9 = x[16*i+8]; 2348 alpha10 = x[16*i+9]; 2349 alpha11 = x[16*i+10]; 2350 alpha12 = x[16*i+11]; 2351 alpha13 = x[16*i+12]; 2352 alpha14 = x[16*i+13]; 2353 alpha15 = x[16*i+14]; 2354 alpha16 = x[16*i+15]; 2355 while (n-->0) { 2356 y[16*(*idx)] += alpha1*(*v); 2357 y[16*(*idx)+1] += alpha2*(*v); 2358 y[16*(*idx)+2] += alpha3*(*v); 2359 y[16*(*idx)+3] += alpha4*(*v); 2360 y[16*(*idx)+4] += alpha5*(*v); 2361 y[16*(*idx)+5] += alpha6*(*v); 2362 y[16*(*idx)+6] += alpha7*(*v); 2363 y[16*(*idx)+7] += alpha8*(*v); 2364 y[16*(*idx)+8] += alpha9*(*v); 2365 y[16*(*idx)+9] += alpha10*(*v); 2366 y[16*(*idx)+10] += alpha11*(*v); 2367 y[16*(*idx)+11] += alpha12*(*v); 2368 y[16*(*idx)+12] += alpha13*(*v); 2369 y[16*(*idx)+13] += alpha14*(*v); 2370 y[16*(*idx)+14] += alpha15*(*v); 2371 y[16*(*idx)+15] += alpha16*(*v); 2372 idx++; v++; 2373 } 2374 } 2375 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2376 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2377 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 /*--------------------------------------------------------------------------------------------*/ 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2384 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2385 { 2386 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2387 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2388 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2389 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2390 PetscErrorCode ierr; 2391 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 2392 PetscInt n,i,jrow,j; 2393 2394 PetscFunctionBegin; 2395 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2396 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2397 idx = a->j; 2398 v = a->a; 2399 ii = a->i; 2400 2401 for (i=0; i<m; i++) { 2402 jrow = ii[i]; 2403 n = ii[i+1] - jrow; 2404 sum1 = 0.0; 2405 sum2 = 0.0; 2406 sum3 = 0.0; 2407 sum4 = 0.0; 2408 sum5 = 0.0; 2409 sum6 = 0.0; 2410 sum7 = 0.0; 2411 sum8 = 0.0; 2412 sum9 = 0.0; 2413 sum10 = 0.0; 2414 sum11 = 0.0; 2415 sum12 = 0.0; 2416 sum13 = 0.0; 2417 sum14 = 0.0; 2418 sum15 = 0.0; 2419 sum16 = 0.0; 2420 sum17 = 0.0; 2421 sum18 = 0.0; 2422 nonzerorow += (n>0); 2423 for (j=0; j<n; j++) { 2424 sum1 += v[jrow]*x[18*idx[jrow]]; 2425 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2426 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2427 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2428 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2429 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2430 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2431 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2432 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2433 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2434 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2435 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2436 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2437 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2438 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2439 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2440 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2441 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2442 jrow++; 2443 } 2444 y[18*i] = sum1; 2445 y[18*i+1] = sum2; 2446 y[18*i+2] = sum3; 2447 y[18*i+3] = sum4; 2448 y[18*i+4] = sum5; 2449 y[18*i+5] = sum6; 2450 y[18*i+6] = sum7; 2451 y[18*i+7] = sum8; 2452 y[18*i+8] = sum9; 2453 y[18*i+9] = sum10; 2454 y[18*i+10] = sum11; 2455 y[18*i+11] = sum12; 2456 y[18*i+12] = sum13; 2457 y[18*i+13] = sum14; 2458 y[18*i+14] = sum15; 2459 y[18*i+15] = sum16; 2460 y[18*i+16] = sum17; 2461 y[18*i+17] = sum18; 2462 } 2463 2464 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2465 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2466 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2472 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2473 { 2474 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2475 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2476 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 2477 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2478 PetscErrorCode ierr; 2479 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2480 2481 PetscFunctionBegin; 2482 ierr = VecSet(yy,zero);CHKERRQ(ierr); 2483 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2484 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2485 2486 for (i=0; i<m; i++) { 2487 idx = a->j + a->i[i] ; 2488 v = a->a + a->i[i] ; 2489 n = a->i[i+1] - a->i[i]; 2490 alpha1 = x[18*i]; 2491 alpha2 = x[18*i+1]; 2492 alpha3 = x[18*i+2]; 2493 alpha4 = x[18*i+3]; 2494 alpha5 = x[18*i+4]; 2495 alpha6 = x[18*i+5]; 2496 alpha7 = x[18*i+6]; 2497 alpha8 = x[18*i+7]; 2498 alpha9 = x[18*i+8]; 2499 alpha10 = x[18*i+9]; 2500 alpha11 = x[18*i+10]; 2501 alpha12 = x[18*i+11]; 2502 alpha13 = x[18*i+12]; 2503 alpha14 = x[18*i+13]; 2504 alpha15 = x[18*i+14]; 2505 alpha16 = x[18*i+15]; 2506 alpha17 = x[18*i+16]; 2507 alpha18 = x[18*i+17]; 2508 while (n-->0) { 2509 y[18*(*idx)] += alpha1*(*v); 2510 y[18*(*idx)+1] += alpha2*(*v); 2511 y[18*(*idx)+2] += alpha3*(*v); 2512 y[18*(*idx)+3] += alpha4*(*v); 2513 y[18*(*idx)+4] += alpha5*(*v); 2514 y[18*(*idx)+5] += alpha6*(*v); 2515 y[18*(*idx)+6] += alpha7*(*v); 2516 y[18*(*idx)+7] += alpha8*(*v); 2517 y[18*(*idx)+8] += alpha9*(*v); 2518 y[18*(*idx)+9] += alpha10*(*v); 2519 y[18*(*idx)+10] += alpha11*(*v); 2520 y[18*(*idx)+11] += alpha12*(*v); 2521 y[18*(*idx)+12] += alpha13*(*v); 2522 y[18*(*idx)+13] += alpha14*(*v); 2523 y[18*(*idx)+14] += alpha15*(*v); 2524 y[18*(*idx)+15] += alpha16*(*v); 2525 y[18*(*idx)+16] += alpha17*(*v); 2526 y[18*(*idx)+17] += alpha18*(*v); 2527 idx++; v++; 2528 } 2529 } 2530 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2531 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2532 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2538 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2539 { 2540 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2542 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2543 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2544 PetscErrorCode ierr; 2545 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2546 PetscInt n,i,jrow,j; 2547 2548 PetscFunctionBegin; 2549 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2550 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2551 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2552 idx = a->j; 2553 v = a->a; 2554 ii = a->i; 2555 2556 for (i=0; i<m; i++) { 2557 jrow = ii[i]; 2558 n = ii[i+1] - jrow; 2559 sum1 = 0.0; 2560 sum2 = 0.0; 2561 sum3 = 0.0; 2562 sum4 = 0.0; 2563 sum5 = 0.0; 2564 sum6 = 0.0; 2565 sum7 = 0.0; 2566 sum8 = 0.0; 2567 sum9 = 0.0; 2568 sum10 = 0.0; 2569 sum11 = 0.0; 2570 sum12 = 0.0; 2571 sum13 = 0.0; 2572 sum14 = 0.0; 2573 sum15 = 0.0; 2574 sum16 = 0.0; 2575 sum17 = 0.0; 2576 sum18 = 0.0; 2577 for (j=0; j<n; j++) { 2578 sum1 += v[jrow]*x[18*idx[jrow]]; 2579 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2580 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2581 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2582 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2583 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2584 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2585 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2586 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2587 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2588 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2589 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2590 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2591 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2592 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2593 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2594 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2595 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2596 jrow++; 2597 } 2598 y[18*i] += sum1; 2599 y[18*i+1] += sum2; 2600 y[18*i+2] += sum3; 2601 y[18*i+3] += sum4; 2602 y[18*i+4] += sum5; 2603 y[18*i+5] += sum6; 2604 y[18*i+6] += sum7; 2605 y[18*i+7] += sum8; 2606 y[18*i+8] += sum9; 2607 y[18*i+9] += sum10; 2608 y[18*i+10] += sum11; 2609 y[18*i+11] += sum12; 2610 y[18*i+12] += sum13; 2611 y[18*i+13] += sum14; 2612 y[18*i+14] += sum15; 2613 y[18*i+15] += sum16; 2614 y[18*i+16] += sum17; 2615 y[18*i+17] += sum18; 2616 } 2617 2618 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2619 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2620 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2621 PetscFunctionReturn(0); 2622 } 2623 2624 #undef __FUNCT__ 2625 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2626 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2627 { 2628 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2629 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2630 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2631 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2632 PetscErrorCode ierr; 2633 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2634 2635 PetscFunctionBegin; 2636 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2637 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2638 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2639 for (i=0; i<m; i++) { 2640 idx = a->j + a->i[i] ; 2641 v = a->a + a->i[i] ; 2642 n = a->i[i+1] - a->i[i]; 2643 alpha1 = x[18*i]; 2644 alpha2 = x[18*i+1]; 2645 alpha3 = x[18*i+2]; 2646 alpha4 = x[18*i+3]; 2647 alpha5 = x[18*i+4]; 2648 alpha6 = x[18*i+5]; 2649 alpha7 = x[18*i+6]; 2650 alpha8 = x[18*i+7]; 2651 alpha9 = x[18*i+8]; 2652 alpha10 = x[18*i+9]; 2653 alpha11 = x[18*i+10]; 2654 alpha12 = x[18*i+11]; 2655 alpha13 = x[18*i+12]; 2656 alpha14 = x[18*i+13]; 2657 alpha15 = x[18*i+14]; 2658 alpha16 = x[18*i+15]; 2659 alpha17 = x[18*i+16]; 2660 alpha18 = x[18*i+17]; 2661 while (n-->0) { 2662 y[18*(*idx)] += alpha1*(*v); 2663 y[18*(*idx)+1] += alpha2*(*v); 2664 y[18*(*idx)+2] += alpha3*(*v); 2665 y[18*(*idx)+3] += alpha4*(*v); 2666 y[18*(*idx)+4] += alpha5*(*v); 2667 y[18*(*idx)+5] += alpha6*(*v); 2668 y[18*(*idx)+6] += alpha7*(*v); 2669 y[18*(*idx)+7] += alpha8*(*v); 2670 y[18*(*idx)+8] += alpha9*(*v); 2671 y[18*(*idx)+9] += alpha10*(*v); 2672 y[18*(*idx)+10] += alpha11*(*v); 2673 y[18*(*idx)+11] += alpha12*(*v); 2674 y[18*(*idx)+12] += alpha13*(*v); 2675 y[18*(*idx)+13] += alpha14*(*v); 2676 y[18*(*idx)+14] += alpha15*(*v); 2677 y[18*(*idx)+15] += alpha16*(*v); 2678 y[18*(*idx)+16] += alpha17*(*v); 2679 y[18*(*idx)+17] += alpha18*(*v); 2680 idx++; v++; 2681 } 2682 } 2683 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2684 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2685 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2686 PetscFunctionReturn(0); 2687 } 2688 2689 /*===================================================================================*/ 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2692 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2693 { 2694 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2695 PetscErrorCode ierr; 2696 2697 PetscFunctionBegin; 2698 /* start the scatter */ 2699 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2700 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2701 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2702 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2703 PetscFunctionReturn(0); 2704 } 2705 2706 #undef __FUNCT__ 2707 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2708 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2709 { 2710 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2711 PetscErrorCode ierr; 2712 2713 PetscFunctionBegin; 2714 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2715 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2716 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2717 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2718 PetscFunctionReturn(0); 2719 } 2720 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2723 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2724 { 2725 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2726 PetscErrorCode ierr; 2727 2728 PetscFunctionBegin; 2729 /* start the scatter */ 2730 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2731 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2732 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2733 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2739 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2740 { 2741 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2746 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2747 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2748 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 2752 /* ----------------------------------------------------------------*/ 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2755 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2756 { 2757 /* This routine requires testing -- but it's getting better. */ 2758 PetscErrorCode ierr; 2759 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2760 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2761 Mat P=pp->AIJ; 2762 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2763 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 2764 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2765 PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn; 2766 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 2767 MatScalar *ca; 2768 2769 PetscFunctionBegin; 2770 /* Start timer */ 2771 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2772 2773 /* Get ij structure of P^T */ 2774 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2775 2776 cn = pn*ppdof; 2777 /* Allocate ci array, arrays for fill computation and */ 2778 /* free space for accumulating nonzero column info */ 2779 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2780 ci[0] = 0; 2781 2782 /* Work arrays for rows of P^T*A */ 2783 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 2784 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 2785 ptasparserow = ptadenserow + an; 2786 denserow = ptasparserow + an; 2787 sparserow = denserow + cn; 2788 2789 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2790 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2791 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2792 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2793 current_space = free_space; 2794 2795 /* Determine symbolic info for each row of C: */ 2796 for (i=0;i<pn;i++) { 2797 ptnzi = pti[i+1] - pti[i]; 2798 ptJ = ptj + pti[i]; 2799 for (dof=0;dof<ppdof;dof++) { 2800 ptanzi = 0; 2801 /* Determine symbolic row of PtA: */ 2802 for (j=0;j<ptnzi;j++) { 2803 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2804 arow = ptJ[j]*ppdof + dof; 2805 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2806 anzj = ai[arow+1] - ai[arow]; 2807 ajj = aj + ai[arow]; 2808 for (k=0;k<anzj;k++) { 2809 if (!ptadenserow[ajj[k]]) { 2810 ptadenserow[ajj[k]] = -1; 2811 ptasparserow[ptanzi++] = ajj[k]; 2812 } 2813 } 2814 } 2815 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2816 ptaj = ptasparserow; 2817 cnzi = 0; 2818 for (j=0;j<ptanzi;j++) { 2819 /* Get offset within block of P */ 2820 pshift = *ptaj%ppdof; 2821 /* Get block row of P */ 2822 prow = (*ptaj++)/ppdof; /* integer division */ 2823 /* P has same number of nonzeros per row as the compressed form */ 2824 pnzj = pi[prow+1] - pi[prow]; 2825 pjj = pj + pi[prow]; 2826 for (k=0;k<pnzj;k++) { 2827 /* Locations in C are shifted by the offset within the block */ 2828 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2829 if (!denserow[pjj[k]*ppdof+pshift]) { 2830 denserow[pjj[k]*ppdof+pshift] = -1; 2831 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2832 } 2833 } 2834 } 2835 2836 /* sort sparserow */ 2837 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2838 2839 /* If free space is not available, make more free space */ 2840 /* Double the amount of total space in the list */ 2841 if (current_space->local_remaining<cnzi) { 2842 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2843 } 2844 2845 /* Copy data into free space, and zero out denserows */ 2846 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2847 current_space->array += cnzi; 2848 current_space->local_used += cnzi; 2849 current_space->local_remaining -= cnzi; 2850 2851 for (j=0;j<ptanzi;j++) { 2852 ptadenserow[ptasparserow[j]] = 0; 2853 } 2854 for (j=0;j<cnzi;j++) { 2855 denserow[sparserow[j]] = 0; 2856 } 2857 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2858 /* For now, we will recompute what is needed. */ 2859 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2860 } 2861 } 2862 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2863 /* Allocate space for cj, initialize cj, and */ 2864 /* destroy list of free space and other temporary array(s) */ 2865 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2866 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2867 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2868 2869 /* Allocate space for ca */ 2870 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2871 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2872 2873 /* put together the new matrix */ 2874 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2875 2876 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2877 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2878 c = (Mat_SeqAIJ *)((*C)->data); 2879 c->free_a = PETSC_TRUE; 2880 c->free_ij = PETSC_TRUE; 2881 c->nonew = 0; 2882 2883 /* Clean up. */ 2884 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2885 2886 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2887 PetscFunctionReturn(0); 2888 } 2889 2890 #undef __FUNCT__ 2891 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2892 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2893 { 2894 /* This routine requires testing -- first draft only */ 2895 PetscErrorCode ierr; 2896 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2897 Mat P=pp->AIJ; 2898 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2899 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2900 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2901 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2902 PetscInt *ci=c->i,*cj=c->j,*cjj; 2903 PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 2904 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2905 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2906 2907 PetscFunctionBegin; 2908 /* Allocate temporary array for storage of one row of A*P */ 2909 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2910 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2911 2912 apj = (PetscInt *)(apa + cn); 2913 apjdense = apj + cn; 2914 2915 /* Clear old values in C */ 2916 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2917 2918 for (i=0;i<am;i++) { 2919 /* Form sparse row of A*P */ 2920 anzi = ai[i+1] - ai[i]; 2921 apnzj = 0; 2922 for (j=0;j<anzi;j++) { 2923 /* Get offset within block of P */ 2924 pshift = *aj%ppdof; 2925 /* Get block row of P */ 2926 prow = *aj++/ppdof; /* integer division */ 2927 pnzj = pi[prow+1] - pi[prow]; 2928 pjj = pj + pi[prow]; 2929 paj = pa + pi[prow]; 2930 for (k=0;k<pnzj;k++) { 2931 poffset = pjj[k]*ppdof+pshift; 2932 if (!apjdense[poffset]) { 2933 apjdense[poffset] = -1; 2934 apj[apnzj++] = poffset; 2935 } 2936 apa[poffset] += (*aa)*paj[k]; 2937 } 2938 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 2939 aa++; 2940 } 2941 2942 /* Sort the j index array for quick sparse axpy. */ 2943 /* Note: a array does not need sorting as it is in dense storage locations. */ 2944 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2945 2946 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2947 prow = i/ppdof; /* integer division */ 2948 pshift = i%ppdof; 2949 poffset = pi[prow]; 2950 pnzi = pi[prow+1] - poffset; 2951 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2952 pJ = pj+poffset; 2953 pA = pa+poffset; 2954 for (j=0;j<pnzi;j++) { 2955 crow = (*pJ)*ppdof+pshift; 2956 cjj = cj + ci[crow]; 2957 caj = ca + ci[crow]; 2958 pJ++; 2959 /* Perform sparse axpy operation. Note cjj includes apj. */ 2960 for (k=0,nextap=0;nextap<apnzj;k++) { 2961 if (cjj[k]==apj[nextap]) { 2962 caj[k] += (*pA)*apa[apj[nextap++]]; 2963 } 2964 } 2965 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 2966 pA++; 2967 } 2968 2969 /* Zero the current row info for A*P */ 2970 for (j=0;j<apnzj;j++) { 2971 apa[apj[j]] = 0.; 2972 apjdense[apj[j]] = 0; 2973 } 2974 } 2975 2976 /* Assemble the final matrix and clean up */ 2977 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2978 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2979 ierr = PetscFree(apa);CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 #undef __FUNCT__ 2984 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 2985 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2986 { 2987 PetscErrorCode ierr; 2988 2989 PetscFunctionBegin; 2990 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 2991 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 2992 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 2993 PetscFunctionReturn(0); 2994 } 2995 2996 #undef __FUNCT__ 2997 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 2998 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 2999 { 3000 PetscFunctionBegin; 3001 SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3002 PetscFunctionReturn(0); 3003 } 3004 3005 EXTERN_C_BEGIN 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3008 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3009 { 3010 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3011 Mat a = b->AIJ,B; 3012 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3013 PetscErrorCode ierr; 3014 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3015 PetscInt *cols; 3016 PetscScalar *vals; 3017 3018 PetscFunctionBegin; 3019 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3020 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 3021 for (i=0; i<m; i++) { 3022 nmax = PetscMax(nmax,aij->ilen[i]); 3023 for (j=0; j<dof; j++) { 3024 ilen[dof*i+j] = aij->ilen[i]; 3025 } 3026 } 3027 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3028 ierr = PetscFree(ilen);CHKERRQ(ierr); 3029 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 3030 ii = 0; 3031 for (i=0; i<m; i++) { 3032 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3033 for (j=0; j<dof; j++) { 3034 for (k=0; k<ncols; k++) { 3035 icols[k] = dof*cols[k]+j; 3036 } 3037 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3038 ii++; 3039 } 3040 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3041 } 3042 ierr = PetscFree(icols);CHKERRQ(ierr); 3043 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3044 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3045 3046 if (reuse == MAT_REUSE_MATRIX) { 3047 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3048 } else { 3049 *newmat = B; 3050 } 3051 PetscFunctionReturn(0); 3052 } 3053 EXTERN_C_END 3054 3055 #include "../src/mat/impls/aij/mpi/mpiaij.h" 3056 3057 EXTERN_C_BEGIN 3058 #undef __FUNCT__ 3059 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3060 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3061 { 3062 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3063 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3064 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3065 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3066 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3067 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3068 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3069 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 3070 PetscInt rstart,cstart,*garray,ii,k; 3071 PetscErrorCode ierr; 3072 PetscScalar *vals,*ovals; 3073 3074 PetscFunctionBegin; 3075 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3076 for (i=0; i<A->rmap->n/dof; i++) { 3077 nmax = PetscMax(nmax,AIJ->ilen[i]); 3078 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3079 for (j=0; j<dof; j++) { 3080 dnz[dof*i+j] = AIJ->ilen[i]; 3081 onz[dof*i+j] = OAIJ->ilen[i]; 3082 } 3083 } 3084 ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3085 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3086 3087 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3088 rstart = dof*maij->A->rmap->rstart; 3089 cstart = dof*maij->A->cmap->rstart; 3090 garray = mpiaij->garray; 3091 3092 ii = rstart; 3093 for (i=0; i<A->rmap->n/dof; i++) { 3094 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3095 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3096 for (j=0; j<dof; j++) { 3097 for (k=0; k<ncols; k++) { 3098 icols[k] = cstart + dof*cols[k]+j; 3099 } 3100 for (k=0; k<oncols; k++) { 3101 oicols[k] = dof*garray[ocols[k]]+j; 3102 } 3103 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3104 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3105 ii++; 3106 } 3107 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3108 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3109 } 3110 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3111 3112 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3114 3115 if (reuse == MAT_REUSE_MATRIX) { 3116 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3117 ((PetscObject)A)->refct = 1; 3118 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3119 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3120 } else { 3121 *newmat = B; 3122 } 3123 PetscFunctionReturn(0); 3124 } 3125 EXTERN_C_END 3126 3127 3128 /* ---------------------------------------------------------------------------------- */ 3129 #undef __FUNCT__ 3130 #define __FUNCT__ "MatCreateMAIJ" 3131 /*@C 3132 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3133 operations for multicomponent problems. It interpolates each component the same 3134 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3135 and MATMPIAIJ for distributed matrices. 3136 3137 Collective 3138 3139 Input Parameters: 3140 + A - the AIJ matrix describing the action on blocks 3141 - dof - the block size (number of components per node) 3142 3143 Output Parameter: 3144 . maij - the new MAIJ matrix 3145 3146 Operations provided: 3147 + MatMult 3148 . MatMultTranspose 3149 . MatMultAdd 3150 . MatMultTransposeAdd 3151 - MatView 3152 3153 Level: advanced 3154 3155 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension() 3156 @*/ 3157 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3158 { 3159 PetscErrorCode ierr; 3160 PetscMPIInt size; 3161 PetscInt n; 3162 Mat_MPIMAIJ *b; 3163 Mat B; 3164 3165 PetscFunctionBegin; 3166 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3167 3168 if (dof == 1) { 3169 *maij = A; 3170 } else { 3171 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3172 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3173 B->assembled = PETSC_TRUE; 3174 3175 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3176 if (size == 1) { 3177 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3178 B->ops->destroy = MatDestroy_SeqMAIJ; 3179 B->ops->view = MatView_SeqMAIJ; 3180 b = (Mat_MPIMAIJ*)B->data; 3181 b->dof = dof; 3182 b->AIJ = A; 3183 if (dof == 2) { 3184 B->ops->mult = MatMult_SeqMAIJ_2; 3185 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3186 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3187 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3188 } else if (dof == 3) { 3189 B->ops->mult = MatMult_SeqMAIJ_3; 3190 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3191 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3192 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3193 } else if (dof == 4) { 3194 B->ops->mult = MatMult_SeqMAIJ_4; 3195 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3196 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3197 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3198 } else if (dof == 5) { 3199 B->ops->mult = MatMult_SeqMAIJ_5; 3200 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3201 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3202 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3203 } else if (dof == 6) { 3204 B->ops->mult = MatMult_SeqMAIJ_6; 3205 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3206 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3207 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3208 } else if (dof == 7) { 3209 B->ops->mult = MatMult_SeqMAIJ_7; 3210 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3211 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3212 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3213 } else if (dof == 8) { 3214 B->ops->mult = MatMult_SeqMAIJ_8; 3215 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3216 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3217 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3218 } else if (dof == 9) { 3219 B->ops->mult = MatMult_SeqMAIJ_9; 3220 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3221 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3222 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3223 } else if (dof == 10) { 3224 B->ops->mult = MatMult_SeqMAIJ_10; 3225 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3226 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3227 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3228 } else if (dof == 11) { 3229 B->ops->mult = MatMult_SeqMAIJ_11; 3230 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3231 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3232 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3233 } else if (dof == 16) { 3234 B->ops->mult = MatMult_SeqMAIJ_16; 3235 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3236 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3237 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3238 } else if (dof == 18) { 3239 B->ops->mult = MatMult_SeqMAIJ_18; 3240 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3241 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3242 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3243 } else { 3244 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 3245 } 3246 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 3247 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3248 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3249 } else { 3250 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3251 IS from,to; 3252 Vec gvec; 3253 PetscInt *garray,i; 3254 3255 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3256 B->ops->destroy = MatDestroy_MPIMAIJ; 3257 B->ops->view = MatView_MPIMAIJ; 3258 b = (Mat_MPIMAIJ*)B->data; 3259 b->dof = dof; 3260 b->A = A; 3261 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3262 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3263 3264 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3265 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 3266 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3267 3268 /* create two temporary Index sets for build scatter gather */ 3269 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 3270 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 3271 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 3272 ierr = PetscFree(garray);CHKERRQ(ierr); 3273 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3274 3275 /* create temporary global vector to generate scatter context */ 3276 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3277 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 3278 3279 /* generate the scatter context */ 3280 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3281 3282 ierr = ISDestroy(from);CHKERRQ(ierr); 3283 ierr = ISDestroy(to);CHKERRQ(ierr); 3284 ierr = VecDestroy(gvec);CHKERRQ(ierr); 3285 3286 B->ops->mult = MatMult_MPIMAIJ_dof; 3287 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3288 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3289 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3290 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3291 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3292 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3293 } 3294 *maij = B; 3295 ierr = MatView_Private(B);CHKERRQ(ierr); 3296 } 3297 PetscFunctionReturn(0); 3298 } 3299