1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/ 21 #include "../src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 /*@C 27 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 28 29 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 30 31 Input Parameter: 32 . A - the MAIJ matrix 33 34 Output Parameter: 35 . B - the AIJ matrix 36 37 Level: advanced 38 39 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 40 41 .seealso: MatCreateMAIJ() 42 @*/ 43 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 44 { 45 PetscErrorCode ierr; 46 PetscTruth ismpimaij,isseqmaij; 47 48 PetscFunctionBegin; 49 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 50 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 51 if (ismpimaij) { 52 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 53 54 *B = b->A; 55 } else if (isseqmaij) { 56 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 57 58 *B = b->AIJ; 59 } else { 60 *B = A; 61 } 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatMAIJRedimension" 67 /*@C 68 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 69 70 Collective 71 72 Input Parameter: 73 + A - the MAIJ matrix 74 - dof - the block size for the new matrix 75 76 Output Parameter: 77 . B - the new MAIJ matrix 78 79 Level: advanced 80 81 .seealso: MatCreateMAIJ() 82 @*/ 83 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 84 { 85 PetscErrorCode ierr; 86 Mat Aij; 87 88 PetscFunctionBegin; 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 if (b->AIJ) { 103 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 104 } 105 ierr = PetscFree(b);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatView_SeqMAIJ" 111 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 112 { 113 PetscErrorCode ierr; 114 Mat B; 115 116 PetscFunctionBegin; 117 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 118 ierr = MatView(B,viewer);CHKERRQ(ierr); 119 ierr = MatDestroy(B);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatView_MPIMAIJ" 125 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 126 { 127 PetscErrorCode ierr; 128 Mat B; 129 130 PetscFunctionBegin; 131 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 132 ierr = MatView(B,viewer);CHKERRQ(ierr); 133 ierr = MatDestroy(B);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatDestroy_MPIMAIJ" 139 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 140 { 141 PetscErrorCode ierr; 142 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 143 144 PetscFunctionBegin; 145 if (b->AIJ) { 146 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 147 } 148 if (b->OAIJ) { 149 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 150 } 151 if (b->A) { 152 ierr = MatDestroy(b->A);CHKERRQ(ierr); 153 } 154 if (b->ctx) { 155 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 156 } 157 if (b->w) { 158 ierr = VecDestroy(b->w);CHKERRQ(ierr); 159 } 160 ierr = PetscFree(b);CHKERRQ(ierr); 161 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /*MC 166 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 167 multicomponent problems, interpolating or restricting each component the same way independently. 168 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 169 170 Operations provided: 171 . MatMult 172 . MatMultTranspose 173 . MatMultAdd 174 . MatMultTransposeAdd 175 176 Level: advanced 177 178 .seealso: MatCreateSeqDense 179 M*/ 180 181 EXTERN_C_BEGIN 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatCreate_MAIJ" 184 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 185 { 186 PetscErrorCode ierr; 187 Mat_MPIMAIJ *b; 188 PetscMPIInt size; 189 190 PetscFunctionBegin; 191 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 192 A->data = (void*)b; 193 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 194 A->mapping = 0; 195 196 b->AIJ = 0; 197 b->dof = 0; 198 b->OAIJ = 0; 199 b->ctx = 0; 200 b->w = 0; 201 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 202 if (size == 1){ 203 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 204 } else { 205 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 206 } 207 PetscFunctionReturn(0); 208 } 209 EXTERN_C_END 210 211 /* --------------------------------------------------------------------------------------*/ 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatMult_SeqMAIJ_2" 214 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 215 { 216 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 217 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 218 const PetscScalar *x,*v; 219 PetscScalar *y, sum1, sum2; 220 PetscErrorCode ierr; 221 PetscInt nonzerorow=0,n,i,jrow,j; 222 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 223 224 PetscFunctionBegin; 225 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 226 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 227 idx = a->j; 228 v = a->a; 229 ii = a->i; 230 231 for (i=0; i<m; i++) { 232 jrow = ii[i]; 233 n = ii[i+1] - jrow; 234 sum1 = 0.0; 235 sum2 = 0.0; 236 nonzerorow += (n>0); 237 for (j=0; j<n; j++) { 238 sum1 += v[jrow]*x[2*idx[jrow]]; 239 sum2 += v[jrow]*x[2*idx[jrow]+1]; 240 jrow++; 241 } 242 y[2*i] = sum1; 243 y[2*i+1] = sum2; 244 } 245 246 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 247 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 248 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 254 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 255 { 256 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 257 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 258 const PetscScalar *x,*v; 259 PetscScalar *y,alpha1,alpha2; 260 PetscErrorCode ierr; 261 PetscInt n,i; 262 const PetscInt m = b->AIJ->rmap->n,*idx; 263 264 PetscFunctionBegin; 265 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 266 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 267 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 268 269 for (i=0; i<m; i++) { 270 idx = a->j + a->i[i] ; 271 v = a->a + a->i[i] ; 272 n = a->i[i+1] - a->i[i]; 273 alpha1 = x[2*i]; 274 alpha2 = x[2*i+1]; 275 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 276 } 277 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 278 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 279 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 285 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 286 { 287 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 288 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 289 const PetscScalar *x,*v; 290 PetscScalar *y,sum1, sum2; 291 PetscErrorCode ierr; 292 PetscInt n,i,jrow,j; 293 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 294 295 PetscFunctionBegin; 296 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 297 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 298 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 299 idx = a->j; 300 v = a->a; 301 ii = a->i; 302 303 for (i=0; i<m; i++) { 304 jrow = ii[i]; 305 n = ii[i+1] - jrow; 306 sum1 = 0.0; 307 sum2 = 0.0; 308 for (j=0; j<n; j++) { 309 sum1 += v[jrow]*x[2*idx[jrow]]; 310 sum2 += v[jrow]*x[2*idx[jrow]+1]; 311 jrow++; 312 } 313 y[2*i] += sum1; 314 y[2*i+1] += sum2; 315 } 316 317 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 318 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 319 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 324 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 325 { 326 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 327 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 328 const PetscScalar *x,*v; 329 PetscScalar *y,alpha1,alpha2; 330 PetscErrorCode ierr; 331 PetscInt n,i; 332 const PetscInt m = b->AIJ->rmap->n,*idx; 333 334 PetscFunctionBegin; 335 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 336 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 337 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 338 339 for (i=0; i<m; i++) { 340 idx = a->j + a->i[i] ; 341 v = a->a + a->i[i] ; 342 n = a->i[i+1] - a->i[i]; 343 alpha1 = x[2*i]; 344 alpha2 = x[2*i+1]; 345 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 346 } 347 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 348 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 349 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 /* --------------------------------------------------------------------------------------*/ 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatMult_SeqMAIJ_3" 355 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 356 { 357 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 358 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 359 const PetscScalar *x,*v; 360 PetscScalar *y,sum1, sum2, sum3; 361 PetscErrorCode ierr; 362 PetscInt nonzerorow=0,n,i,jrow,j; 363 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 364 365 PetscFunctionBegin; 366 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 367 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 368 idx = a->j; 369 v = a->a; 370 ii = a->i; 371 372 for (i=0; i<m; i++) { 373 jrow = ii[i]; 374 n = ii[i+1] - jrow; 375 sum1 = 0.0; 376 sum2 = 0.0; 377 sum3 = 0.0; 378 nonzerorow += (n>0); 379 for (j=0; j<n; j++) { 380 sum1 += v[jrow]*x[3*idx[jrow]]; 381 sum2 += v[jrow]*x[3*idx[jrow]+1]; 382 sum3 += v[jrow]*x[3*idx[jrow]+2]; 383 jrow++; 384 } 385 y[3*i] = sum1; 386 y[3*i+1] = sum2; 387 y[3*i+2] = sum3; 388 } 389 390 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 391 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 392 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 396 #undef __FUNCT__ 397 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 398 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 399 { 400 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 401 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 402 const PetscScalar *x,*v; 403 PetscScalar *y,alpha1,alpha2,alpha3; 404 PetscErrorCode ierr; 405 PetscInt n,i; 406 const PetscInt m = b->AIJ->rmap->n,*idx; 407 408 PetscFunctionBegin; 409 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 410 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 411 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 412 413 for (i=0; i<m; i++) { 414 idx = a->j + a->i[i]; 415 v = a->a + a->i[i]; 416 n = a->i[i+1] - a->i[i]; 417 alpha1 = x[3*i]; 418 alpha2 = x[3*i+1]; 419 alpha3 = x[3*i+2]; 420 while (n-->0) { 421 y[3*(*idx)] += alpha1*(*v); 422 y[3*(*idx)+1] += alpha2*(*v); 423 y[3*(*idx)+2] += alpha3*(*v); 424 idx++; v++; 425 } 426 } 427 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 428 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 429 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 435 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 436 { 437 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 438 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 439 const PetscScalar *x,*v; 440 PetscScalar *y,sum1, sum2, sum3; 441 PetscErrorCode ierr; 442 PetscInt n,i,jrow,j; 443 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 444 445 PetscFunctionBegin; 446 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 447 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 448 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 449 idx = a->j; 450 v = a->a; 451 ii = a->i; 452 453 for (i=0; i<m; i++) { 454 jrow = ii[i]; 455 n = ii[i+1] - jrow; 456 sum1 = 0.0; 457 sum2 = 0.0; 458 sum3 = 0.0; 459 for (j=0; j<n; j++) { 460 sum1 += v[jrow]*x[3*idx[jrow]]; 461 sum2 += v[jrow]*x[3*idx[jrow]+1]; 462 sum3 += v[jrow]*x[3*idx[jrow]+2]; 463 jrow++; 464 } 465 y[3*i] += sum1; 466 y[3*i+1] += sum2; 467 y[3*i+2] += sum3; 468 } 469 470 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 471 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 472 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 477 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 478 { 479 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 480 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 481 const PetscScalar *x,*v; 482 PetscScalar *y,alpha1,alpha2,alpha3; 483 PetscErrorCode ierr; 484 PetscInt n,i; 485 const PetscInt m = b->AIJ->rmap->n,*idx; 486 487 PetscFunctionBegin; 488 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 489 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 490 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 491 for (i=0; i<m; i++) { 492 idx = a->j + a->i[i] ; 493 v = a->a + a->i[i] ; 494 n = a->i[i+1] - a->i[i]; 495 alpha1 = x[3*i]; 496 alpha2 = x[3*i+1]; 497 alpha3 = x[3*i+2]; 498 while (n-->0) { 499 y[3*(*idx)] += alpha1*(*v); 500 y[3*(*idx)+1] += alpha2*(*v); 501 y[3*(*idx)+2] += alpha3*(*v); 502 idx++; v++; 503 } 504 } 505 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 506 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 507 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 /* ------------------------------------------------------------------------------*/ 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatMult_SeqMAIJ_4" 514 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 515 { 516 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 517 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 518 const PetscScalar *x,*v; 519 PetscScalar *y,sum1, sum2, sum3, sum4; 520 PetscErrorCode ierr; 521 PetscInt nonzerorow=0,n,i,jrow,j; 522 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 523 524 PetscFunctionBegin; 525 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 526 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 527 idx = a->j; 528 v = a->a; 529 ii = a->i; 530 531 for (i=0; i<m; i++) { 532 jrow = ii[i]; 533 n = ii[i+1] - jrow; 534 sum1 = 0.0; 535 sum2 = 0.0; 536 sum3 = 0.0; 537 sum4 = 0.0; 538 nonzerorow += (n>0); 539 for (j=0; j<n; j++) { 540 sum1 += v[jrow]*x[4*idx[jrow]]; 541 sum2 += v[jrow]*x[4*idx[jrow]+1]; 542 sum3 += v[jrow]*x[4*idx[jrow]+2]; 543 sum4 += v[jrow]*x[4*idx[jrow]+3]; 544 jrow++; 545 } 546 y[4*i] = sum1; 547 y[4*i+1] = sum2; 548 y[4*i+2] = sum3; 549 y[4*i+3] = sum4; 550 } 551 552 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 553 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 554 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 560 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 561 { 562 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 564 const PetscScalar *x,*v; 565 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 566 PetscErrorCode ierr; 567 PetscInt n,i; 568 const PetscInt m = b->AIJ->rmap->n,*idx; 569 570 PetscFunctionBegin; 571 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 572 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 573 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 574 for (i=0; i<m; i++) { 575 idx = a->j + a->i[i] ; 576 v = a->a + a->i[i] ; 577 n = a->i[i+1] - a->i[i]; 578 alpha1 = x[4*i]; 579 alpha2 = x[4*i+1]; 580 alpha3 = x[4*i+2]; 581 alpha4 = x[4*i+3]; 582 while (n-->0) { 583 y[4*(*idx)] += alpha1*(*v); 584 y[4*(*idx)+1] += alpha2*(*v); 585 y[4*(*idx)+2] += alpha3*(*v); 586 y[4*(*idx)+3] += alpha4*(*v); 587 idx++; v++; 588 } 589 } 590 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 591 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 592 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 598 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 599 { 600 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 601 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 602 const PetscScalar *x,*v; 603 PetscScalar *y,sum1, sum2, sum3, sum4; 604 PetscErrorCode ierr; 605 PetscInt n,i,jrow,j; 606 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 607 608 PetscFunctionBegin; 609 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 610 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 611 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 612 idx = a->j; 613 v = a->a; 614 ii = a->i; 615 616 for (i=0; i<m; i++) { 617 jrow = ii[i]; 618 n = ii[i+1] - jrow; 619 sum1 = 0.0; 620 sum2 = 0.0; 621 sum3 = 0.0; 622 sum4 = 0.0; 623 for (j=0; j<n; j++) { 624 sum1 += v[jrow]*x[4*idx[jrow]]; 625 sum2 += v[jrow]*x[4*idx[jrow]+1]; 626 sum3 += v[jrow]*x[4*idx[jrow]+2]; 627 sum4 += v[jrow]*x[4*idx[jrow]+3]; 628 jrow++; 629 } 630 y[4*i] += sum1; 631 y[4*i+1] += sum2; 632 y[4*i+2] += sum3; 633 y[4*i+3] += sum4; 634 } 635 636 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 637 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 638 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 639 PetscFunctionReturn(0); 640 } 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 643 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 644 { 645 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 646 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 647 const PetscScalar *x,*v; 648 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 649 PetscErrorCode ierr; 650 PetscInt n,i; 651 const PetscInt m = b->AIJ->rmap->n,*idx; 652 653 PetscFunctionBegin; 654 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 655 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 656 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 657 658 for (i=0; i<m; i++) { 659 idx = a->j + a->i[i] ; 660 v = a->a + a->i[i] ; 661 n = a->i[i+1] - a->i[i]; 662 alpha1 = x[4*i]; 663 alpha2 = x[4*i+1]; 664 alpha3 = x[4*i+2]; 665 alpha4 = x[4*i+3]; 666 while (n-->0) { 667 y[4*(*idx)] += alpha1*(*v); 668 y[4*(*idx)+1] += alpha2*(*v); 669 y[4*(*idx)+2] += alpha3*(*v); 670 y[4*(*idx)+3] += alpha4*(*v); 671 idx++; v++; 672 } 673 } 674 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 675 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 676 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 /* ------------------------------------------------------------------------------*/ 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatMult_SeqMAIJ_5" 683 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 684 { 685 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 687 const PetscScalar *x,*v; 688 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 689 PetscErrorCode ierr; 690 PetscInt nonzerorow=0,n,i,jrow,j; 691 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 692 693 PetscFunctionBegin; 694 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 695 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 696 idx = a->j; 697 v = a->a; 698 ii = a->i; 699 700 for (i=0; i<m; i++) { 701 jrow = ii[i]; 702 n = ii[i+1] - jrow; 703 sum1 = 0.0; 704 sum2 = 0.0; 705 sum3 = 0.0; 706 sum4 = 0.0; 707 sum5 = 0.0; 708 nonzerorow += (n>0); 709 for (j=0; j<n; j++) { 710 sum1 += v[jrow]*x[5*idx[jrow]]; 711 sum2 += v[jrow]*x[5*idx[jrow]+1]; 712 sum3 += v[jrow]*x[5*idx[jrow]+2]; 713 sum4 += v[jrow]*x[5*idx[jrow]+3]; 714 sum5 += v[jrow]*x[5*idx[jrow]+4]; 715 jrow++; 716 } 717 y[5*i] = sum1; 718 y[5*i+1] = sum2; 719 y[5*i+2] = sum3; 720 y[5*i+3] = sum4; 721 y[5*i+4] = sum5; 722 } 723 724 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 725 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 726 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 732 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 733 { 734 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 735 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 736 const PetscScalar *x,*v; 737 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 738 PetscErrorCode ierr; 739 PetscInt n,i; 740 const PetscInt m = b->AIJ->rmap->n,*idx; 741 742 PetscFunctionBegin; 743 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 744 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 745 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 746 747 for (i=0; i<m; i++) { 748 idx = a->j + a->i[i] ; 749 v = a->a + a->i[i] ; 750 n = a->i[i+1] - a->i[i]; 751 alpha1 = x[5*i]; 752 alpha2 = x[5*i+1]; 753 alpha3 = x[5*i+2]; 754 alpha4 = x[5*i+3]; 755 alpha5 = x[5*i+4]; 756 while (n-->0) { 757 y[5*(*idx)] += alpha1*(*v); 758 y[5*(*idx)+1] += alpha2*(*v); 759 y[5*(*idx)+2] += alpha3*(*v); 760 y[5*(*idx)+3] += alpha4*(*v); 761 y[5*(*idx)+4] += alpha5*(*v); 762 idx++; v++; 763 } 764 } 765 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 766 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 767 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 #undef __FUNCT__ 772 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 773 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 774 { 775 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 776 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 777 const PetscScalar *x,*v; 778 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 779 PetscErrorCode ierr; 780 PetscInt n,i,jrow,j; 781 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 782 783 PetscFunctionBegin; 784 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 785 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 786 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 787 idx = a->j; 788 v = a->a; 789 ii = a->i; 790 791 for (i=0; i<m; i++) { 792 jrow = ii[i]; 793 n = ii[i+1] - jrow; 794 sum1 = 0.0; 795 sum2 = 0.0; 796 sum3 = 0.0; 797 sum4 = 0.0; 798 sum5 = 0.0; 799 for (j=0; j<n; j++) { 800 sum1 += v[jrow]*x[5*idx[jrow]]; 801 sum2 += v[jrow]*x[5*idx[jrow]+1]; 802 sum3 += v[jrow]*x[5*idx[jrow]+2]; 803 sum4 += v[jrow]*x[5*idx[jrow]+3]; 804 sum5 += v[jrow]*x[5*idx[jrow]+4]; 805 jrow++; 806 } 807 y[5*i] += sum1; 808 y[5*i+1] += sum2; 809 y[5*i+2] += sum3; 810 y[5*i+3] += sum4; 811 y[5*i+4] += sum5; 812 } 813 814 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 815 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 816 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 822 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 823 { 824 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 825 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 826 const PetscScalar *x,*v; 827 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 828 PetscErrorCode ierr; 829 PetscInt n,i; 830 const PetscInt m = b->AIJ->rmap->n,*idx; 831 832 PetscFunctionBegin; 833 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 834 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 835 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 836 837 for (i=0; i<m; i++) { 838 idx = a->j + a->i[i] ; 839 v = a->a + a->i[i] ; 840 n = a->i[i+1] - a->i[i]; 841 alpha1 = x[5*i]; 842 alpha2 = x[5*i+1]; 843 alpha3 = x[5*i+2]; 844 alpha4 = x[5*i+3]; 845 alpha5 = x[5*i+4]; 846 while (n-->0) { 847 y[5*(*idx)] += alpha1*(*v); 848 y[5*(*idx)+1] += alpha2*(*v); 849 y[5*(*idx)+2] += alpha3*(*v); 850 y[5*(*idx)+3] += alpha4*(*v); 851 y[5*(*idx)+4] += alpha5*(*v); 852 idx++; v++; 853 } 854 } 855 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 857 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 /* ------------------------------------------------------------------------------*/ 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatMult_SeqMAIJ_6" 864 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 865 { 866 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 867 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 868 const PetscScalar *x,*v; 869 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 870 PetscErrorCode ierr; 871 PetscInt nonzerorow=0,n,i,jrow,j; 872 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 873 874 PetscFunctionBegin; 875 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 876 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 877 idx = a->j; 878 v = a->a; 879 ii = a->i; 880 881 for (i=0; i<m; i++) { 882 jrow = ii[i]; 883 n = ii[i+1] - jrow; 884 sum1 = 0.0; 885 sum2 = 0.0; 886 sum3 = 0.0; 887 sum4 = 0.0; 888 sum5 = 0.0; 889 sum6 = 0.0; 890 nonzerorow += (n>0); 891 for (j=0; j<n; j++) { 892 sum1 += v[jrow]*x[6*idx[jrow]]; 893 sum2 += v[jrow]*x[6*idx[jrow]+1]; 894 sum3 += v[jrow]*x[6*idx[jrow]+2]; 895 sum4 += v[jrow]*x[6*idx[jrow]+3]; 896 sum5 += v[jrow]*x[6*idx[jrow]+4]; 897 sum6 += v[jrow]*x[6*idx[jrow]+5]; 898 jrow++; 899 } 900 y[6*i] = sum1; 901 y[6*i+1] = sum2; 902 y[6*i+2] = sum3; 903 y[6*i+3] = sum4; 904 y[6*i+4] = sum5; 905 y[6*i+5] = sum6; 906 } 907 908 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 909 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 910 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 #undef __FUNCT__ 915 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 916 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 917 { 918 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 919 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 920 const PetscScalar *x,*v; 921 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 922 PetscErrorCode ierr; 923 PetscInt n,i; 924 const PetscInt m = b->AIJ->rmap->n,*idx; 925 926 PetscFunctionBegin; 927 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 928 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 929 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 930 931 for (i=0; i<m; i++) { 932 idx = a->j + a->i[i] ; 933 v = a->a + a->i[i] ; 934 n = a->i[i+1] - a->i[i]; 935 alpha1 = x[6*i]; 936 alpha2 = x[6*i+1]; 937 alpha3 = x[6*i+2]; 938 alpha4 = x[6*i+3]; 939 alpha5 = x[6*i+4]; 940 alpha6 = x[6*i+5]; 941 while (n-->0) { 942 y[6*(*idx)] += alpha1*(*v); 943 y[6*(*idx)+1] += alpha2*(*v); 944 y[6*(*idx)+2] += alpha3*(*v); 945 y[6*(*idx)+3] += alpha4*(*v); 946 y[6*(*idx)+4] += alpha5*(*v); 947 y[6*(*idx)+5] += alpha6*(*v); 948 idx++; v++; 949 } 950 } 951 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 952 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 953 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 #undef __FUNCT__ 958 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 959 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 960 { 961 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 962 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 963 const PetscScalar *x,*v; 964 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 965 PetscErrorCode ierr; 966 PetscInt n,i,jrow,j; 967 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 968 969 PetscFunctionBegin; 970 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 971 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 972 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 973 idx = a->j; 974 v = a->a; 975 ii = a->i; 976 977 for (i=0; i<m; i++) { 978 jrow = ii[i]; 979 n = ii[i+1] - jrow; 980 sum1 = 0.0; 981 sum2 = 0.0; 982 sum3 = 0.0; 983 sum4 = 0.0; 984 sum5 = 0.0; 985 sum6 = 0.0; 986 for (j=0; j<n; j++) { 987 sum1 += v[jrow]*x[6*idx[jrow]]; 988 sum2 += v[jrow]*x[6*idx[jrow]+1]; 989 sum3 += v[jrow]*x[6*idx[jrow]+2]; 990 sum4 += v[jrow]*x[6*idx[jrow]+3]; 991 sum5 += v[jrow]*x[6*idx[jrow]+4]; 992 sum6 += v[jrow]*x[6*idx[jrow]+5]; 993 jrow++; 994 } 995 y[6*i] += sum1; 996 y[6*i+1] += sum2; 997 y[6*i+2] += sum3; 998 y[6*i+3] += sum4; 999 y[6*i+4] += sum5; 1000 y[6*i+5] += sum6; 1001 } 1002 1003 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1004 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1005 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1011 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 1012 { 1013 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1014 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1015 const PetscScalar *x,*v; 1016 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1017 PetscErrorCode ierr; 1018 PetscInt n,i; 1019 const PetscInt m = b->AIJ->rmap->n,*idx; 1020 1021 PetscFunctionBegin; 1022 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1023 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1024 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1025 1026 for (i=0; i<m; i++) { 1027 idx = a->j + a->i[i] ; 1028 v = a->a + a->i[i] ; 1029 n = a->i[i+1] - a->i[i]; 1030 alpha1 = x[6*i]; 1031 alpha2 = x[6*i+1]; 1032 alpha3 = x[6*i+2]; 1033 alpha4 = x[6*i+3]; 1034 alpha5 = x[6*i+4]; 1035 alpha6 = x[6*i+5]; 1036 while (n-->0) { 1037 y[6*(*idx)] += alpha1*(*v); 1038 y[6*(*idx)+1] += alpha2*(*v); 1039 y[6*(*idx)+2] += alpha3*(*v); 1040 y[6*(*idx)+3] += alpha4*(*v); 1041 y[6*(*idx)+4] += alpha5*(*v); 1042 y[6*(*idx)+5] += alpha6*(*v); 1043 idx++; v++; 1044 } 1045 } 1046 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1048 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 /* ------------------------------------------------------------------------------*/ 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1055 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1056 { 1057 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1058 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1059 const PetscScalar *x,*v; 1060 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1061 PetscErrorCode ierr; 1062 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1063 PetscInt nonzerorow=0,n,i,jrow,j; 1064 1065 PetscFunctionBegin; 1066 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1067 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1068 idx = a->j; 1069 v = a->a; 1070 ii = a->i; 1071 1072 for (i=0; i<m; i++) { 1073 jrow = ii[i]; 1074 n = ii[i+1] - jrow; 1075 sum1 = 0.0; 1076 sum2 = 0.0; 1077 sum3 = 0.0; 1078 sum4 = 0.0; 1079 sum5 = 0.0; 1080 sum6 = 0.0; 1081 sum7 = 0.0; 1082 nonzerorow += (n>0); 1083 for (j=0; j<n; j++) { 1084 sum1 += v[jrow]*x[7*idx[jrow]]; 1085 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1086 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1087 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1088 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1089 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1090 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1091 jrow++; 1092 } 1093 y[7*i] = sum1; 1094 y[7*i+1] = sum2; 1095 y[7*i+2] = sum3; 1096 y[7*i+3] = sum4; 1097 y[7*i+4] = sum5; 1098 y[7*i+5] = sum6; 1099 y[7*i+6] = sum7; 1100 } 1101 1102 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1103 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1104 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1110 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1111 { 1112 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1113 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1114 const PetscScalar *x,*v; 1115 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1116 PetscErrorCode ierr; 1117 const PetscInt m = b->AIJ->rmap->n,*idx; 1118 PetscInt n,i; 1119 1120 PetscFunctionBegin; 1121 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1122 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1123 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1124 1125 for (i=0; i<m; i++) { 1126 idx = a->j + a->i[i] ; 1127 v = a->a + a->i[i] ; 1128 n = a->i[i+1] - a->i[i]; 1129 alpha1 = x[7*i]; 1130 alpha2 = x[7*i+1]; 1131 alpha3 = x[7*i+2]; 1132 alpha4 = x[7*i+3]; 1133 alpha5 = x[7*i+4]; 1134 alpha6 = x[7*i+5]; 1135 alpha7 = x[7*i+6]; 1136 while (n-->0) { 1137 y[7*(*idx)] += alpha1*(*v); 1138 y[7*(*idx)+1] += alpha2*(*v); 1139 y[7*(*idx)+2] += alpha3*(*v); 1140 y[7*(*idx)+3] += alpha4*(*v); 1141 y[7*(*idx)+4] += alpha5*(*v); 1142 y[7*(*idx)+5] += alpha6*(*v); 1143 y[7*(*idx)+6] += alpha7*(*v); 1144 idx++; v++; 1145 } 1146 } 1147 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1148 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1149 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1155 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1156 { 1157 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1158 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1159 const PetscScalar *x,*v; 1160 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1161 PetscErrorCode ierr; 1162 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1163 PetscInt n,i,jrow,j; 1164 1165 PetscFunctionBegin; 1166 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1167 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1168 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1169 idx = a->j; 1170 v = a->a; 1171 ii = a->i; 1172 1173 for (i=0; i<m; i++) { 1174 jrow = ii[i]; 1175 n = ii[i+1] - jrow; 1176 sum1 = 0.0; 1177 sum2 = 0.0; 1178 sum3 = 0.0; 1179 sum4 = 0.0; 1180 sum5 = 0.0; 1181 sum6 = 0.0; 1182 sum7 = 0.0; 1183 for (j=0; j<n; j++) { 1184 sum1 += v[jrow]*x[7*idx[jrow]]; 1185 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1186 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1187 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1188 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1189 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1190 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1191 jrow++; 1192 } 1193 y[7*i] += sum1; 1194 y[7*i+1] += sum2; 1195 y[7*i+2] += sum3; 1196 y[7*i+3] += sum4; 1197 y[7*i+4] += sum5; 1198 y[7*i+5] += sum6; 1199 y[7*i+6] += sum7; 1200 } 1201 1202 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1203 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1204 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1210 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1211 { 1212 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1214 const PetscScalar *x,*v; 1215 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1216 PetscErrorCode ierr; 1217 const PetscInt m = b->AIJ->rmap->n,*idx; 1218 PetscInt n,i; 1219 1220 PetscFunctionBegin; 1221 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1222 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1223 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1224 for (i=0; i<m; i++) { 1225 idx = a->j + a->i[i] ; 1226 v = a->a + a->i[i] ; 1227 n = a->i[i+1] - a->i[i]; 1228 alpha1 = x[7*i]; 1229 alpha2 = x[7*i+1]; 1230 alpha3 = x[7*i+2]; 1231 alpha4 = x[7*i+3]; 1232 alpha5 = x[7*i+4]; 1233 alpha6 = x[7*i+5]; 1234 alpha7 = x[7*i+6]; 1235 while (n-->0) { 1236 y[7*(*idx)] += alpha1*(*v); 1237 y[7*(*idx)+1] += alpha2*(*v); 1238 y[7*(*idx)+2] += alpha3*(*v); 1239 y[7*(*idx)+3] += alpha4*(*v); 1240 y[7*(*idx)+4] += alpha5*(*v); 1241 y[7*(*idx)+5] += alpha6*(*v); 1242 y[7*(*idx)+6] += alpha7*(*v); 1243 idx++; v++; 1244 } 1245 } 1246 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1247 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1248 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1254 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1255 { 1256 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1257 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1258 const PetscScalar *x,*v; 1259 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1260 PetscErrorCode ierr; 1261 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1262 PetscInt nonzerorow=0,n,i,jrow,j; 1263 1264 PetscFunctionBegin; 1265 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1266 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1267 idx = a->j; 1268 v = a->a; 1269 ii = a->i; 1270 1271 for (i=0; i<m; i++) { 1272 jrow = ii[i]; 1273 n = ii[i+1] - jrow; 1274 sum1 = 0.0; 1275 sum2 = 0.0; 1276 sum3 = 0.0; 1277 sum4 = 0.0; 1278 sum5 = 0.0; 1279 sum6 = 0.0; 1280 sum7 = 0.0; 1281 sum8 = 0.0; 1282 nonzerorow += (n>0); 1283 for (j=0; j<n; j++) { 1284 sum1 += v[jrow]*x[8*idx[jrow]]; 1285 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1286 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1287 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1288 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1289 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1290 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1291 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1292 jrow++; 1293 } 1294 y[8*i] = sum1; 1295 y[8*i+1] = sum2; 1296 y[8*i+2] = sum3; 1297 y[8*i+3] = sum4; 1298 y[8*i+4] = sum5; 1299 y[8*i+5] = sum6; 1300 y[8*i+6] = sum7; 1301 y[8*i+7] = sum8; 1302 } 1303 1304 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1305 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1306 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1312 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1313 { 1314 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1316 const PetscScalar *x,*v; 1317 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1318 PetscErrorCode ierr; 1319 const PetscInt m = b->AIJ->rmap->n,*idx; 1320 PetscInt n,i; 1321 1322 PetscFunctionBegin; 1323 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1324 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1325 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1326 1327 for (i=0; i<m; i++) { 1328 idx = a->j + a->i[i] ; 1329 v = a->a + a->i[i] ; 1330 n = a->i[i+1] - a->i[i]; 1331 alpha1 = x[8*i]; 1332 alpha2 = x[8*i+1]; 1333 alpha3 = x[8*i+2]; 1334 alpha4 = x[8*i+3]; 1335 alpha5 = x[8*i+4]; 1336 alpha6 = x[8*i+5]; 1337 alpha7 = x[8*i+6]; 1338 alpha8 = x[8*i+7]; 1339 while (n-->0) { 1340 y[8*(*idx)] += alpha1*(*v); 1341 y[8*(*idx)+1] += alpha2*(*v); 1342 y[8*(*idx)+2] += alpha3*(*v); 1343 y[8*(*idx)+3] += alpha4*(*v); 1344 y[8*(*idx)+4] += alpha5*(*v); 1345 y[8*(*idx)+5] += alpha6*(*v); 1346 y[8*(*idx)+6] += alpha7*(*v); 1347 y[8*(*idx)+7] += alpha8*(*v); 1348 idx++; v++; 1349 } 1350 } 1351 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1352 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1353 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1359 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1360 { 1361 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1362 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1363 const PetscScalar *x,*v; 1364 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1365 PetscErrorCode ierr; 1366 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1367 PetscInt n,i,jrow,j; 1368 1369 PetscFunctionBegin; 1370 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1371 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1372 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1373 idx = a->j; 1374 v = a->a; 1375 ii = a->i; 1376 1377 for (i=0; i<m; i++) { 1378 jrow = ii[i]; 1379 n = ii[i+1] - jrow; 1380 sum1 = 0.0; 1381 sum2 = 0.0; 1382 sum3 = 0.0; 1383 sum4 = 0.0; 1384 sum5 = 0.0; 1385 sum6 = 0.0; 1386 sum7 = 0.0; 1387 sum8 = 0.0; 1388 for (j=0; j<n; j++) { 1389 sum1 += v[jrow]*x[8*idx[jrow]]; 1390 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1391 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1392 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1393 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1394 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1395 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1396 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1397 jrow++; 1398 } 1399 y[8*i] += sum1; 1400 y[8*i+1] += sum2; 1401 y[8*i+2] += sum3; 1402 y[8*i+3] += sum4; 1403 y[8*i+4] += sum5; 1404 y[8*i+5] += sum6; 1405 y[8*i+6] += sum7; 1406 y[8*i+7] += sum8; 1407 } 1408 1409 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1410 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1411 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1417 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1418 { 1419 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1420 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1421 const PetscScalar *x,*v; 1422 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1423 PetscErrorCode ierr; 1424 const PetscInt m = b->AIJ->rmap->n,*idx; 1425 PetscInt n,i; 1426 1427 PetscFunctionBegin; 1428 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1429 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1430 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1431 for (i=0; i<m; i++) { 1432 idx = a->j + a->i[i] ; 1433 v = a->a + a->i[i] ; 1434 n = a->i[i+1] - a->i[i]; 1435 alpha1 = x[8*i]; 1436 alpha2 = x[8*i+1]; 1437 alpha3 = x[8*i+2]; 1438 alpha4 = x[8*i+3]; 1439 alpha5 = x[8*i+4]; 1440 alpha6 = x[8*i+5]; 1441 alpha7 = x[8*i+6]; 1442 alpha8 = x[8*i+7]; 1443 while (n-->0) { 1444 y[8*(*idx)] += alpha1*(*v); 1445 y[8*(*idx)+1] += alpha2*(*v); 1446 y[8*(*idx)+2] += alpha3*(*v); 1447 y[8*(*idx)+3] += alpha4*(*v); 1448 y[8*(*idx)+4] += alpha5*(*v); 1449 y[8*(*idx)+5] += alpha6*(*v); 1450 y[8*(*idx)+6] += alpha7*(*v); 1451 y[8*(*idx)+7] += alpha8*(*v); 1452 idx++; v++; 1453 } 1454 } 1455 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1456 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1457 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } 1460 1461 /* ------------------------------------------------------------------------------*/ 1462 #undef __FUNCT__ 1463 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1464 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1465 { 1466 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1468 const PetscScalar *x,*v; 1469 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1470 PetscErrorCode ierr; 1471 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1472 PetscInt nonzerorow=0,n,i,jrow,j; 1473 1474 PetscFunctionBegin; 1475 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1476 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1477 idx = a->j; 1478 v = a->a; 1479 ii = a->i; 1480 1481 for (i=0; i<m; i++) { 1482 jrow = ii[i]; 1483 n = ii[i+1] - jrow; 1484 sum1 = 0.0; 1485 sum2 = 0.0; 1486 sum3 = 0.0; 1487 sum4 = 0.0; 1488 sum5 = 0.0; 1489 sum6 = 0.0; 1490 sum7 = 0.0; 1491 sum8 = 0.0; 1492 sum9 = 0.0; 1493 nonzerorow += (n>0); 1494 for (j=0; j<n; j++) { 1495 sum1 += v[jrow]*x[9*idx[jrow]]; 1496 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1497 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1498 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1499 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1500 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1501 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1502 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1503 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1504 jrow++; 1505 } 1506 y[9*i] = sum1; 1507 y[9*i+1] = sum2; 1508 y[9*i+2] = sum3; 1509 y[9*i+3] = sum4; 1510 y[9*i+4] = sum5; 1511 y[9*i+5] = sum6; 1512 y[9*i+6] = sum7; 1513 y[9*i+7] = sum8; 1514 y[9*i+8] = sum9; 1515 } 1516 1517 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1518 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1519 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 /* ------------------------------------------------------------------------------*/ 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1527 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1528 { 1529 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1530 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1531 const PetscScalar *x,*v; 1532 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1533 PetscErrorCode ierr; 1534 const PetscInt m = b->AIJ->rmap->n,*idx; 1535 PetscInt n,i; 1536 1537 PetscFunctionBegin; 1538 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1539 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1540 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1541 1542 for (i=0; i<m; i++) { 1543 idx = a->j + a->i[i] ; 1544 v = a->a + a->i[i] ; 1545 n = a->i[i+1] - a->i[i]; 1546 alpha1 = x[9*i]; 1547 alpha2 = x[9*i+1]; 1548 alpha3 = x[9*i+2]; 1549 alpha4 = x[9*i+3]; 1550 alpha5 = x[9*i+4]; 1551 alpha6 = x[9*i+5]; 1552 alpha7 = x[9*i+6]; 1553 alpha8 = x[9*i+7]; 1554 alpha9 = x[9*i+8]; 1555 while (n-->0) { 1556 y[9*(*idx)] += alpha1*(*v); 1557 y[9*(*idx)+1] += alpha2*(*v); 1558 y[9*(*idx)+2] += alpha3*(*v); 1559 y[9*(*idx)+3] += alpha4*(*v); 1560 y[9*(*idx)+4] += alpha5*(*v); 1561 y[9*(*idx)+5] += alpha6*(*v); 1562 y[9*(*idx)+6] += alpha7*(*v); 1563 y[9*(*idx)+7] += alpha8*(*v); 1564 y[9*(*idx)+8] += alpha9*(*v); 1565 idx++; v++; 1566 } 1567 } 1568 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1569 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1570 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1576 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1577 { 1578 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1579 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1580 const PetscScalar *x,*v; 1581 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1582 PetscErrorCode ierr; 1583 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1584 PetscInt n,i,jrow,j; 1585 1586 PetscFunctionBegin; 1587 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1588 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1589 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1590 idx = a->j; 1591 v = a->a; 1592 ii = a->i; 1593 1594 for (i=0; i<m; i++) { 1595 jrow = ii[i]; 1596 n = ii[i+1] - jrow; 1597 sum1 = 0.0; 1598 sum2 = 0.0; 1599 sum3 = 0.0; 1600 sum4 = 0.0; 1601 sum5 = 0.0; 1602 sum6 = 0.0; 1603 sum7 = 0.0; 1604 sum8 = 0.0; 1605 sum9 = 0.0; 1606 for (j=0; j<n; j++) { 1607 sum1 += v[jrow]*x[9*idx[jrow]]; 1608 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1609 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1610 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1611 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1612 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1613 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1614 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1615 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1616 jrow++; 1617 } 1618 y[9*i] += sum1; 1619 y[9*i+1] += sum2; 1620 y[9*i+2] += sum3; 1621 y[9*i+3] += sum4; 1622 y[9*i+4] += sum5; 1623 y[9*i+5] += sum6; 1624 y[9*i+6] += sum7; 1625 y[9*i+7] += sum8; 1626 y[9*i+8] += sum9; 1627 } 1628 1629 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1630 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1631 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1637 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1638 { 1639 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1640 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1641 const PetscScalar *x,*v; 1642 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1643 PetscErrorCode ierr; 1644 const PetscInt m = b->AIJ->rmap->n,*idx; 1645 PetscInt n,i; 1646 1647 PetscFunctionBegin; 1648 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1649 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1650 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1651 for (i=0; i<m; i++) { 1652 idx = a->j + a->i[i] ; 1653 v = a->a + a->i[i] ; 1654 n = a->i[i+1] - a->i[i]; 1655 alpha1 = x[9*i]; 1656 alpha2 = x[9*i+1]; 1657 alpha3 = x[9*i+2]; 1658 alpha4 = x[9*i+3]; 1659 alpha5 = x[9*i+4]; 1660 alpha6 = x[9*i+5]; 1661 alpha7 = x[9*i+6]; 1662 alpha8 = x[9*i+7]; 1663 alpha9 = x[9*i+8]; 1664 while (n-->0) { 1665 y[9*(*idx)] += alpha1*(*v); 1666 y[9*(*idx)+1] += alpha2*(*v); 1667 y[9*(*idx)+2] += alpha3*(*v); 1668 y[9*(*idx)+3] += alpha4*(*v); 1669 y[9*(*idx)+4] += alpha5*(*v); 1670 y[9*(*idx)+5] += alpha6*(*v); 1671 y[9*(*idx)+6] += alpha7*(*v); 1672 y[9*(*idx)+7] += alpha8*(*v); 1673 y[9*(*idx)+8] += alpha9*(*v); 1674 idx++; v++; 1675 } 1676 } 1677 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1678 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1679 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1684 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1685 { 1686 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1688 const PetscScalar *x,*v; 1689 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1690 PetscErrorCode ierr; 1691 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1692 PetscInt nonzerorow=0,n,i,jrow,j; 1693 1694 PetscFunctionBegin; 1695 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1696 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1697 idx = a->j; 1698 v = a->a; 1699 ii = a->i; 1700 1701 for (i=0; i<m; i++) { 1702 jrow = ii[i]; 1703 n = ii[i+1] - jrow; 1704 sum1 = 0.0; 1705 sum2 = 0.0; 1706 sum3 = 0.0; 1707 sum4 = 0.0; 1708 sum5 = 0.0; 1709 sum6 = 0.0; 1710 sum7 = 0.0; 1711 sum8 = 0.0; 1712 sum9 = 0.0; 1713 sum10 = 0.0; 1714 nonzerorow += (n>0); 1715 for (j=0; j<n; j++) { 1716 sum1 += v[jrow]*x[10*idx[jrow]]; 1717 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1718 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1719 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1720 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1721 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1722 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1723 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1724 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1725 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1726 jrow++; 1727 } 1728 y[10*i] = sum1; 1729 y[10*i+1] = sum2; 1730 y[10*i+2] = sum3; 1731 y[10*i+3] = sum4; 1732 y[10*i+4] = sum5; 1733 y[10*i+5] = sum6; 1734 y[10*i+6] = sum7; 1735 y[10*i+7] = sum8; 1736 y[10*i+8] = sum9; 1737 y[10*i+9] = sum10; 1738 } 1739 1740 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1741 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1742 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1748 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1749 { 1750 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1751 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1752 const PetscScalar *x,*v; 1753 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1754 PetscErrorCode ierr; 1755 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1756 PetscInt n,i,jrow,j; 1757 1758 PetscFunctionBegin; 1759 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1760 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1761 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1762 idx = a->j; 1763 v = a->a; 1764 ii = a->i; 1765 1766 for (i=0; i<m; i++) { 1767 jrow = ii[i]; 1768 n = ii[i+1] - jrow; 1769 sum1 = 0.0; 1770 sum2 = 0.0; 1771 sum3 = 0.0; 1772 sum4 = 0.0; 1773 sum5 = 0.0; 1774 sum6 = 0.0; 1775 sum7 = 0.0; 1776 sum8 = 0.0; 1777 sum9 = 0.0; 1778 sum10 = 0.0; 1779 for (j=0; j<n; j++) { 1780 sum1 += v[jrow]*x[10*idx[jrow]]; 1781 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1782 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1783 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1784 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1785 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1786 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1787 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1788 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1789 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1790 jrow++; 1791 } 1792 y[10*i] += sum1; 1793 y[10*i+1] += sum2; 1794 y[10*i+2] += sum3; 1795 y[10*i+3] += sum4; 1796 y[10*i+4] += sum5; 1797 y[10*i+5] += sum6; 1798 y[10*i+6] += sum7; 1799 y[10*i+7] += sum8; 1800 y[10*i+8] += sum9; 1801 y[10*i+9] += sum10; 1802 } 1803 1804 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1805 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1806 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1812 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1813 { 1814 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1816 const PetscScalar *x,*v; 1817 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1818 PetscErrorCode ierr; 1819 const PetscInt m = b->AIJ->rmap->n,*idx; 1820 PetscInt n,i; 1821 1822 PetscFunctionBegin; 1823 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1824 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1825 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1826 1827 for (i=0; i<m; i++) { 1828 idx = a->j + a->i[i] ; 1829 v = a->a + a->i[i] ; 1830 n = a->i[i+1] - a->i[i]; 1831 alpha1 = x[10*i]; 1832 alpha2 = x[10*i+1]; 1833 alpha3 = x[10*i+2]; 1834 alpha4 = x[10*i+3]; 1835 alpha5 = x[10*i+4]; 1836 alpha6 = x[10*i+5]; 1837 alpha7 = x[10*i+6]; 1838 alpha8 = x[10*i+7]; 1839 alpha9 = x[10*i+8]; 1840 alpha10 = x[10*i+9]; 1841 while (n-->0) { 1842 y[10*(*idx)] += alpha1*(*v); 1843 y[10*(*idx)+1] += alpha2*(*v); 1844 y[10*(*idx)+2] += alpha3*(*v); 1845 y[10*(*idx)+3] += alpha4*(*v); 1846 y[10*(*idx)+4] += alpha5*(*v); 1847 y[10*(*idx)+5] += alpha6*(*v); 1848 y[10*(*idx)+6] += alpha7*(*v); 1849 y[10*(*idx)+7] += alpha8*(*v); 1850 y[10*(*idx)+8] += alpha9*(*v); 1851 y[10*(*idx)+9] += alpha10*(*v); 1852 idx++; v++; 1853 } 1854 } 1855 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1856 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1857 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1863 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1864 { 1865 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1866 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1867 const PetscScalar *x,*v; 1868 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1869 PetscErrorCode ierr; 1870 const PetscInt m = b->AIJ->rmap->n,*idx; 1871 PetscInt n,i; 1872 1873 PetscFunctionBegin; 1874 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1875 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1876 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1877 for (i=0; i<m; i++) { 1878 idx = a->j + a->i[i] ; 1879 v = a->a + a->i[i] ; 1880 n = a->i[i+1] - a->i[i]; 1881 alpha1 = x[10*i]; 1882 alpha2 = x[10*i+1]; 1883 alpha3 = x[10*i+2]; 1884 alpha4 = x[10*i+3]; 1885 alpha5 = x[10*i+4]; 1886 alpha6 = x[10*i+5]; 1887 alpha7 = x[10*i+6]; 1888 alpha8 = x[10*i+7]; 1889 alpha9 = x[10*i+8]; 1890 alpha10 = x[10*i+9]; 1891 while (n-->0) { 1892 y[10*(*idx)] += alpha1*(*v); 1893 y[10*(*idx)+1] += alpha2*(*v); 1894 y[10*(*idx)+2] += alpha3*(*v); 1895 y[10*(*idx)+3] += alpha4*(*v); 1896 y[10*(*idx)+4] += alpha5*(*v); 1897 y[10*(*idx)+5] += alpha6*(*v); 1898 y[10*(*idx)+6] += alpha7*(*v); 1899 y[10*(*idx)+7] += alpha8*(*v); 1900 y[10*(*idx)+8] += alpha9*(*v); 1901 y[10*(*idx)+9] += alpha10*(*v); 1902 idx++; v++; 1903 } 1904 } 1905 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1906 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1907 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 1912 /*--------------------------------------------------------------------------------------------*/ 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1915 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1916 { 1917 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1918 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1919 const PetscScalar *x,*v; 1920 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1921 PetscErrorCode ierr; 1922 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1923 PetscInt nonzerorow=0,n,i,jrow,j; 1924 1925 PetscFunctionBegin; 1926 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1927 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1928 idx = a->j; 1929 v = a->a; 1930 ii = a->i; 1931 1932 for (i=0; i<m; i++) { 1933 jrow = ii[i]; 1934 n = ii[i+1] - jrow; 1935 sum1 = 0.0; 1936 sum2 = 0.0; 1937 sum3 = 0.0; 1938 sum4 = 0.0; 1939 sum5 = 0.0; 1940 sum6 = 0.0; 1941 sum7 = 0.0; 1942 sum8 = 0.0; 1943 sum9 = 0.0; 1944 sum10 = 0.0; 1945 sum11 = 0.0; 1946 nonzerorow += (n>0); 1947 for (j=0; j<n; j++) { 1948 sum1 += v[jrow]*x[11*idx[jrow]]; 1949 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1950 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1951 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1952 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1953 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1954 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1955 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1956 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1957 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1958 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1959 jrow++; 1960 } 1961 y[11*i] = sum1; 1962 y[11*i+1] = sum2; 1963 y[11*i+2] = sum3; 1964 y[11*i+3] = sum4; 1965 y[11*i+4] = sum5; 1966 y[11*i+5] = sum6; 1967 y[11*i+6] = sum7; 1968 y[11*i+7] = sum8; 1969 y[11*i+8] = sum9; 1970 y[11*i+9] = sum10; 1971 y[11*i+10] = sum11; 1972 } 1973 1974 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1975 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1976 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1982 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1983 { 1984 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1985 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1986 const PetscScalar *x,*v; 1987 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1988 PetscErrorCode ierr; 1989 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1990 PetscInt n,i,jrow,j; 1991 1992 PetscFunctionBegin; 1993 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1994 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1995 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1996 idx = a->j; 1997 v = a->a; 1998 ii = a->i; 1999 2000 for (i=0; i<m; i++) { 2001 jrow = ii[i]; 2002 n = ii[i+1] - jrow; 2003 sum1 = 0.0; 2004 sum2 = 0.0; 2005 sum3 = 0.0; 2006 sum4 = 0.0; 2007 sum5 = 0.0; 2008 sum6 = 0.0; 2009 sum7 = 0.0; 2010 sum8 = 0.0; 2011 sum9 = 0.0; 2012 sum10 = 0.0; 2013 sum11 = 0.0; 2014 for (j=0; j<n; j++) { 2015 sum1 += v[jrow]*x[11*idx[jrow]]; 2016 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2017 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2018 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2019 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2020 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2021 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2022 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2023 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2024 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2025 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2026 jrow++; 2027 } 2028 y[11*i] += sum1; 2029 y[11*i+1] += sum2; 2030 y[11*i+2] += sum3; 2031 y[11*i+3] += sum4; 2032 y[11*i+4] += sum5; 2033 y[11*i+5] += sum6; 2034 y[11*i+6] += sum7; 2035 y[11*i+7] += sum8; 2036 y[11*i+8] += sum9; 2037 y[11*i+9] += sum10; 2038 y[11*i+10] += sum11; 2039 } 2040 2041 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2042 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2043 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2049 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2050 { 2051 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2052 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2053 const PetscScalar *x,*v; 2054 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2055 PetscErrorCode ierr; 2056 const PetscInt m = b->AIJ->rmap->n,*idx; 2057 PetscInt n,i; 2058 2059 PetscFunctionBegin; 2060 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2061 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2062 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2063 2064 for (i=0; i<m; i++) { 2065 idx = a->j + a->i[i] ; 2066 v = a->a + a->i[i] ; 2067 n = a->i[i+1] - a->i[i]; 2068 alpha1 = x[11*i]; 2069 alpha2 = x[11*i+1]; 2070 alpha3 = x[11*i+2]; 2071 alpha4 = x[11*i+3]; 2072 alpha5 = x[11*i+4]; 2073 alpha6 = x[11*i+5]; 2074 alpha7 = x[11*i+6]; 2075 alpha8 = x[11*i+7]; 2076 alpha9 = x[11*i+8]; 2077 alpha10 = x[11*i+9]; 2078 alpha11 = x[11*i+10]; 2079 while (n-->0) { 2080 y[11*(*idx)] += alpha1*(*v); 2081 y[11*(*idx)+1] += alpha2*(*v); 2082 y[11*(*idx)+2] += alpha3*(*v); 2083 y[11*(*idx)+3] += alpha4*(*v); 2084 y[11*(*idx)+4] += alpha5*(*v); 2085 y[11*(*idx)+5] += alpha6*(*v); 2086 y[11*(*idx)+6] += alpha7*(*v); 2087 y[11*(*idx)+7] += alpha8*(*v); 2088 y[11*(*idx)+8] += alpha9*(*v); 2089 y[11*(*idx)+9] += alpha10*(*v); 2090 y[11*(*idx)+10] += alpha11*(*v); 2091 idx++; v++; 2092 } 2093 } 2094 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2095 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2096 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2097 PetscFunctionReturn(0); 2098 } 2099 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2102 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2103 { 2104 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2106 const PetscScalar *x,*v; 2107 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2108 PetscErrorCode ierr; 2109 const PetscInt m = b->AIJ->rmap->n,*idx; 2110 PetscInt n,i; 2111 2112 PetscFunctionBegin; 2113 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2114 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2115 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2116 for (i=0; i<m; i++) { 2117 idx = a->j + a->i[i] ; 2118 v = a->a + a->i[i] ; 2119 n = a->i[i+1] - a->i[i]; 2120 alpha1 = x[11*i]; 2121 alpha2 = x[11*i+1]; 2122 alpha3 = x[11*i+2]; 2123 alpha4 = x[11*i+3]; 2124 alpha5 = x[11*i+4]; 2125 alpha6 = x[11*i+5]; 2126 alpha7 = x[11*i+6]; 2127 alpha8 = x[11*i+7]; 2128 alpha9 = x[11*i+8]; 2129 alpha10 = x[11*i+9]; 2130 alpha11 = x[11*i+10]; 2131 while (n-->0) { 2132 y[11*(*idx)] += alpha1*(*v); 2133 y[11*(*idx)+1] += alpha2*(*v); 2134 y[11*(*idx)+2] += alpha3*(*v); 2135 y[11*(*idx)+3] += alpha4*(*v); 2136 y[11*(*idx)+4] += alpha5*(*v); 2137 y[11*(*idx)+5] += alpha6*(*v); 2138 y[11*(*idx)+6] += alpha7*(*v); 2139 y[11*(*idx)+7] += alpha8*(*v); 2140 y[11*(*idx)+8] += alpha9*(*v); 2141 y[11*(*idx)+9] += alpha10*(*v); 2142 y[11*(*idx)+10] += alpha11*(*v); 2143 idx++; v++; 2144 } 2145 } 2146 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2147 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2148 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 2153 /*--------------------------------------------------------------------------------------------*/ 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2156 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2157 { 2158 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2159 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2160 const PetscScalar *x,*v; 2161 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2162 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2163 PetscErrorCode ierr; 2164 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2165 PetscInt nonzerorow=0,n,i,jrow,j; 2166 2167 PetscFunctionBegin; 2168 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2169 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2170 idx = a->j; 2171 v = a->a; 2172 ii = a->i; 2173 2174 for (i=0; i<m; i++) { 2175 jrow = ii[i]; 2176 n = ii[i+1] - jrow; 2177 sum1 = 0.0; 2178 sum2 = 0.0; 2179 sum3 = 0.0; 2180 sum4 = 0.0; 2181 sum5 = 0.0; 2182 sum6 = 0.0; 2183 sum7 = 0.0; 2184 sum8 = 0.0; 2185 sum9 = 0.0; 2186 sum10 = 0.0; 2187 sum11 = 0.0; 2188 sum12 = 0.0; 2189 sum13 = 0.0; 2190 sum14 = 0.0; 2191 sum15 = 0.0; 2192 sum16 = 0.0; 2193 nonzerorow += (n>0); 2194 for (j=0; j<n; j++) { 2195 sum1 += v[jrow]*x[16*idx[jrow]]; 2196 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2197 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2198 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2199 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2200 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2201 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2202 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2203 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2204 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2205 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2206 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2207 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2208 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2209 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2210 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2211 jrow++; 2212 } 2213 y[16*i] = sum1; 2214 y[16*i+1] = sum2; 2215 y[16*i+2] = sum3; 2216 y[16*i+3] = sum4; 2217 y[16*i+4] = sum5; 2218 y[16*i+5] = sum6; 2219 y[16*i+6] = sum7; 2220 y[16*i+7] = sum8; 2221 y[16*i+8] = sum9; 2222 y[16*i+9] = sum10; 2223 y[16*i+10] = sum11; 2224 y[16*i+11] = sum12; 2225 y[16*i+12] = sum13; 2226 y[16*i+13] = sum14; 2227 y[16*i+14] = sum15; 2228 y[16*i+15] = sum16; 2229 } 2230 2231 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2232 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2233 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2234 PetscFunctionReturn(0); 2235 } 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2239 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2240 { 2241 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2242 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2243 const PetscScalar *x,*v; 2244 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2245 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2246 PetscErrorCode ierr; 2247 const PetscInt m = b->AIJ->rmap->n,*idx; 2248 PetscInt n,i; 2249 2250 PetscFunctionBegin; 2251 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2252 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2253 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2254 2255 for (i=0; i<m; i++) { 2256 idx = a->j + a->i[i] ; 2257 v = a->a + a->i[i] ; 2258 n = a->i[i+1] - a->i[i]; 2259 alpha1 = x[16*i]; 2260 alpha2 = x[16*i+1]; 2261 alpha3 = x[16*i+2]; 2262 alpha4 = x[16*i+3]; 2263 alpha5 = x[16*i+4]; 2264 alpha6 = x[16*i+5]; 2265 alpha7 = x[16*i+6]; 2266 alpha8 = x[16*i+7]; 2267 alpha9 = x[16*i+8]; 2268 alpha10 = x[16*i+9]; 2269 alpha11 = x[16*i+10]; 2270 alpha12 = x[16*i+11]; 2271 alpha13 = x[16*i+12]; 2272 alpha14 = x[16*i+13]; 2273 alpha15 = x[16*i+14]; 2274 alpha16 = x[16*i+15]; 2275 while (n-->0) { 2276 y[16*(*idx)] += alpha1*(*v); 2277 y[16*(*idx)+1] += alpha2*(*v); 2278 y[16*(*idx)+2] += alpha3*(*v); 2279 y[16*(*idx)+3] += alpha4*(*v); 2280 y[16*(*idx)+4] += alpha5*(*v); 2281 y[16*(*idx)+5] += alpha6*(*v); 2282 y[16*(*idx)+6] += alpha7*(*v); 2283 y[16*(*idx)+7] += alpha8*(*v); 2284 y[16*(*idx)+8] += alpha9*(*v); 2285 y[16*(*idx)+9] += alpha10*(*v); 2286 y[16*(*idx)+10] += alpha11*(*v); 2287 y[16*(*idx)+11] += alpha12*(*v); 2288 y[16*(*idx)+12] += alpha13*(*v); 2289 y[16*(*idx)+13] += alpha14*(*v); 2290 y[16*(*idx)+14] += alpha15*(*v); 2291 y[16*(*idx)+15] += alpha16*(*v); 2292 idx++; v++; 2293 } 2294 } 2295 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2296 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2297 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 #undef __FUNCT__ 2302 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2303 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2304 { 2305 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2306 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2307 const PetscScalar *x,*v; 2308 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2309 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2310 PetscErrorCode ierr; 2311 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2312 PetscInt n,i,jrow,j; 2313 2314 PetscFunctionBegin; 2315 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2316 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2317 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2318 idx = a->j; 2319 v = a->a; 2320 ii = a->i; 2321 2322 for (i=0; i<m; i++) { 2323 jrow = ii[i]; 2324 n = ii[i+1] - jrow; 2325 sum1 = 0.0; 2326 sum2 = 0.0; 2327 sum3 = 0.0; 2328 sum4 = 0.0; 2329 sum5 = 0.0; 2330 sum6 = 0.0; 2331 sum7 = 0.0; 2332 sum8 = 0.0; 2333 sum9 = 0.0; 2334 sum10 = 0.0; 2335 sum11 = 0.0; 2336 sum12 = 0.0; 2337 sum13 = 0.0; 2338 sum14 = 0.0; 2339 sum15 = 0.0; 2340 sum16 = 0.0; 2341 for (j=0; j<n; j++) { 2342 sum1 += v[jrow]*x[16*idx[jrow]]; 2343 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2344 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2345 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2346 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2347 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2348 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2349 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2350 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2351 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2352 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2353 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2354 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2355 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2356 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2357 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2358 jrow++; 2359 } 2360 y[16*i] += sum1; 2361 y[16*i+1] += sum2; 2362 y[16*i+2] += sum3; 2363 y[16*i+3] += sum4; 2364 y[16*i+4] += sum5; 2365 y[16*i+5] += sum6; 2366 y[16*i+6] += sum7; 2367 y[16*i+7] += sum8; 2368 y[16*i+8] += sum9; 2369 y[16*i+9] += sum10; 2370 y[16*i+10] += sum11; 2371 y[16*i+11] += sum12; 2372 y[16*i+12] += sum13; 2373 y[16*i+13] += sum14; 2374 y[16*i+14] += sum15; 2375 y[16*i+15] += sum16; 2376 } 2377 2378 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2379 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2380 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2386 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2387 { 2388 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2389 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2390 const PetscScalar *x,*v; 2391 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2392 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2393 PetscErrorCode ierr; 2394 const PetscInt m = b->AIJ->rmap->n,*idx; 2395 PetscInt n,i; 2396 2397 PetscFunctionBegin; 2398 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2399 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2400 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2401 for (i=0; i<m; i++) { 2402 idx = a->j + a->i[i] ; 2403 v = a->a + a->i[i] ; 2404 n = a->i[i+1] - a->i[i]; 2405 alpha1 = x[16*i]; 2406 alpha2 = x[16*i+1]; 2407 alpha3 = x[16*i+2]; 2408 alpha4 = x[16*i+3]; 2409 alpha5 = x[16*i+4]; 2410 alpha6 = x[16*i+5]; 2411 alpha7 = x[16*i+6]; 2412 alpha8 = x[16*i+7]; 2413 alpha9 = x[16*i+8]; 2414 alpha10 = x[16*i+9]; 2415 alpha11 = x[16*i+10]; 2416 alpha12 = x[16*i+11]; 2417 alpha13 = x[16*i+12]; 2418 alpha14 = x[16*i+13]; 2419 alpha15 = x[16*i+14]; 2420 alpha16 = x[16*i+15]; 2421 while (n-->0) { 2422 y[16*(*idx)] += alpha1*(*v); 2423 y[16*(*idx)+1] += alpha2*(*v); 2424 y[16*(*idx)+2] += alpha3*(*v); 2425 y[16*(*idx)+3] += alpha4*(*v); 2426 y[16*(*idx)+4] += alpha5*(*v); 2427 y[16*(*idx)+5] += alpha6*(*v); 2428 y[16*(*idx)+6] += alpha7*(*v); 2429 y[16*(*idx)+7] += alpha8*(*v); 2430 y[16*(*idx)+8] += alpha9*(*v); 2431 y[16*(*idx)+9] += alpha10*(*v); 2432 y[16*(*idx)+10] += alpha11*(*v); 2433 y[16*(*idx)+11] += alpha12*(*v); 2434 y[16*(*idx)+12] += alpha13*(*v); 2435 y[16*(*idx)+13] += alpha14*(*v); 2436 y[16*(*idx)+14] += alpha15*(*v); 2437 y[16*(*idx)+15] += alpha16*(*v); 2438 idx++; v++; 2439 } 2440 } 2441 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2442 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2443 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 /*--------------------------------------------------------------------------------------------*/ 2448 #undef __FUNCT__ 2449 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2450 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2451 { 2452 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2454 const PetscScalar *x,*v; 2455 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2456 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2457 PetscErrorCode ierr; 2458 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2459 PetscInt nonzerorow=0,n,i,jrow,j; 2460 2461 PetscFunctionBegin; 2462 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2463 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2464 idx = a->j; 2465 v = a->a; 2466 ii = a->i; 2467 2468 for (i=0; i<m; i++) { 2469 jrow = ii[i]; 2470 n = ii[i+1] - jrow; 2471 sum1 = 0.0; 2472 sum2 = 0.0; 2473 sum3 = 0.0; 2474 sum4 = 0.0; 2475 sum5 = 0.0; 2476 sum6 = 0.0; 2477 sum7 = 0.0; 2478 sum8 = 0.0; 2479 sum9 = 0.0; 2480 sum10 = 0.0; 2481 sum11 = 0.0; 2482 sum12 = 0.0; 2483 sum13 = 0.0; 2484 sum14 = 0.0; 2485 sum15 = 0.0; 2486 sum16 = 0.0; 2487 sum17 = 0.0; 2488 sum18 = 0.0; 2489 nonzerorow += (n>0); 2490 for (j=0; j<n; j++) { 2491 sum1 += v[jrow]*x[18*idx[jrow]]; 2492 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2493 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2494 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2495 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2496 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2497 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2498 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2499 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2500 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2501 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2502 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2503 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2504 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2505 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2506 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2507 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2508 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2509 jrow++; 2510 } 2511 y[18*i] = sum1; 2512 y[18*i+1] = sum2; 2513 y[18*i+2] = sum3; 2514 y[18*i+3] = sum4; 2515 y[18*i+4] = sum5; 2516 y[18*i+5] = sum6; 2517 y[18*i+6] = sum7; 2518 y[18*i+7] = sum8; 2519 y[18*i+8] = sum9; 2520 y[18*i+9] = sum10; 2521 y[18*i+10] = sum11; 2522 y[18*i+11] = sum12; 2523 y[18*i+12] = sum13; 2524 y[18*i+13] = sum14; 2525 y[18*i+14] = sum15; 2526 y[18*i+15] = sum16; 2527 y[18*i+16] = sum17; 2528 y[18*i+17] = sum18; 2529 } 2530 2531 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2532 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2533 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2539 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2540 { 2541 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2542 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2543 const PetscScalar *x,*v; 2544 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2545 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2546 PetscErrorCode ierr; 2547 const PetscInt m = b->AIJ->rmap->n,*idx; 2548 PetscInt n,i; 2549 2550 PetscFunctionBegin; 2551 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2552 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2553 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2554 2555 for (i=0; i<m; i++) { 2556 idx = a->j + a->i[i] ; 2557 v = a->a + a->i[i] ; 2558 n = a->i[i+1] - a->i[i]; 2559 alpha1 = x[18*i]; 2560 alpha2 = x[18*i+1]; 2561 alpha3 = x[18*i+2]; 2562 alpha4 = x[18*i+3]; 2563 alpha5 = x[18*i+4]; 2564 alpha6 = x[18*i+5]; 2565 alpha7 = x[18*i+6]; 2566 alpha8 = x[18*i+7]; 2567 alpha9 = x[18*i+8]; 2568 alpha10 = x[18*i+9]; 2569 alpha11 = x[18*i+10]; 2570 alpha12 = x[18*i+11]; 2571 alpha13 = x[18*i+12]; 2572 alpha14 = x[18*i+13]; 2573 alpha15 = x[18*i+14]; 2574 alpha16 = x[18*i+15]; 2575 alpha17 = x[18*i+16]; 2576 alpha18 = x[18*i+17]; 2577 while (n-->0) { 2578 y[18*(*idx)] += alpha1*(*v); 2579 y[18*(*idx)+1] += alpha2*(*v); 2580 y[18*(*idx)+2] += alpha3*(*v); 2581 y[18*(*idx)+3] += alpha4*(*v); 2582 y[18*(*idx)+4] += alpha5*(*v); 2583 y[18*(*idx)+5] += alpha6*(*v); 2584 y[18*(*idx)+6] += alpha7*(*v); 2585 y[18*(*idx)+7] += alpha8*(*v); 2586 y[18*(*idx)+8] += alpha9*(*v); 2587 y[18*(*idx)+9] += alpha10*(*v); 2588 y[18*(*idx)+10] += alpha11*(*v); 2589 y[18*(*idx)+11] += alpha12*(*v); 2590 y[18*(*idx)+12] += alpha13*(*v); 2591 y[18*(*idx)+13] += alpha14*(*v); 2592 y[18*(*idx)+14] += alpha15*(*v); 2593 y[18*(*idx)+15] += alpha16*(*v); 2594 y[18*(*idx)+16] += alpha17*(*v); 2595 y[18*(*idx)+17] += alpha18*(*v); 2596 idx++; v++; 2597 } 2598 } 2599 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2600 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2601 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2607 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2608 { 2609 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2610 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2611 const PetscScalar *x,*v; 2612 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2613 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2614 PetscErrorCode ierr; 2615 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2616 PetscInt n,i,jrow,j; 2617 2618 PetscFunctionBegin; 2619 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2620 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2621 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2622 idx = a->j; 2623 v = a->a; 2624 ii = a->i; 2625 2626 for (i=0; i<m; i++) { 2627 jrow = ii[i]; 2628 n = ii[i+1] - jrow; 2629 sum1 = 0.0; 2630 sum2 = 0.0; 2631 sum3 = 0.0; 2632 sum4 = 0.0; 2633 sum5 = 0.0; 2634 sum6 = 0.0; 2635 sum7 = 0.0; 2636 sum8 = 0.0; 2637 sum9 = 0.0; 2638 sum10 = 0.0; 2639 sum11 = 0.0; 2640 sum12 = 0.0; 2641 sum13 = 0.0; 2642 sum14 = 0.0; 2643 sum15 = 0.0; 2644 sum16 = 0.0; 2645 sum17 = 0.0; 2646 sum18 = 0.0; 2647 for (j=0; j<n; j++) { 2648 sum1 += v[jrow]*x[18*idx[jrow]]; 2649 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2650 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2651 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2652 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2653 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2654 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2655 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2656 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2657 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2658 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2659 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2660 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2661 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2662 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2663 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2664 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2665 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2666 jrow++; 2667 } 2668 y[18*i] += sum1; 2669 y[18*i+1] += sum2; 2670 y[18*i+2] += sum3; 2671 y[18*i+3] += sum4; 2672 y[18*i+4] += sum5; 2673 y[18*i+5] += sum6; 2674 y[18*i+6] += sum7; 2675 y[18*i+7] += sum8; 2676 y[18*i+8] += sum9; 2677 y[18*i+9] += sum10; 2678 y[18*i+10] += sum11; 2679 y[18*i+11] += sum12; 2680 y[18*i+12] += sum13; 2681 y[18*i+13] += sum14; 2682 y[18*i+14] += sum15; 2683 y[18*i+15] += sum16; 2684 y[18*i+16] += sum17; 2685 y[18*i+17] += sum18; 2686 } 2687 2688 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2689 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2690 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2696 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2697 { 2698 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2699 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2700 const PetscScalar *x,*v; 2701 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2702 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2703 PetscErrorCode ierr; 2704 const PetscInt m = b->AIJ->rmap->n,*idx; 2705 PetscInt n,i; 2706 2707 PetscFunctionBegin; 2708 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2709 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2710 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2711 for (i=0; i<m; i++) { 2712 idx = a->j + a->i[i] ; 2713 v = a->a + a->i[i] ; 2714 n = a->i[i+1] - a->i[i]; 2715 alpha1 = x[18*i]; 2716 alpha2 = x[18*i+1]; 2717 alpha3 = x[18*i+2]; 2718 alpha4 = x[18*i+3]; 2719 alpha5 = x[18*i+4]; 2720 alpha6 = x[18*i+5]; 2721 alpha7 = x[18*i+6]; 2722 alpha8 = x[18*i+7]; 2723 alpha9 = x[18*i+8]; 2724 alpha10 = x[18*i+9]; 2725 alpha11 = x[18*i+10]; 2726 alpha12 = x[18*i+11]; 2727 alpha13 = x[18*i+12]; 2728 alpha14 = x[18*i+13]; 2729 alpha15 = x[18*i+14]; 2730 alpha16 = x[18*i+15]; 2731 alpha17 = x[18*i+16]; 2732 alpha18 = x[18*i+17]; 2733 while (n-->0) { 2734 y[18*(*idx)] += alpha1*(*v); 2735 y[18*(*idx)+1] += alpha2*(*v); 2736 y[18*(*idx)+2] += alpha3*(*v); 2737 y[18*(*idx)+3] += alpha4*(*v); 2738 y[18*(*idx)+4] += alpha5*(*v); 2739 y[18*(*idx)+5] += alpha6*(*v); 2740 y[18*(*idx)+6] += alpha7*(*v); 2741 y[18*(*idx)+7] += alpha8*(*v); 2742 y[18*(*idx)+8] += alpha9*(*v); 2743 y[18*(*idx)+9] += alpha10*(*v); 2744 y[18*(*idx)+10] += alpha11*(*v); 2745 y[18*(*idx)+11] += alpha12*(*v); 2746 y[18*(*idx)+12] += alpha13*(*v); 2747 y[18*(*idx)+13] += alpha14*(*v); 2748 y[18*(*idx)+14] += alpha15*(*v); 2749 y[18*(*idx)+15] += alpha16*(*v); 2750 y[18*(*idx)+16] += alpha17*(*v); 2751 y[18*(*idx)+17] += alpha18*(*v); 2752 idx++; v++; 2753 } 2754 } 2755 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2756 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2757 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2758 PetscFunctionReturn(0); 2759 } 2760 2761 #undef __FUNCT__ 2762 #define __FUNCT__ "MatMult_SeqMAIJ_N" 2763 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2764 { 2765 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2766 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2767 const PetscScalar *x,*v; 2768 PetscScalar *y,*sums; 2769 PetscErrorCode ierr; 2770 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2771 PetscInt n,i,jrow,j,dof = b->dof,k; 2772 2773 PetscFunctionBegin; 2774 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2775 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2776 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2777 idx = a->j; 2778 v = a->a; 2779 ii = a->i; 2780 2781 for (i=0; i<m; i++) { 2782 jrow = ii[i]; 2783 n = ii[i+1] - jrow; 2784 sums = y + dof*i; 2785 for (j=0; j<n; j++) { 2786 for (k=0; k<dof; k++) { 2787 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2788 } 2789 jrow++; 2790 } 2791 } 2792 2793 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2794 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2795 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2796 PetscFunctionReturn(0); 2797 } 2798 2799 #undef __FUNCT__ 2800 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 2801 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2802 { 2803 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2805 const PetscScalar *x,*v; 2806 PetscScalar *y,*sums; 2807 PetscErrorCode ierr; 2808 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2809 PetscInt n,i,jrow,j,dof = b->dof,k; 2810 2811 PetscFunctionBegin; 2812 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2813 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2814 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2815 idx = a->j; 2816 v = a->a; 2817 ii = a->i; 2818 2819 for (i=0; i<m; i++) { 2820 jrow = ii[i]; 2821 n = ii[i+1] - jrow; 2822 sums = y + dof*i; 2823 for (j=0; j<n; j++) { 2824 for (k=0; k<dof; k++) { 2825 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2826 } 2827 jrow++; 2828 } 2829 } 2830 2831 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2832 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2833 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 2839 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2840 { 2841 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2842 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2843 const PetscScalar *x,*v,*alpha; 2844 PetscScalar *y; 2845 PetscErrorCode ierr; 2846 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2847 PetscInt n,i,k; 2848 2849 PetscFunctionBegin; 2850 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2851 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2852 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2853 for (i=0; i<m; i++) { 2854 idx = a->j + a->i[i] ; 2855 v = a->a + a->i[i] ; 2856 n = a->i[i+1] - a->i[i]; 2857 alpha = x + dof*i; 2858 while (n-->0) { 2859 for (k=0; k<dof; k++) { 2860 y[dof*(*idx)+k] += alpha[k]*(*v); 2861 } 2862 idx++; v++; 2863 } 2864 } 2865 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2866 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2867 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2868 PetscFunctionReturn(0); 2869 } 2870 2871 #undef __FUNCT__ 2872 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 2873 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2874 { 2875 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2876 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2877 const PetscScalar *x,*v,*alpha; 2878 PetscScalar *y; 2879 PetscErrorCode ierr; 2880 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2881 PetscInt n,i,k; 2882 2883 PetscFunctionBegin; 2884 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2885 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2886 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2887 for (i=0; i<m; i++) { 2888 idx = a->j + a->i[i] ; 2889 v = a->a + a->i[i] ; 2890 n = a->i[i+1] - a->i[i]; 2891 alpha = x + dof*i; 2892 while (n-->0) { 2893 for (k=0; k<dof; k++) { 2894 y[dof*(*idx)+k] += alpha[k]*(*v); 2895 } 2896 idx++; v++; 2897 } 2898 } 2899 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2900 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2901 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2902 PetscFunctionReturn(0); 2903 } 2904 2905 /*===================================================================================*/ 2906 #undef __FUNCT__ 2907 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2908 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2909 { 2910 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2911 PetscErrorCode ierr; 2912 2913 PetscFunctionBegin; 2914 /* start the scatter */ 2915 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2916 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2917 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2918 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2919 PetscFunctionReturn(0); 2920 } 2921 2922 #undef __FUNCT__ 2923 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2924 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2925 { 2926 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2927 PetscErrorCode ierr; 2928 2929 PetscFunctionBegin; 2930 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2931 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2932 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2933 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2934 PetscFunctionReturn(0); 2935 } 2936 2937 #undef __FUNCT__ 2938 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2939 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2940 { 2941 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2942 PetscErrorCode ierr; 2943 2944 PetscFunctionBegin; 2945 /* start the scatter */ 2946 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2947 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2948 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2949 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2950 PetscFunctionReturn(0); 2951 } 2952 2953 #undef __FUNCT__ 2954 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2955 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2956 { 2957 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2958 PetscErrorCode ierr; 2959 2960 PetscFunctionBegin; 2961 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2962 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2963 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2964 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2965 PetscFunctionReturn(0); 2966 } 2967 2968 /* ----------------------------------------------------------------*/ 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2971 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2972 { 2973 /* This routine requires testing -- but it's getting better. */ 2974 PetscErrorCode ierr; 2975 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2976 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2977 Mat P=pp->AIJ; 2978 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2979 PetscInt *pti,*ptj,*ptJ; 2980 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2981 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2982 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2983 MatScalar *ca; 2984 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2985 2986 PetscFunctionBegin; 2987 /* Start timer */ 2988 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2989 2990 /* Get ij structure of P^T */ 2991 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2992 2993 cn = pn*ppdof; 2994 /* Allocate ci array, arrays for fill computation and */ 2995 /* free space for accumulating nonzero column info */ 2996 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2997 ci[0] = 0; 2998 2999 /* Work arrays for rows of P^T*A */ 3000 ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr); 3001 3002 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 3003 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 3004 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3005 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 3006 current_space = free_space; 3007 3008 /* Determine symbolic info for each row of C: */ 3009 for (i=0;i<pn;i++) { 3010 ptnzi = pti[i+1] - pti[i]; 3011 ptJ = ptj + pti[i]; 3012 for (dof=0;dof<ppdof;dof++) { 3013 ptanzi = 0; 3014 /* Determine symbolic row of PtA: */ 3015 for (j=0;j<ptnzi;j++) { 3016 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 3017 arow = ptJ[j]*ppdof + dof; 3018 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 3019 anzj = ai[arow+1] - ai[arow]; 3020 ajj = aj + ai[arow]; 3021 for (k=0;k<anzj;k++) { 3022 if (!ptadenserow[ajj[k]]) { 3023 ptadenserow[ajj[k]] = -1; 3024 ptasparserow[ptanzi++] = ajj[k]; 3025 } 3026 } 3027 } 3028 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 3029 ptaj = ptasparserow; 3030 cnzi = 0; 3031 for (j=0;j<ptanzi;j++) { 3032 /* Get offset within block of P */ 3033 pshift = *ptaj%ppdof; 3034 /* Get block row of P */ 3035 prow = (*ptaj++)/ppdof; /* integer division */ 3036 /* P has same number of nonzeros per row as the compressed form */ 3037 pnzj = pi[prow+1] - pi[prow]; 3038 pjj = pj + pi[prow]; 3039 for (k=0;k<pnzj;k++) { 3040 /* Locations in C are shifted by the offset within the block */ 3041 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3042 if (!denserow[pjj[k]*ppdof+pshift]) { 3043 denserow[pjj[k]*ppdof+pshift] = -1; 3044 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3045 } 3046 } 3047 } 3048 3049 /* sort sparserow */ 3050 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3051 3052 /* If free space is not available, make more free space */ 3053 /* Double the amount of total space in the list */ 3054 if (current_space->local_remaining<cnzi) { 3055 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3056 } 3057 3058 /* Copy data into free space, and zero out denserows */ 3059 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 3060 current_space->array += cnzi; 3061 current_space->local_used += cnzi; 3062 current_space->local_remaining -= cnzi; 3063 3064 for (j=0;j<ptanzi;j++) { 3065 ptadenserow[ptasparserow[j]] = 0; 3066 } 3067 for (j=0;j<cnzi;j++) { 3068 denserow[sparserow[j]] = 0; 3069 } 3070 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3071 /* For now, we will recompute what is needed. */ 3072 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3073 } 3074 } 3075 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3076 /* Allocate space for cj, initialize cj, and */ 3077 /* destroy list of free space and other temporary array(s) */ 3078 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3079 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3080 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3081 3082 /* Allocate space for ca */ 3083 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 3084 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3085 3086 /* put together the new matrix */ 3087 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3088 3089 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3090 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3091 c = (Mat_SeqAIJ *)((*C)->data); 3092 c->free_a = PETSC_TRUE; 3093 c->free_ij = PETSC_TRUE; 3094 c->nonew = 0; 3095 3096 /* Clean up. */ 3097 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3098 3099 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 3100 PetscFunctionReturn(0); 3101 } 3102 3103 #undef __FUNCT__ 3104 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 3105 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3106 { 3107 /* This routine requires testing -- first draft only */ 3108 PetscErrorCode ierr; 3109 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3110 Mat P=pp->AIJ; 3111 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 3112 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 3113 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 3114 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 3115 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3116 const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3117 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3118 const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj; 3119 MatScalar *ca=c->a,*caj,*apa; 3120 3121 PetscFunctionBegin; 3122 /* Allocate temporary array for storage of one row of A*P */ 3123 ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr); 3124 ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr); 3125 ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr); 3126 ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr); 3127 3128 /* Clear old values in C */ 3129 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3130 3131 for (i=0;i<am;i++) { 3132 /* Form sparse row of A*P */ 3133 anzi = ai[i+1] - ai[i]; 3134 apnzj = 0; 3135 for (j=0;j<anzi;j++) { 3136 /* Get offset within block of P */ 3137 pshift = *aj%ppdof; 3138 /* Get block row of P */ 3139 prow = *aj++/ppdof; /* integer division */ 3140 pnzj = pi[prow+1] - pi[prow]; 3141 pjj = pj + pi[prow]; 3142 paj = pa + pi[prow]; 3143 for (k=0;k<pnzj;k++) { 3144 poffset = pjj[k]*ppdof+pshift; 3145 if (!apjdense[poffset]) { 3146 apjdense[poffset] = -1; 3147 apj[apnzj++] = poffset; 3148 } 3149 apa[poffset] += (*aa)*paj[k]; 3150 } 3151 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3152 aa++; 3153 } 3154 3155 /* Sort the j index array for quick sparse axpy. */ 3156 /* Note: a array does not need sorting as it is in dense storage locations. */ 3157 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3158 3159 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3160 prow = i/ppdof; /* integer division */ 3161 pshift = i%ppdof; 3162 poffset = pi[prow]; 3163 pnzi = pi[prow+1] - poffset; 3164 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3165 pJ = pj+poffset; 3166 pA = pa+poffset; 3167 for (j=0;j<pnzi;j++) { 3168 crow = (*pJ)*ppdof+pshift; 3169 cjj = cj + ci[crow]; 3170 caj = ca + ci[crow]; 3171 pJ++; 3172 /* Perform sparse axpy operation. Note cjj includes apj. */ 3173 for (k=0,nextap=0;nextap<apnzj;k++) { 3174 if (cjj[k]==apj[nextap]) { 3175 caj[k] += (*pA)*apa[apj[nextap++]]; 3176 } 3177 } 3178 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3179 pA++; 3180 } 3181 3182 /* Zero the current row info for A*P */ 3183 for (j=0;j<apnzj;j++) { 3184 apa[apj[j]] = 0.; 3185 apjdense[apj[j]] = 0; 3186 } 3187 } 3188 3189 /* Assemble the final matrix and clean up */ 3190 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3191 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3192 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3193 PetscFunctionReturn(0); 3194 } 3195 3196 #undef __FUNCT__ 3197 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3198 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3199 { 3200 PetscErrorCode ierr; 3201 3202 PetscFunctionBegin; 3203 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3204 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 3205 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3206 PetscFunctionReturn(0); 3207 } 3208 3209 #undef __FUNCT__ 3210 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3211 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3212 { 3213 PetscFunctionBegin; 3214 SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3215 PetscFunctionReturn(0); 3216 } 3217 3218 EXTERN_C_BEGIN 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3221 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3222 { 3223 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3224 Mat a = b->AIJ,B; 3225 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3226 PetscErrorCode ierr; 3227 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3228 PetscInt *cols; 3229 PetscScalar *vals; 3230 3231 PetscFunctionBegin; 3232 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3233 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 3234 for (i=0; i<m; i++) { 3235 nmax = PetscMax(nmax,aij->ilen[i]); 3236 for (j=0; j<dof; j++) { 3237 ilen[dof*i+j] = aij->ilen[i]; 3238 } 3239 } 3240 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3241 ierr = PetscFree(ilen);CHKERRQ(ierr); 3242 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 3243 ii = 0; 3244 for (i=0; i<m; i++) { 3245 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3246 for (j=0; j<dof; j++) { 3247 for (k=0; k<ncols; k++) { 3248 icols[k] = dof*cols[k]+j; 3249 } 3250 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3251 ii++; 3252 } 3253 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3254 } 3255 ierr = PetscFree(icols);CHKERRQ(ierr); 3256 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3257 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3258 3259 if (reuse == MAT_REUSE_MATRIX) { 3260 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3261 } else { 3262 *newmat = B; 3263 } 3264 PetscFunctionReturn(0); 3265 } 3266 EXTERN_C_END 3267 3268 #include "../src/mat/impls/aij/mpi/mpiaij.h" 3269 3270 EXTERN_C_BEGIN 3271 #undef __FUNCT__ 3272 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3273 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3274 { 3275 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3276 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3277 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3278 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3279 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3280 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3281 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3282 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 3283 PetscInt rstart,cstart,*garray,ii,k; 3284 PetscErrorCode ierr; 3285 PetscScalar *vals,*ovals; 3286 3287 PetscFunctionBegin; 3288 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3289 for (i=0; i<A->rmap->n/dof; i++) { 3290 nmax = PetscMax(nmax,AIJ->ilen[i]); 3291 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3292 for (j=0; j<dof; j++) { 3293 dnz[dof*i+j] = AIJ->ilen[i]; 3294 onz[dof*i+j] = OAIJ->ilen[i]; 3295 } 3296 } 3297 ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3298 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3299 3300 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3301 rstart = dof*maij->A->rmap->rstart; 3302 cstart = dof*maij->A->cmap->rstart; 3303 garray = mpiaij->garray; 3304 3305 ii = rstart; 3306 for (i=0; i<A->rmap->n/dof; i++) { 3307 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3308 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3309 for (j=0; j<dof; j++) { 3310 for (k=0; k<ncols; k++) { 3311 icols[k] = cstart + dof*cols[k]+j; 3312 } 3313 for (k=0; k<oncols; k++) { 3314 oicols[k] = dof*garray[ocols[k]]+j; 3315 } 3316 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3317 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3318 ii++; 3319 } 3320 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3321 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3322 } 3323 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3324 3325 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 3328 if (reuse == MAT_REUSE_MATRIX) { 3329 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3330 ((PetscObject)A)->refct = 1; 3331 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3332 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3333 } else { 3334 *newmat = B; 3335 } 3336 PetscFunctionReturn(0); 3337 } 3338 EXTERN_C_END 3339 3340 3341 /* ---------------------------------------------------------------------------------- */ 3342 #undef __FUNCT__ 3343 #define __FUNCT__ "MatCreateMAIJ" 3344 /*@C 3345 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3346 operations for multicomponent problems. It interpolates each component the same 3347 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3348 and MATMPIAIJ for distributed matrices. 3349 3350 Collective 3351 3352 Input Parameters: 3353 + A - the AIJ matrix describing the action on blocks 3354 - dof - the block size (number of components per node) 3355 3356 Output Parameter: 3357 . maij - the new MAIJ matrix 3358 3359 Operations provided: 3360 + MatMult 3361 . MatMultTranspose 3362 . MatMultAdd 3363 . MatMultTransposeAdd 3364 - MatView 3365 3366 Level: advanced 3367 3368 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension() 3369 @*/ 3370 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3371 { 3372 PetscErrorCode ierr; 3373 PetscMPIInt size; 3374 PetscInt n; 3375 Mat B; 3376 3377 PetscFunctionBegin; 3378 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3379 3380 if (dof == 1) { 3381 *maij = A; 3382 } else { 3383 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3384 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3385 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3386 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3387 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3388 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3389 B->assembled = PETSC_TRUE; 3390 3391 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3392 if (size == 1) { 3393 Mat_SeqMAIJ *b; 3394 3395 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3396 B->ops->destroy = MatDestroy_SeqMAIJ; 3397 B->ops->view = MatView_SeqMAIJ; 3398 b = (Mat_SeqMAIJ*)B->data; 3399 b->dof = dof; 3400 b->AIJ = A; 3401 if (dof == 2) { 3402 B->ops->mult = MatMult_SeqMAIJ_2; 3403 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3404 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3405 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3406 } else if (dof == 3) { 3407 B->ops->mult = MatMult_SeqMAIJ_3; 3408 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3409 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3410 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3411 } else if (dof == 4) { 3412 B->ops->mult = MatMult_SeqMAIJ_4; 3413 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3414 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3415 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3416 } else if (dof == 5) { 3417 B->ops->mult = MatMult_SeqMAIJ_5; 3418 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3419 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3420 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3421 } else if (dof == 6) { 3422 B->ops->mult = MatMult_SeqMAIJ_6; 3423 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3424 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3425 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3426 } else if (dof == 7) { 3427 B->ops->mult = MatMult_SeqMAIJ_7; 3428 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3429 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3430 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3431 } else if (dof == 8) { 3432 B->ops->mult = MatMult_SeqMAIJ_8; 3433 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3434 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3435 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3436 } else if (dof == 9) { 3437 B->ops->mult = MatMult_SeqMAIJ_9; 3438 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3439 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3440 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3441 } else if (dof == 10) { 3442 B->ops->mult = MatMult_SeqMAIJ_10; 3443 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3444 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3445 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3446 } else if (dof == 11) { 3447 B->ops->mult = MatMult_SeqMAIJ_11; 3448 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3449 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3450 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3451 } else if (dof == 16) { 3452 B->ops->mult = MatMult_SeqMAIJ_16; 3453 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3454 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3455 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3456 } else if (dof == 18) { 3457 B->ops->mult = MatMult_SeqMAIJ_18; 3458 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3459 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3460 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3461 } else { 3462 B->ops->mult = MatMult_SeqMAIJ_N; 3463 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3464 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3465 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3466 } 3467 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 3468 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3469 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3470 } else { 3471 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3472 Mat_MPIMAIJ *b; 3473 IS from,to; 3474 Vec gvec; 3475 PetscInt *garray,i; 3476 3477 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3478 B->ops->destroy = MatDestroy_MPIMAIJ; 3479 B->ops->view = MatView_MPIMAIJ; 3480 b = (Mat_MPIMAIJ*)B->data; 3481 b->dof = dof; 3482 b->A = A; 3483 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3484 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3485 3486 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3487 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 3488 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3489 3490 /* create two temporary Index sets for build scatter gather */ 3491 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 3492 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 3493 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 3494 ierr = PetscFree(garray);CHKERRQ(ierr); 3495 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3496 3497 /* create temporary global vector to generate scatter context */ 3498 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3499 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 3500 3501 /* generate the scatter context */ 3502 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3503 3504 ierr = ISDestroy(from);CHKERRQ(ierr); 3505 ierr = ISDestroy(to);CHKERRQ(ierr); 3506 ierr = VecDestroy(gvec);CHKERRQ(ierr); 3507 3508 B->ops->mult = MatMult_MPIMAIJ_dof; 3509 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3510 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3511 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3512 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3513 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3514 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3515 } 3516 *maij = B; 3517 ierr = MatView_Private(B);CHKERRQ(ierr); 3518 } 3519 PetscFunctionReturn(0); 3520 } 3521