1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/ 21 #include "../src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 /*@C 27 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 28 29 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 30 31 Input Parameter: 32 . A - the MAIJ matrix 33 34 Output Parameter: 35 . B - the AIJ matrix 36 37 Level: advanced 38 39 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 40 41 .seealso: MatCreateMAIJ() 42 @*/ 43 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 44 { 45 PetscErrorCode ierr; 46 PetscTruth ismpimaij,isseqmaij; 47 48 PetscFunctionBegin; 49 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 50 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 51 if (ismpimaij) { 52 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 53 54 *B = b->A; 55 } else if (isseqmaij) { 56 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 57 58 *B = b->AIJ; 59 } else { 60 *B = A; 61 } 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatMAIJRedimension" 67 /*@C 68 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 69 70 Collective 71 72 Input Parameter: 73 + A - the MAIJ matrix 74 - dof - the block size for the new matrix 75 76 Output Parameter: 77 . B - the new MAIJ matrix 78 79 Level: advanced 80 81 .seealso: MatCreateMAIJ() 82 @*/ 83 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 84 { 85 PetscErrorCode ierr; 86 Mat Aij; 87 88 PetscFunctionBegin; 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 if (b->AIJ) { 103 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 104 } 105 ierr = PetscFree(b);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatView_SeqMAIJ" 111 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 112 { 113 PetscErrorCode ierr; 114 Mat B; 115 116 PetscFunctionBegin; 117 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 118 ierr = MatView(B,viewer);CHKERRQ(ierr); 119 ierr = MatDestroy(B);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatView_MPIMAIJ" 125 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 126 { 127 PetscErrorCode ierr; 128 Mat B; 129 130 PetscFunctionBegin; 131 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 132 ierr = MatView(B,viewer);CHKERRQ(ierr); 133 ierr = MatDestroy(B);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatDestroy_MPIMAIJ" 139 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 140 { 141 PetscErrorCode ierr; 142 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 143 144 PetscFunctionBegin; 145 if (b->AIJ) { 146 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 147 } 148 if (b->OAIJ) { 149 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 150 } 151 if (b->A) { 152 ierr = MatDestroy(b->A);CHKERRQ(ierr); 153 } 154 if (b->ctx) { 155 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 156 } 157 if (b->w) { 158 ierr = VecDestroy(b->w);CHKERRQ(ierr); 159 } 160 ierr = PetscFree(b);CHKERRQ(ierr); 161 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /*MC 166 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 167 multicomponent problems, interpolating or restricting each component the same way independently. 168 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 169 170 Operations provided: 171 . MatMult 172 . MatMultTranspose 173 . MatMultAdd 174 . MatMultTransposeAdd 175 176 Level: advanced 177 178 .seealso: MatCreateSeqDense 179 M*/ 180 181 EXTERN_C_BEGIN 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatCreate_MAIJ" 184 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 185 { 186 PetscErrorCode ierr; 187 Mat_MPIMAIJ *b; 188 PetscMPIInt size; 189 190 PetscFunctionBegin; 191 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 192 A->data = (void*)b; 193 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 194 A->mapping = 0; 195 196 b->AIJ = 0; 197 b->dof = 0; 198 b->OAIJ = 0; 199 b->ctx = 0; 200 b->w = 0; 201 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 202 if (size == 1){ 203 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 204 } else { 205 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 206 } 207 PetscFunctionReturn(0); 208 } 209 EXTERN_C_END 210 211 /* --------------------------------------------------------------------------------------*/ 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatMult_SeqMAIJ_2" 214 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 215 { 216 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 217 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 218 const PetscScalar *x,*v; 219 PetscScalar *y, sum1, sum2; 220 PetscErrorCode ierr; 221 PetscInt nonzerorow=0,n,i,jrow,j; 222 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 223 224 PetscFunctionBegin; 225 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 226 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 227 idx = a->j; 228 v = a->a; 229 ii = a->i; 230 231 for (i=0; i<m; i++) { 232 jrow = ii[i]; 233 n = ii[i+1] - jrow; 234 sum1 = 0.0; 235 sum2 = 0.0; 236 nonzerorow += (n>0); 237 for (j=0; j<n; j++) { 238 sum1 += v[jrow]*x[2*idx[jrow]]; 239 sum2 += v[jrow]*x[2*idx[jrow]+1]; 240 jrow++; 241 } 242 y[2*i] = sum1; 243 y[2*i+1] = sum2; 244 } 245 246 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 247 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 248 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 254 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 255 { 256 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 257 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 258 const PetscScalar *x,*v; 259 PetscScalar *y,alpha1,alpha2; 260 PetscErrorCode ierr; 261 PetscInt n,i; 262 const PetscInt m = b->AIJ->rmap->n,*idx; 263 264 PetscFunctionBegin; 265 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 266 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 267 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 268 269 for (i=0; i<m; i++) { 270 idx = a->j + a->i[i] ; 271 v = a->a + a->i[i] ; 272 n = a->i[i+1] - a->i[i]; 273 alpha1 = x[2*i]; 274 alpha2 = x[2*i+1]; 275 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 276 } 277 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 278 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 279 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 285 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 286 { 287 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 288 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 289 const PetscScalar *x,*v; 290 PetscScalar *y,sum1, sum2; 291 PetscErrorCode ierr; 292 PetscInt n,i,jrow,j; 293 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 294 295 PetscFunctionBegin; 296 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 297 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 298 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 299 idx = a->j; 300 v = a->a; 301 ii = a->i; 302 303 for (i=0; i<m; i++) { 304 jrow = ii[i]; 305 n = ii[i+1] - jrow; 306 sum1 = 0.0; 307 sum2 = 0.0; 308 for (j=0; j<n; j++) { 309 sum1 += v[jrow]*x[2*idx[jrow]]; 310 sum2 += v[jrow]*x[2*idx[jrow]+1]; 311 jrow++; 312 } 313 y[2*i] += sum1; 314 y[2*i+1] += sum2; 315 } 316 317 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 318 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 319 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 324 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 325 { 326 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 327 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 328 const PetscScalar *x,*v; 329 PetscScalar *y,alpha1,alpha2; 330 PetscErrorCode ierr; 331 PetscInt n,i; 332 const PetscInt m = b->AIJ->rmap->n,*idx; 333 334 PetscFunctionBegin; 335 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 336 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 337 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 338 339 for (i=0; i<m; i++) { 340 idx = a->j + a->i[i] ; 341 v = a->a + a->i[i] ; 342 n = a->i[i+1] - a->i[i]; 343 alpha1 = x[2*i]; 344 alpha2 = x[2*i+1]; 345 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 346 } 347 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 348 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 349 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 /* --------------------------------------------------------------------------------------*/ 353 #undef __FUNCT__ 354 #define __FUNCT__ "MatMult_SeqMAIJ_3" 355 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 356 { 357 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 358 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 359 const PetscScalar *x,*v; 360 PetscScalar *y,sum1, sum2, sum3; 361 PetscErrorCode ierr; 362 PetscInt nonzerorow=0,n,i,jrow,j; 363 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 364 365 PetscFunctionBegin; 366 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 367 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 368 idx = a->j; 369 v = a->a; 370 ii = a->i; 371 372 for (i=0; i<m; i++) { 373 jrow = ii[i]; 374 n = ii[i+1] - jrow; 375 sum1 = 0.0; 376 sum2 = 0.0; 377 sum3 = 0.0; 378 nonzerorow += (n>0); 379 for (j=0; j<n; j++) { 380 sum1 += v[jrow]*x[3*idx[jrow]]; 381 sum2 += v[jrow]*x[3*idx[jrow]+1]; 382 sum3 += v[jrow]*x[3*idx[jrow]+2]; 383 jrow++; 384 } 385 y[3*i] = sum1; 386 y[3*i+1] = sum2; 387 y[3*i+2] = sum3; 388 } 389 390 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 391 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 392 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 396 #undef __FUNCT__ 397 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 398 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 399 { 400 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 401 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 402 const PetscScalar *x,*v; 403 PetscScalar *y,alpha1,alpha2,alpha3; 404 PetscErrorCode ierr; 405 PetscInt n,i; 406 const PetscInt m = b->AIJ->rmap->n,*idx; 407 408 PetscFunctionBegin; 409 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 410 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 411 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 412 413 for (i=0; i<m; i++) { 414 idx = a->j + a->i[i]; 415 v = a->a + a->i[i]; 416 n = a->i[i+1] - a->i[i]; 417 alpha1 = x[3*i]; 418 alpha2 = x[3*i+1]; 419 alpha3 = x[3*i+2]; 420 while (n-->0) { 421 y[3*(*idx)] += alpha1*(*v); 422 y[3*(*idx)+1] += alpha2*(*v); 423 y[3*(*idx)+2] += alpha3*(*v); 424 idx++; v++; 425 } 426 } 427 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 428 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 429 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 435 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 436 { 437 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 438 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 439 const PetscScalar *x,*v; 440 PetscScalar *y,sum1, sum2, sum3; 441 PetscErrorCode ierr; 442 PetscInt n,i,jrow,j; 443 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 444 445 PetscFunctionBegin; 446 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 447 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 448 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 449 idx = a->j; 450 v = a->a; 451 ii = a->i; 452 453 for (i=0; i<m; i++) { 454 jrow = ii[i]; 455 n = ii[i+1] - jrow; 456 sum1 = 0.0; 457 sum2 = 0.0; 458 sum3 = 0.0; 459 for (j=0; j<n; j++) { 460 sum1 += v[jrow]*x[3*idx[jrow]]; 461 sum2 += v[jrow]*x[3*idx[jrow]+1]; 462 sum3 += v[jrow]*x[3*idx[jrow]+2]; 463 jrow++; 464 } 465 y[3*i] += sum1; 466 y[3*i+1] += sum2; 467 y[3*i+2] += sum3; 468 } 469 470 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 471 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 472 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 477 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 478 { 479 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 480 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 481 const PetscScalar *x,*v; 482 PetscScalar *y,alpha1,alpha2,alpha3; 483 PetscErrorCode ierr; 484 PetscInt n,i; 485 const PetscInt m = b->AIJ->rmap->n,*idx; 486 487 PetscFunctionBegin; 488 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 489 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 490 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 491 for (i=0; i<m; i++) { 492 idx = a->j + a->i[i] ; 493 v = a->a + a->i[i] ; 494 n = a->i[i+1] - a->i[i]; 495 alpha1 = x[3*i]; 496 alpha2 = x[3*i+1]; 497 alpha3 = x[3*i+2]; 498 while (n-->0) { 499 y[3*(*idx)] += alpha1*(*v); 500 y[3*(*idx)+1] += alpha2*(*v); 501 y[3*(*idx)+2] += alpha3*(*v); 502 idx++; v++; 503 } 504 } 505 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 506 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 507 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 /* ------------------------------------------------------------------------------*/ 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatMult_SeqMAIJ_4" 514 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 515 { 516 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 517 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 518 const PetscScalar *x,*v; 519 PetscScalar *y,sum1, sum2, sum3, sum4; 520 PetscErrorCode ierr; 521 PetscInt nonzerorow=0,n,i,jrow,j; 522 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 523 524 PetscFunctionBegin; 525 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 526 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 527 idx = a->j; 528 v = a->a; 529 ii = a->i; 530 531 for (i=0; i<m; i++) { 532 jrow = ii[i]; 533 n = ii[i+1] - jrow; 534 sum1 = 0.0; 535 sum2 = 0.0; 536 sum3 = 0.0; 537 sum4 = 0.0; 538 nonzerorow += (n>0); 539 for (j=0; j<n; j++) { 540 sum1 += v[jrow]*x[4*idx[jrow]]; 541 sum2 += v[jrow]*x[4*idx[jrow]+1]; 542 sum3 += v[jrow]*x[4*idx[jrow]+2]; 543 sum4 += v[jrow]*x[4*idx[jrow]+3]; 544 jrow++; 545 } 546 y[4*i] = sum1; 547 y[4*i+1] = sum2; 548 y[4*i+2] = sum3; 549 y[4*i+3] = sum4; 550 } 551 552 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 553 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 554 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 } 557 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 560 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 561 { 562 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 564 const PetscScalar *x,*v; 565 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 566 PetscErrorCode ierr; 567 PetscInt n,i; 568 const PetscInt m = b->AIJ->rmap->n,*idx; 569 570 PetscFunctionBegin; 571 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 572 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 573 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 574 for (i=0; i<m; i++) { 575 idx = a->j + a->i[i] ; 576 v = a->a + a->i[i] ; 577 n = a->i[i+1] - a->i[i]; 578 alpha1 = x[4*i]; 579 alpha2 = x[4*i+1]; 580 alpha3 = x[4*i+2]; 581 alpha4 = x[4*i+3]; 582 while (n-->0) { 583 y[4*(*idx)] += alpha1*(*v); 584 y[4*(*idx)+1] += alpha2*(*v); 585 y[4*(*idx)+2] += alpha3*(*v); 586 y[4*(*idx)+3] += alpha4*(*v); 587 idx++; v++; 588 } 589 } 590 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 591 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 592 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 598 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 599 { 600 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 601 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 602 const PetscScalar *x,*v; 603 PetscScalar *y,sum1, sum2, sum3, sum4; 604 PetscErrorCode ierr; 605 PetscInt n,i,jrow,j; 606 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 607 608 PetscFunctionBegin; 609 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 610 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 611 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 612 idx = a->j; 613 v = a->a; 614 ii = a->i; 615 616 for (i=0; i<m; i++) { 617 jrow = ii[i]; 618 n = ii[i+1] - jrow; 619 sum1 = 0.0; 620 sum2 = 0.0; 621 sum3 = 0.0; 622 sum4 = 0.0; 623 for (j=0; j<n; j++) { 624 sum1 += v[jrow]*x[4*idx[jrow]]; 625 sum2 += v[jrow]*x[4*idx[jrow]+1]; 626 sum3 += v[jrow]*x[4*idx[jrow]+2]; 627 sum4 += v[jrow]*x[4*idx[jrow]+3]; 628 jrow++; 629 } 630 y[4*i] += sum1; 631 y[4*i+1] += sum2; 632 y[4*i+2] += sum3; 633 y[4*i+3] += sum4; 634 } 635 636 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 637 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 638 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 639 PetscFunctionReturn(0); 640 } 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 643 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 644 { 645 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 646 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 647 const PetscScalar *x,*v; 648 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 649 PetscErrorCode ierr; 650 PetscInt n,i; 651 const PetscInt m = b->AIJ->rmap->n,*idx; 652 653 PetscFunctionBegin; 654 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 655 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 656 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 657 658 for (i=0; i<m; i++) { 659 idx = a->j + a->i[i] ; 660 v = a->a + a->i[i] ; 661 n = a->i[i+1] - a->i[i]; 662 alpha1 = x[4*i]; 663 alpha2 = x[4*i+1]; 664 alpha3 = x[4*i+2]; 665 alpha4 = x[4*i+3]; 666 while (n-->0) { 667 y[4*(*idx)] += alpha1*(*v); 668 y[4*(*idx)+1] += alpha2*(*v); 669 y[4*(*idx)+2] += alpha3*(*v); 670 y[4*(*idx)+3] += alpha4*(*v); 671 idx++; v++; 672 } 673 } 674 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 675 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 676 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 /* ------------------------------------------------------------------------------*/ 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatMult_SeqMAIJ_5" 683 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 684 { 685 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 687 const PetscScalar *x,*v; 688 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 689 PetscErrorCode ierr; 690 PetscInt nonzerorow=0,n,i,jrow,j; 691 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 692 693 PetscFunctionBegin; 694 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 695 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 696 idx = a->j; 697 v = a->a; 698 ii = a->i; 699 700 for (i=0; i<m; i++) { 701 jrow = ii[i]; 702 n = ii[i+1] - jrow; 703 sum1 = 0.0; 704 sum2 = 0.0; 705 sum3 = 0.0; 706 sum4 = 0.0; 707 sum5 = 0.0; 708 nonzerorow += (n>0); 709 for (j=0; j<n; j++) { 710 sum1 += v[jrow]*x[5*idx[jrow]]; 711 sum2 += v[jrow]*x[5*idx[jrow]+1]; 712 sum3 += v[jrow]*x[5*idx[jrow]+2]; 713 sum4 += v[jrow]*x[5*idx[jrow]+3]; 714 sum5 += v[jrow]*x[5*idx[jrow]+4]; 715 jrow++; 716 } 717 y[5*i] = sum1; 718 y[5*i+1] = sum2; 719 y[5*i+2] = sum3; 720 y[5*i+3] = sum4; 721 y[5*i+4] = sum5; 722 } 723 724 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 725 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 726 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 732 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 733 { 734 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 735 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 736 const PetscScalar *x,*v; 737 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 738 PetscErrorCode ierr; 739 PetscInt n,i; 740 const PetscInt m = b->AIJ->rmap->n,*idx; 741 742 PetscFunctionBegin; 743 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 744 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 745 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 746 747 for (i=0; i<m; i++) { 748 idx = a->j + a->i[i] ; 749 v = a->a + a->i[i] ; 750 n = a->i[i+1] - a->i[i]; 751 alpha1 = x[5*i]; 752 alpha2 = x[5*i+1]; 753 alpha3 = x[5*i+2]; 754 alpha4 = x[5*i+3]; 755 alpha5 = x[5*i+4]; 756 while (n-->0) { 757 y[5*(*idx)] += alpha1*(*v); 758 y[5*(*idx)+1] += alpha2*(*v); 759 y[5*(*idx)+2] += alpha3*(*v); 760 y[5*(*idx)+3] += alpha4*(*v); 761 y[5*(*idx)+4] += alpha5*(*v); 762 idx++; v++; 763 } 764 } 765 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 766 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 767 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 #undef __FUNCT__ 772 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 773 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 774 { 775 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 776 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 777 const PetscScalar *x,*v; 778 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 779 PetscErrorCode ierr; 780 PetscInt n,i,jrow,j; 781 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 782 783 PetscFunctionBegin; 784 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 785 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 786 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 787 idx = a->j; 788 v = a->a; 789 ii = a->i; 790 791 for (i=0; i<m; i++) { 792 jrow = ii[i]; 793 n = ii[i+1] - jrow; 794 sum1 = 0.0; 795 sum2 = 0.0; 796 sum3 = 0.0; 797 sum4 = 0.0; 798 sum5 = 0.0; 799 for (j=0; j<n; j++) { 800 sum1 += v[jrow]*x[5*idx[jrow]]; 801 sum2 += v[jrow]*x[5*idx[jrow]+1]; 802 sum3 += v[jrow]*x[5*idx[jrow]+2]; 803 sum4 += v[jrow]*x[5*idx[jrow]+3]; 804 sum5 += v[jrow]*x[5*idx[jrow]+4]; 805 jrow++; 806 } 807 y[5*i] += sum1; 808 y[5*i+1] += sum2; 809 y[5*i+2] += sum3; 810 y[5*i+3] += sum4; 811 y[5*i+4] += sum5; 812 } 813 814 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 815 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 816 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 822 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 823 { 824 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 825 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 826 const PetscScalar *x,*v; 827 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 828 PetscErrorCode ierr; 829 PetscInt n,i; 830 const PetscInt m = b->AIJ->rmap->n,*idx; 831 832 PetscFunctionBegin; 833 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 834 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 835 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 836 837 for (i=0; i<m; i++) { 838 idx = a->j + a->i[i] ; 839 v = a->a + a->i[i] ; 840 n = a->i[i+1] - a->i[i]; 841 alpha1 = x[5*i]; 842 alpha2 = x[5*i+1]; 843 alpha3 = x[5*i+2]; 844 alpha4 = x[5*i+3]; 845 alpha5 = x[5*i+4]; 846 while (n-->0) { 847 y[5*(*idx)] += alpha1*(*v); 848 y[5*(*idx)+1] += alpha2*(*v); 849 y[5*(*idx)+2] += alpha3*(*v); 850 y[5*(*idx)+3] += alpha4*(*v); 851 y[5*(*idx)+4] += alpha5*(*v); 852 idx++; v++; 853 } 854 } 855 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 856 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 857 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 /* ------------------------------------------------------------------------------*/ 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatMult_SeqMAIJ_6" 864 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 865 { 866 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 867 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 868 const PetscScalar *x,*v; 869 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 870 PetscErrorCode ierr; 871 PetscInt nonzerorow=0,n,i,jrow,j; 872 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 873 874 PetscFunctionBegin; 875 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 876 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 877 idx = a->j; 878 v = a->a; 879 ii = a->i; 880 881 for (i=0; i<m; i++) { 882 jrow = ii[i]; 883 n = ii[i+1] - jrow; 884 sum1 = 0.0; 885 sum2 = 0.0; 886 sum3 = 0.0; 887 sum4 = 0.0; 888 sum5 = 0.0; 889 sum6 = 0.0; 890 nonzerorow += (n>0); 891 for (j=0; j<n; j++) { 892 sum1 += v[jrow]*x[6*idx[jrow]]; 893 sum2 += v[jrow]*x[6*idx[jrow]+1]; 894 sum3 += v[jrow]*x[6*idx[jrow]+2]; 895 sum4 += v[jrow]*x[6*idx[jrow]+3]; 896 sum5 += v[jrow]*x[6*idx[jrow]+4]; 897 sum6 += v[jrow]*x[6*idx[jrow]+5]; 898 jrow++; 899 } 900 y[6*i] = sum1; 901 y[6*i+1] = sum2; 902 y[6*i+2] = sum3; 903 y[6*i+3] = sum4; 904 y[6*i+4] = sum5; 905 y[6*i+5] = sum6; 906 } 907 908 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 909 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 910 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 #undef __FUNCT__ 915 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 916 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 917 { 918 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 919 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 920 const PetscScalar *x,*v; 921 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 922 PetscErrorCode ierr; 923 PetscInt n,i; 924 const PetscInt m = b->AIJ->rmap->n,*idx; 925 926 PetscFunctionBegin; 927 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 928 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 929 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 930 931 for (i=0; i<m; i++) { 932 idx = a->j + a->i[i] ; 933 v = a->a + a->i[i] ; 934 n = a->i[i+1] - a->i[i]; 935 alpha1 = x[6*i]; 936 alpha2 = x[6*i+1]; 937 alpha3 = x[6*i+2]; 938 alpha4 = x[6*i+3]; 939 alpha5 = x[6*i+4]; 940 alpha6 = x[6*i+5]; 941 while (n-->0) { 942 y[6*(*idx)] += alpha1*(*v); 943 y[6*(*idx)+1] += alpha2*(*v); 944 y[6*(*idx)+2] += alpha3*(*v); 945 y[6*(*idx)+3] += alpha4*(*v); 946 y[6*(*idx)+4] += alpha5*(*v); 947 y[6*(*idx)+5] += alpha6*(*v); 948 idx++; v++; 949 } 950 } 951 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 952 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 953 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 #undef __FUNCT__ 958 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 959 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 960 { 961 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 962 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 963 const PetscScalar *x,*v; 964 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 965 PetscErrorCode ierr; 966 PetscInt n,i,jrow,j; 967 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 968 969 PetscFunctionBegin; 970 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 971 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 972 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 973 idx = a->j; 974 v = a->a; 975 ii = a->i; 976 977 for (i=0; i<m; i++) { 978 jrow = ii[i]; 979 n = ii[i+1] - jrow; 980 sum1 = 0.0; 981 sum2 = 0.0; 982 sum3 = 0.0; 983 sum4 = 0.0; 984 sum5 = 0.0; 985 sum6 = 0.0; 986 for (j=0; j<n; j++) { 987 sum1 += v[jrow]*x[6*idx[jrow]]; 988 sum2 += v[jrow]*x[6*idx[jrow]+1]; 989 sum3 += v[jrow]*x[6*idx[jrow]+2]; 990 sum4 += v[jrow]*x[6*idx[jrow]+3]; 991 sum5 += v[jrow]*x[6*idx[jrow]+4]; 992 sum6 += v[jrow]*x[6*idx[jrow]+5]; 993 jrow++; 994 } 995 y[6*i] += sum1; 996 y[6*i+1] += sum2; 997 y[6*i+2] += sum3; 998 y[6*i+3] += sum4; 999 y[6*i+4] += sum5; 1000 y[6*i+5] += sum6; 1001 } 1002 1003 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1004 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1005 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1011 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 1012 { 1013 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1014 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1015 const PetscScalar *x,*v; 1016 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1017 PetscErrorCode ierr; 1018 PetscInt n,i; 1019 const PetscInt m = b->AIJ->rmap->n,*idx; 1020 1021 PetscFunctionBegin; 1022 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1023 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1024 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1025 1026 for (i=0; i<m; i++) { 1027 idx = a->j + a->i[i] ; 1028 v = a->a + a->i[i] ; 1029 n = a->i[i+1] - a->i[i]; 1030 alpha1 = x[6*i]; 1031 alpha2 = x[6*i+1]; 1032 alpha3 = x[6*i+2]; 1033 alpha4 = x[6*i+3]; 1034 alpha5 = x[6*i+4]; 1035 alpha6 = x[6*i+5]; 1036 while (n-->0) { 1037 y[6*(*idx)] += alpha1*(*v); 1038 y[6*(*idx)+1] += alpha2*(*v); 1039 y[6*(*idx)+2] += alpha3*(*v); 1040 y[6*(*idx)+3] += alpha4*(*v); 1041 y[6*(*idx)+4] += alpha5*(*v); 1042 y[6*(*idx)+5] += alpha6*(*v); 1043 idx++; v++; 1044 } 1045 } 1046 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1048 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 /* ------------------------------------------------------------------------------*/ 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1055 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1056 { 1057 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1058 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1059 const PetscScalar *x,*v; 1060 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1061 PetscErrorCode ierr; 1062 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1063 PetscInt nonzerorow=0,n,i,jrow,j; 1064 1065 PetscFunctionBegin; 1066 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1067 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1068 idx = a->j; 1069 v = a->a; 1070 ii = a->i; 1071 1072 for (i=0; i<m; i++) { 1073 jrow = ii[i]; 1074 n = ii[i+1] - jrow; 1075 sum1 = 0.0; 1076 sum2 = 0.0; 1077 sum3 = 0.0; 1078 sum4 = 0.0; 1079 sum5 = 0.0; 1080 sum6 = 0.0; 1081 sum7 = 0.0; 1082 nonzerorow += (n>0); 1083 for (j=0; j<n; j++) { 1084 sum1 += v[jrow]*x[7*idx[jrow]]; 1085 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1086 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1087 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1088 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1089 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1090 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1091 jrow++; 1092 } 1093 y[7*i] = sum1; 1094 y[7*i+1] = sum2; 1095 y[7*i+2] = sum3; 1096 y[7*i+3] = sum4; 1097 y[7*i+4] = sum5; 1098 y[7*i+5] = sum6; 1099 y[7*i+6] = sum7; 1100 } 1101 1102 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1103 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1104 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1110 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1111 { 1112 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1113 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1114 const PetscScalar *x,*v; 1115 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1116 PetscErrorCode ierr; 1117 const PetscInt m = b->AIJ->rmap->n,*idx; 1118 PetscInt n,i; 1119 1120 PetscFunctionBegin; 1121 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1122 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1123 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1124 1125 for (i=0; i<m; i++) { 1126 idx = a->j + a->i[i] ; 1127 v = a->a + a->i[i] ; 1128 n = a->i[i+1] - a->i[i]; 1129 alpha1 = x[7*i]; 1130 alpha2 = x[7*i+1]; 1131 alpha3 = x[7*i+2]; 1132 alpha4 = x[7*i+3]; 1133 alpha5 = x[7*i+4]; 1134 alpha6 = x[7*i+5]; 1135 alpha7 = x[7*i+6]; 1136 while (n-->0) { 1137 y[7*(*idx)] += alpha1*(*v); 1138 y[7*(*idx)+1] += alpha2*(*v); 1139 y[7*(*idx)+2] += alpha3*(*v); 1140 y[7*(*idx)+3] += alpha4*(*v); 1141 y[7*(*idx)+4] += alpha5*(*v); 1142 y[7*(*idx)+5] += alpha6*(*v); 1143 y[7*(*idx)+6] += alpha7*(*v); 1144 idx++; v++; 1145 } 1146 } 1147 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1148 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1149 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1155 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1156 { 1157 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1158 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1159 const PetscScalar *x,*v; 1160 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1161 PetscErrorCode ierr; 1162 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1163 PetscInt n,i,jrow,j; 1164 1165 PetscFunctionBegin; 1166 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1167 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1168 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1169 idx = a->j; 1170 v = a->a; 1171 ii = a->i; 1172 1173 for (i=0; i<m; i++) { 1174 jrow = ii[i]; 1175 n = ii[i+1] - jrow; 1176 sum1 = 0.0; 1177 sum2 = 0.0; 1178 sum3 = 0.0; 1179 sum4 = 0.0; 1180 sum5 = 0.0; 1181 sum6 = 0.0; 1182 sum7 = 0.0; 1183 for (j=0; j<n; j++) { 1184 sum1 += v[jrow]*x[7*idx[jrow]]; 1185 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1186 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1187 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1188 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1189 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1190 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1191 jrow++; 1192 } 1193 y[7*i] += sum1; 1194 y[7*i+1] += sum2; 1195 y[7*i+2] += sum3; 1196 y[7*i+3] += sum4; 1197 y[7*i+4] += sum5; 1198 y[7*i+5] += sum6; 1199 y[7*i+6] += sum7; 1200 } 1201 1202 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1203 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1204 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1210 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1211 { 1212 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1214 const PetscScalar *x,*v; 1215 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1216 PetscErrorCode ierr; 1217 const PetscInt m = b->AIJ->rmap->n,*idx; 1218 PetscInt n,i; 1219 1220 PetscFunctionBegin; 1221 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1222 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1223 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1224 for (i=0; i<m; i++) { 1225 idx = a->j + a->i[i] ; 1226 v = a->a + a->i[i] ; 1227 n = a->i[i+1] - a->i[i]; 1228 alpha1 = x[7*i]; 1229 alpha2 = x[7*i+1]; 1230 alpha3 = x[7*i+2]; 1231 alpha4 = x[7*i+3]; 1232 alpha5 = x[7*i+4]; 1233 alpha6 = x[7*i+5]; 1234 alpha7 = x[7*i+6]; 1235 while (n-->0) { 1236 y[7*(*idx)] += alpha1*(*v); 1237 y[7*(*idx)+1] += alpha2*(*v); 1238 y[7*(*idx)+2] += alpha3*(*v); 1239 y[7*(*idx)+3] += alpha4*(*v); 1240 y[7*(*idx)+4] += alpha5*(*v); 1241 y[7*(*idx)+5] += alpha6*(*v); 1242 y[7*(*idx)+6] += alpha7*(*v); 1243 idx++; v++; 1244 } 1245 } 1246 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1247 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1248 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1254 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1255 { 1256 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1257 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1258 const PetscScalar *x,*v; 1259 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1260 PetscErrorCode ierr; 1261 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1262 PetscInt nonzerorow=0,n,i,jrow,j; 1263 1264 PetscFunctionBegin; 1265 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1266 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1267 idx = a->j; 1268 v = a->a; 1269 ii = a->i; 1270 1271 for (i=0; i<m; i++) { 1272 jrow = ii[i]; 1273 n = ii[i+1] - jrow; 1274 sum1 = 0.0; 1275 sum2 = 0.0; 1276 sum3 = 0.0; 1277 sum4 = 0.0; 1278 sum5 = 0.0; 1279 sum6 = 0.0; 1280 sum7 = 0.0; 1281 sum8 = 0.0; 1282 nonzerorow += (n>0); 1283 for (j=0; j<n; j++) { 1284 sum1 += v[jrow]*x[8*idx[jrow]]; 1285 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1286 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1287 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1288 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1289 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1290 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1291 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1292 jrow++; 1293 } 1294 y[8*i] = sum1; 1295 y[8*i+1] = sum2; 1296 y[8*i+2] = sum3; 1297 y[8*i+3] = sum4; 1298 y[8*i+4] = sum5; 1299 y[8*i+5] = sum6; 1300 y[8*i+6] = sum7; 1301 y[8*i+7] = sum8; 1302 } 1303 1304 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1305 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1306 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1312 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1313 { 1314 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1316 const PetscScalar *x,*v; 1317 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1318 PetscErrorCode ierr; 1319 const PetscInt m = b->AIJ->rmap->n,*idx; 1320 PetscInt n,i; 1321 1322 PetscFunctionBegin; 1323 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1324 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1325 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1326 1327 for (i=0; i<m; i++) { 1328 idx = a->j + a->i[i] ; 1329 v = a->a + a->i[i] ; 1330 n = a->i[i+1] - a->i[i]; 1331 alpha1 = x[8*i]; 1332 alpha2 = x[8*i+1]; 1333 alpha3 = x[8*i+2]; 1334 alpha4 = x[8*i+3]; 1335 alpha5 = x[8*i+4]; 1336 alpha6 = x[8*i+5]; 1337 alpha7 = x[8*i+6]; 1338 alpha8 = x[8*i+7]; 1339 while (n-->0) { 1340 y[8*(*idx)] += alpha1*(*v); 1341 y[8*(*idx)+1] += alpha2*(*v); 1342 y[8*(*idx)+2] += alpha3*(*v); 1343 y[8*(*idx)+3] += alpha4*(*v); 1344 y[8*(*idx)+4] += alpha5*(*v); 1345 y[8*(*idx)+5] += alpha6*(*v); 1346 y[8*(*idx)+6] += alpha7*(*v); 1347 y[8*(*idx)+7] += alpha8*(*v); 1348 idx++; v++; 1349 } 1350 } 1351 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1352 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1353 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1359 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1360 { 1361 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1362 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1363 const PetscScalar *x,*v; 1364 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1365 PetscErrorCode ierr; 1366 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1367 PetscInt n,i,jrow,j; 1368 1369 PetscFunctionBegin; 1370 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1371 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1372 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1373 idx = a->j; 1374 v = a->a; 1375 ii = a->i; 1376 1377 for (i=0; i<m; i++) { 1378 jrow = ii[i]; 1379 n = ii[i+1] - jrow; 1380 sum1 = 0.0; 1381 sum2 = 0.0; 1382 sum3 = 0.0; 1383 sum4 = 0.0; 1384 sum5 = 0.0; 1385 sum6 = 0.0; 1386 sum7 = 0.0; 1387 sum8 = 0.0; 1388 for (j=0; j<n; j++) { 1389 sum1 += v[jrow]*x[8*idx[jrow]]; 1390 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1391 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1392 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1393 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1394 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1395 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1396 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1397 jrow++; 1398 } 1399 y[8*i] += sum1; 1400 y[8*i+1] += sum2; 1401 y[8*i+2] += sum3; 1402 y[8*i+3] += sum4; 1403 y[8*i+4] += sum5; 1404 y[8*i+5] += sum6; 1405 y[8*i+6] += sum7; 1406 y[8*i+7] += sum8; 1407 } 1408 1409 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1410 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1411 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1417 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1418 { 1419 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1420 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1421 const PetscScalar *x,*v; 1422 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1423 PetscErrorCode ierr; 1424 const PetscInt m = b->AIJ->rmap->n,*idx; 1425 PetscInt n,i; 1426 1427 PetscFunctionBegin; 1428 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1429 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1430 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1431 for (i=0; i<m; i++) { 1432 idx = a->j + a->i[i] ; 1433 v = a->a + a->i[i] ; 1434 n = a->i[i+1] - a->i[i]; 1435 alpha1 = x[8*i]; 1436 alpha2 = x[8*i+1]; 1437 alpha3 = x[8*i+2]; 1438 alpha4 = x[8*i+3]; 1439 alpha5 = x[8*i+4]; 1440 alpha6 = x[8*i+5]; 1441 alpha7 = x[8*i+6]; 1442 alpha8 = x[8*i+7]; 1443 while (n-->0) { 1444 y[8*(*idx)] += alpha1*(*v); 1445 y[8*(*idx)+1] += alpha2*(*v); 1446 y[8*(*idx)+2] += alpha3*(*v); 1447 y[8*(*idx)+3] += alpha4*(*v); 1448 y[8*(*idx)+4] += alpha5*(*v); 1449 y[8*(*idx)+5] += alpha6*(*v); 1450 y[8*(*idx)+6] += alpha7*(*v); 1451 y[8*(*idx)+7] += alpha8*(*v); 1452 idx++; v++; 1453 } 1454 } 1455 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1456 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1457 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } 1460 1461 /* ------------------------------------------------------------------------------*/ 1462 #undef __FUNCT__ 1463 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1464 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1465 { 1466 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1468 const PetscScalar *x,*v; 1469 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1470 PetscErrorCode ierr; 1471 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1472 PetscInt nonzerorow=0,n,i,jrow,j; 1473 1474 PetscFunctionBegin; 1475 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1476 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1477 idx = a->j; 1478 v = a->a; 1479 ii = a->i; 1480 1481 for (i=0; i<m; i++) { 1482 jrow = ii[i]; 1483 n = ii[i+1] - jrow; 1484 sum1 = 0.0; 1485 sum2 = 0.0; 1486 sum3 = 0.0; 1487 sum4 = 0.0; 1488 sum5 = 0.0; 1489 sum6 = 0.0; 1490 sum7 = 0.0; 1491 sum8 = 0.0; 1492 sum9 = 0.0; 1493 nonzerorow += (n>0); 1494 for (j=0; j<n; j++) { 1495 sum1 += v[jrow]*x[9*idx[jrow]]; 1496 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1497 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1498 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1499 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1500 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1501 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1502 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1503 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1504 jrow++; 1505 } 1506 y[9*i] = sum1; 1507 y[9*i+1] = sum2; 1508 y[9*i+2] = sum3; 1509 y[9*i+3] = sum4; 1510 y[9*i+4] = sum5; 1511 y[9*i+5] = sum6; 1512 y[9*i+6] = sum7; 1513 y[9*i+7] = sum8; 1514 y[9*i+8] = sum9; 1515 } 1516 1517 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1518 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1519 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 /* ------------------------------------------------------------------------------*/ 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1527 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1528 { 1529 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1530 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1531 const PetscScalar *x,*v; 1532 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1533 PetscErrorCode ierr; 1534 const PetscInt m = b->AIJ->rmap->n,*idx; 1535 PetscInt n,i; 1536 1537 PetscFunctionBegin; 1538 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1539 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1540 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1541 1542 for (i=0; i<m; i++) { 1543 idx = a->j + a->i[i] ; 1544 v = a->a + a->i[i] ; 1545 n = a->i[i+1] - a->i[i]; 1546 alpha1 = x[9*i]; 1547 alpha2 = x[9*i+1]; 1548 alpha3 = x[9*i+2]; 1549 alpha4 = x[9*i+3]; 1550 alpha5 = x[9*i+4]; 1551 alpha6 = x[9*i+5]; 1552 alpha7 = x[9*i+6]; 1553 alpha8 = x[9*i+7]; 1554 alpha9 = x[9*i+8]; 1555 while (n-->0) { 1556 y[9*(*idx)] += alpha1*(*v); 1557 y[9*(*idx)+1] += alpha2*(*v); 1558 y[9*(*idx)+2] += alpha3*(*v); 1559 y[9*(*idx)+3] += alpha4*(*v); 1560 y[9*(*idx)+4] += alpha5*(*v); 1561 y[9*(*idx)+5] += alpha6*(*v); 1562 y[9*(*idx)+6] += alpha7*(*v); 1563 y[9*(*idx)+7] += alpha8*(*v); 1564 y[9*(*idx)+8] += alpha9*(*v); 1565 idx++; v++; 1566 } 1567 } 1568 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1569 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1570 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1576 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1577 { 1578 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1579 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1580 const PetscScalar *x,*v; 1581 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1582 PetscErrorCode ierr; 1583 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1584 PetscInt n,i,jrow,j; 1585 1586 PetscFunctionBegin; 1587 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1588 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1589 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1590 idx = a->j; 1591 v = a->a; 1592 ii = a->i; 1593 1594 for (i=0; i<m; i++) { 1595 jrow = ii[i]; 1596 n = ii[i+1] - jrow; 1597 sum1 = 0.0; 1598 sum2 = 0.0; 1599 sum3 = 0.0; 1600 sum4 = 0.0; 1601 sum5 = 0.0; 1602 sum6 = 0.0; 1603 sum7 = 0.0; 1604 sum8 = 0.0; 1605 sum9 = 0.0; 1606 for (j=0; j<n; j++) { 1607 sum1 += v[jrow]*x[9*idx[jrow]]; 1608 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1609 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1610 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1611 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1612 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1613 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1614 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1615 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1616 jrow++; 1617 } 1618 y[9*i] += sum1; 1619 y[9*i+1] += sum2; 1620 y[9*i+2] += sum3; 1621 y[9*i+3] += sum4; 1622 y[9*i+4] += sum5; 1623 y[9*i+5] += sum6; 1624 y[9*i+6] += sum7; 1625 y[9*i+7] += sum8; 1626 y[9*i+8] += sum9; 1627 } 1628 1629 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1630 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1631 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1637 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1638 { 1639 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1640 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1641 const PetscScalar *x,*v; 1642 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1643 PetscErrorCode ierr; 1644 const PetscInt m = b->AIJ->rmap->n,*idx; 1645 PetscInt n,i; 1646 1647 PetscFunctionBegin; 1648 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1649 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1650 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1651 for (i=0; i<m; i++) { 1652 idx = a->j + a->i[i] ; 1653 v = a->a + a->i[i] ; 1654 n = a->i[i+1] - a->i[i]; 1655 alpha1 = x[9*i]; 1656 alpha2 = x[9*i+1]; 1657 alpha3 = x[9*i+2]; 1658 alpha4 = x[9*i+3]; 1659 alpha5 = x[9*i+4]; 1660 alpha6 = x[9*i+5]; 1661 alpha7 = x[9*i+6]; 1662 alpha8 = x[9*i+7]; 1663 alpha9 = x[9*i+8]; 1664 while (n-->0) { 1665 y[9*(*idx)] += alpha1*(*v); 1666 y[9*(*idx)+1] += alpha2*(*v); 1667 y[9*(*idx)+2] += alpha3*(*v); 1668 y[9*(*idx)+3] += alpha4*(*v); 1669 y[9*(*idx)+4] += alpha5*(*v); 1670 y[9*(*idx)+5] += alpha6*(*v); 1671 y[9*(*idx)+6] += alpha7*(*v); 1672 y[9*(*idx)+7] += alpha8*(*v); 1673 y[9*(*idx)+8] += alpha9*(*v); 1674 idx++; v++; 1675 } 1676 } 1677 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1678 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1679 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1684 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1685 { 1686 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1688 const PetscScalar *x,*v; 1689 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1690 PetscErrorCode ierr; 1691 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1692 PetscInt nonzerorow=0,n,i,jrow,j; 1693 1694 PetscFunctionBegin; 1695 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1696 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1697 idx = a->j; 1698 v = a->a; 1699 ii = a->i; 1700 1701 for (i=0; i<m; i++) { 1702 jrow = ii[i]; 1703 n = ii[i+1] - jrow; 1704 sum1 = 0.0; 1705 sum2 = 0.0; 1706 sum3 = 0.0; 1707 sum4 = 0.0; 1708 sum5 = 0.0; 1709 sum6 = 0.0; 1710 sum7 = 0.0; 1711 sum8 = 0.0; 1712 sum9 = 0.0; 1713 sum10 = 0.0; 1714 nonzerorow += (n>0); 1715 for (j=0; j<n; j++) { 1716 sum1 += v[jrow]*x[10*idx[jrow]]; 1717 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1718 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1719 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1720 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1721 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1722 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1723 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1724 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1725 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1726 jrow++; 1727 } 1728 y[10*i] = sum1; 1729 y[10*i+1] = sum2; 1730 y[10*i+2] = sum3; 1731 y[10*i+3] = sum4; 1732 y[10*i+4] = sum5; 1733 y[10*i+5] = sum6; 1734 y[10*i+6] = sum7; 1735 y[10*i+7] = sum8; 1736 y[10*i+8] = sum9; 1737 y[10*i+9] = sum10; 1738 } 1739 1740 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1741 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1742 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1748 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1749 { 1750 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1751 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1752 const PetscScalar *x,*v; 1753 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1754 PetscErrorCode ierr; 1755 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1756 PetscInt n,i,jrow,j; 1757 1758 PetscFunctionBegin; 1759 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1760 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1761 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1762 idx = a->j; 1763 v = a->a; 1764 ii = a->i; 1765 1766 for (i=0; i<m; i++) { 1767 jrow = ii[i]; 1768 n = ii[i+1] - jrow; 1769 sum1 = 0.0; 1770 sum2 = 0.0; 1771 sum3 = 0.0; 1772 sum4 = 0.0; 1773 sum5 = 0.0; 1774 sum6 = 0.0; 1775 sum7 = 0.0; 1776 sum8 = 0.0; 1777 sum9 = 0.0; 1778 sum10 = 0.0; 1779 for (j=0; j<n; j++) { 1780 sum1 += v[jrow]*x[10*idx[jrow]]; 1781 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1782 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1783 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1784 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1785 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1786 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1787 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1788 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1789 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1790 jrow++; 1791 } 1792 y[10*i] += sum1; 1793 y[10*i+1] += sum2; 1794 y[10*i+2] += sum3; 1795 y[10*i+3] += sum4; 1796 y[10*i+4] += sum5; 1797 y[10*i+5] += sum6; 1798 y[10*i+6] += sum7; 1799 y[10*i+7] += sum8; 1800 y[10*i+8] += sum9; 1801 y[10*i+9] += sum10; 1802 } 1803 1804 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1805 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1806 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1812 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1813 { 1814 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1816 const PetscScalar *x,*v; 1817 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1818 PetscErrorCode ierr; 1819 const PetscInt m = b->AIJ->rmap->n,*idx; 1820 PetscInt n,i; 1821 1822 PetscFunctionBegin; 1823 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1824 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1825 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1826 1827 for (i=0; i<m; i++) { 1828 idx = a->j + a->i[i] ; 1829 v = a->a + a->i[i] ; 1830 n = a->i[i+1] - a->i[i]; 1831 alpha1 = x[10*i]; 1832 alpha2 = x[10*i+1]; 1833 alpha3 = x[10*i+2]; 1834 alpha4 = x[10*i+3]; 1835 alpha5 = x[10*i+4]; 1836 alpha6 = x[10*i+5]; 1837 alpha7 = x[10*i+6]; 1838 alpha8 = x[10*i+7]; 1839 alpha9 = x[10*i+8]; 1840 alpha10 = x[10*i+9]; 1841 while (n-->0) { 1842 y[10*(*idx)] += alpha1*(*v); 1843 y[10*(*idx)+1] += alpha2*(*v); 1844 y[10*(*idx)+2] += alpha3*(*v); 1845 y[10*(*idx)+3] += alpha4*(*v); 1846 y[10*(*idx)+4] += alpha5*(*v); 1847 y[10*(*idx)+5] += alpha6*(*v); 1848 y[10*(*idx)+6] += alpha7*(*v); 1849 y[10*(*idx)+7] += alpha8*(*v); 1850 y[10*(*idx)+8] += alpha9*(*v); 1851 y[10*(*idx)+9] += alpha10*(*v); 1852 idx++; v++; 1853 } 1854 } 1855 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1856 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1857 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1863 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1864 { 1865 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1866 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1867 const PetscScalar *x,*v; 1868 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1869 PetscErrorCode ierr; 1870 const PetscInt m = b->AIJ->rmap->n,*idx; 1871 PetscInt n,i; 1872 1873 PetscFunctionBegin; 1874 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1875 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1876 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1877 for (i=0; i<m; i++) { 1878 idx = a->j + a->i[i] ; 1879 v = a->a + a->i[i] ; 1880 n = a->i[i+1] - a->i[i]; 1881 alpha1 = x[10*i]; 1882 alpha2 = x[10*i+1]; 1883 alpha3 = x[10*i+2]; 1884 alpha4 = x[10*i+3]; 1885 alpha5 = x[10*i+4]; 1886 alpha6 = x[10*i+5]; 1887 alpha7 = x[10*i+6]; 1888 alpha8 = x[10*i+7]; 1889 alpha9 = x[10*i+8]; 1890 alpha10 = x[10*i+9]; 1891 while (n-->0) { 1892 y[10*(*idx)] += alpha1*(*v); 1893 y[10*(*idx)+1] += alpha2*(*v); 1894 y[10*(*idx)+2] += alpha3*(*v); 1895 y[10*(*idx)+3] += alpha4*(*v); 1896 y[10*(*idx)+4] += alpha5*(*v); 1897 y[10*(*idx)+5] += alpha6*(*v); 1898 y[10*(*idx)+6] += alpha7*(*v); 1899 y[10*(*idx)+7] += alpha8*(*v); 1900 y[10*(*idx)+8] += alpha9*(*v); 1901 y[10*(*idx)+9] += alpha10*(*v); 1902 idx++; v++; 1903 } 1904 } 1905 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1906 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1907 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 1912 /*--------------------------------------------------------------------------------------------*/ 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1915 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1916 { 1917 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1918 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1919 const PetscScalar *x,*v; 1920 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1921 PetscErrorCode ierr; 1922 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1923 PetscInt nonzerorow=0,n,i,jrow,j; 1924 1925 PetscFunctionBegin; 1926 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1927 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1928 idx = a->j; 1929 v = a->a; 1930 ii = a->i; 1931 1932 for (i=0; i<m; i++) { 1933 jrow = ii[i]; 1934 n = ii[i+1] - jrow; 1935 sum1 = 0.0; 1936 sum2 = 0.0; 1937 sum3 = 0.0; 1938 sum4 = 0.0; 1939 sum5 = 0.0; 1940 sum6 = 0.0; 1941 sum7 = 0.0; 1942 sum8 = 0.0; 1943 sum9 = 0.0; 1944 sum10 = 0.0; 1945 sum11 = 0.0; 1946 nonzerorow += (n>0); 1947 for (j=0; j<n; j++) { 1948 sum1 += v[jrow]*x[11*idx[jrow]]; 1949 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1950 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1951 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1952 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1953 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1954 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1955 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1956 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1957 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1958 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1959 jrow++; 1960 } 1961 y[11*i] = sum1; 1962 y[11*i+1] = sum2; 1963 y[11*i+2] = sum3; 1964 y[11*i+3] = sum4; 1965 y[11*i+4] = sum5; 1966 y[11*i+5] = sum6; 1967 y[11*i+6] = sum7; 1968 y[11*i+7] = sum8; 1969 y[11*i+8] = sum9; 1970 y[11*i+9] = sum10; 1971 y[11*i+10] = sum11; 1972 } 1973 1974 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1975 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1976 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1982 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1983 { 1984 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1985 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1986 const PetscScalar *x,*v; 1987 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1988 PetscErrorCode ierr; 1989 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1990 PetscInt n,i,jrow,j; 1991 1992 PetscFunctionBegin; 1993 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1994 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 1995 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1996 idx = a->j; 1997 v = a->a; 1998 ii = a->i; 1999 2000 for (i=0; i<m; i++) { 2001 jrow = ii[i]; 2002 n = ii[i+1] - jrow; 2003 sum1 = 0.0; 2004 sum2 = 0.0; 2005 sum3 = 0.0; 2006 sum4 = 0.0; 2007 sum5 = 0.0; 2008 sum6 = 0.0; 2009 sum7 = 0.0; 2010 sum8 = 0.0; 2011 sum9 = 0.0; 2012 sum10 = 0.0; 2013 sum11 = 0.0; 2014 for (j=0; j<n; j++) { 2015 sum1 += v[jrow]*x[11*idx[jrow]]; 2016 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2017 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2018 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2019 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2020 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2021 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2022 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2023 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2024 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2025 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2026 jrow++; 2027 } 2028 y[11*i] += sum1; 2029 y[11*i+1] += sum2; 2030 y[11*i+2] += sum3; 2031 y[11*i+3] += sum4; 2032 y[11*i+4] += sum5; 2033 y[11*i+5] += sum6; 2034 y[11*i+6] += sum7; 2035 y[11*i+7] += sum8; 2036 y[11*i+8] += sum9; 2037 y[11*i+9] += sum10; 2038 y[11*i+10] += sum11; 2039 } 2040 2041 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2042 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2043 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2049 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2050 { 2051 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2052 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2053 const PetscScalar *x,*v; 2054 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2055 PetscErrorCode ierr; 2056 const PetscInt m = b->AIJ->rmap->n,*idx; 2057 PetscInt n,i; 2058 2059 PetscFunctionBegin; 2060 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2061 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2062 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2063 2064 for (i=0; i<m; i++) { 2065 idx = a->j + a->i[i] ; 2066 v = a->a + a->i[i] ; 2067 n = a->i[i+1] - a->i[i]; 2068 alpha1 = x[11*i]; 2069 alpha2 = x[11*i+1]; 2070 alpha3 = x[11*i+2]; 2071 alpha4 = x[11*i+3]; 2072 alpha5 = x[11*i+4]; 2073 alpha6 = x[11*i+5]; 2074 alpha7 = x[11*i+6]; 2075 alpha8 = x[11*i+7]; 2076 alpha9 = x[11*i+8]; 2077 alpha10 = x[11*i+9]; 2078 alpha11 = x[11*i+10]; 2079 while (n-->0) { 2080 y[11*(*idx)] += alpha1*(*v); 2081 y[11*(*idx)+1] += alpha2*(*v); 2082 y[11*(*idx)+2] += alpha3*(*v); 2083 y[11*(*idx)+3] += alpha4*(*v); 2084 y[11*(*idx)+4] += alpha5*(*v); 2085 y[11*(*idx)+5] += alpha6*(*v); 2086 y[11*(*idx)+6] += alpha7*(*v); 2087 y[11*(*idx)+7] += alpha8*(*v); 2088 y[11*(*idx)+8] += alpha9*(*v); 2089 y[11*(*idx)+9] += alpha10*(*v); 2090 y[11*(*idx)+10] += alpha11*(*v); 2091 idx++; v++; 2092 } 2093 } 2094 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2095 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2096 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2097 PetscFunctionReturn(0); 2098 } 2099 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2102 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2103 { 2104 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2106 const PetscScalar *x,*v; 2107 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2108 PetscErrorCode ierr; 2109 const PetscInt m = b->AIJ->rmap->n,*idx; 2110 PetscInt n,i; 2111 2112 PetscFunctionBegin; 2113 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2114 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2115 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2116 for (i=0; i<m; i++) { 2117 idx = a->j + a->i[i] ; 2118 v = a->a + a->i[i] ; 2119 n = a->i[i+1] - a->i[i]; 2120 alpha1 = x[11*i]; 2121 alpha2 = x[11*i+1]; 2122 alpha3 = x[11*i+2]; 2123 alpha4 = x[11*i+3]; 2124 alpha5 = x[11*i+4]; 2125 alpha6 = x[11*i+5]; 2126 alpha7 = x[11*i+6]; 2127 alpha8 = x[11*i+7]; 2128 alpha9 = x[11*i+8]; 2129 alpha10 = x[11*i+9]; 2130 alpha11 = x[11*i+10]; 2131 while (n-->0) { 2132 y[11*(*idx)] += alpha1*(*v); 2133 y[11*(*idx)+1] += alpha2*(*v); 2134 y[11*(*idx)+2] += alpha3*(*v); 2135 y[11*(*idx)+3] += alpha4*(*v); 2136 y[11*(*idx)+4] += alpha5*(*v); 2137 y[11*(*idx)+5] += alpha6*(*v); 2138 y[11*(*idx)+6] += alpha7*(*v); 2139 y[11*(*idx)+7] += alpha8*(*v); 2140 y[11*(*idx)+8] += alpha9*(*v); 2141 y[11*(*idx)+9] += alpha10*(*v); 2142 y[11*(*idx)+10] += alpha11*(*v); 2143 idx++; v++; 2144 } 2145 } 2146 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2147 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2148 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 2153 /*--------------------------------------------------------------------------------------------*/ 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2156 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2157 { 2158 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2159 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2160 const PetscScalar *x,*v; 2161 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2162 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2163 PetscErrorCode ierr; 2164 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2165 PetscInt nonzerorow=0,n,i,jrow,j; 2166 2167 PetscFunctionBegin; 2168 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2169 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2170 idx = a->j; 2171 v = a->a; 2172 ii = a->i; 2173 2174 for (i=0; i<m; i++) { 2175 jrow = ii[i]; 2176 n = ii[i+1] - jrow; 2177 sum1 = 0.0; 2178 sum2 = 0.0; 2179 sum3 = 0.0; 2180 sum4 = 0.0; 2181 sum5 = 0.0; 2182 sum6 = 0.0; 2183 sum7 = 0.0; 2184 sum8 = 0.0; 2185 sum9 = 0.0; 2186 sum10 = 0.0; 2187 sum11 = 0.0; 2188 sum12 = 0.0; 2189 sum13 = 0.0; 2190 sum14 = 0.0; 2191 sum15 = 0.0; 2192 sum16 = 0.0; 2193 nonzerorow += (n>0); 2194 for (j=0; j<n; j++) { 2195 sum1 += v[jrow]*x[16*idx[jrow]]; 2196 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2197 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2198 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2199 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2200 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2201 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2202 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2203 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2204 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2205 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2206 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2207 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2208 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2209 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2210 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2211 jrow++; 2212 } 2213 y[16*i] = sum1; 2214 y[16*i+1] = sum2; 2215 y[16*i+2] = sum3; 2216 y[16*i+3] = sum4; 2217 y[16*i+4] = sum5; 2218 y[16*i+5] = sum6; 2219 y[16*i+6] = sum7; 2220 y[16*i+7] = sum8; 2221 y[16*i+8] = sum9; 2222 y[16*i+9] = sum10; 2223 y[16*i+10] = sum11; 2224 y[16*i+11] = sum12; 2225 y[16*i+12] = sum13; 2226 y[16*i+13] = sum14; 2227 y[16*i+14] = sum15; 2228 y[16*i+15] = sum16; 2229 } 2230 2231 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2232 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2233 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2234 PetscFunctionReturn(0); 2235 } 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2239 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2240 { 2241 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2242 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2243 const PetscScalar *x,*v; 2244 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2245 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2246 PetscErrorCode ierr; 2247 const PetscInt m = b->AIJ->rmap->n,*idx; 2248 PetscInt n,i; 2249 2250 PetscFunctionBegin; 2251 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2252 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2253 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2254 2255 for (i=0; i<m; i++) { 2256 idx = a->j + a->i[i] ; 2257 v = a->a + a->i[i] ; 2258 n = a->i[i+1] - a->i[i]; 2259 alpha1 = x[16*i]; 2260 alpha2 = x[16*i+1]; 2261 alpha3 = x[16*i+2]; 2262 alpha4 = x[16*i+3]; 2263 alpha5 = x[16*i+4]; 2264 alpha6 = x[16*i+5]; 2265 alpha7 = x[16*i+6]; 2266 alpha8 = x[16*i+7]; 2267 alpha9 = x[16*i+8]; 2268 alpha10 = x[16*i+9]; 2269 alpha11 = x[16*i+10]; 2270 alpha12 = x[16*i+11]; 2271 alpha13 = x[16*i+12]; 2272 alpha14 = x[16*i+13]; 2273 alpha15 = x[16*i+14]; 2274 alpha16 = x[16*i+15]; 2275 while (n-->0) { 2276 y[16*(*idx)] += alpha1*(*v); 2277 y[16*(*idx)+1] += alpha2*(*v); 2278 y[16*(*idx)+2] += alpha3*(*v); 2279 y[16*(*idx)+3] += alpha4*(*v); 2280 y[16*(*idx)+4] += alpha5*(*v); 2281 y[16*(*idx)+5] += alpha6*(*v); 2282 y[16*(*idx)+6] += alpha7*(*v); 2283 y[16*(*idx)+7] += alpha8*(*v); 2284 y[16*(*idx)+8] += alpha9*(*v); 2285 y[16*(*idx)+9] += alpha10*(*v); 2286 y[16*(*idx)+10] += alpha11*(*v); 2287 y[16*(*idx)+11] += alpha12*(*v); 2288 y[16*(*idx)+12] += alpha13*(*v); 2289 y[16*(*idx)+13] += alpha14*(*v); 2290 y[16*(*idx)+14] += alpha15*(*v); 2291 y[16*(*idx)+15] += alpha16*(*v); 2292 idx++; v++; 2293 } 2294 } 2295 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2296 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2297 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 #undef __FUNCT__ 2302 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2303 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2304 { 2305 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2306 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2307 const PetscScalar *x,*v; 2308 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2309 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2310 PetscErrorCode ierr; 2311 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2312 PetscInt n,i,jrow,j; 2313 2314 PetscFunctionBegin; 2315 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2316 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2317 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2318 idx = a->j; 2319 v = a->a; 2320 ii = a->i; 2321 2322 for (i=0; i<m; i++) { 2323 jrow = ii[i]; 2324 n = ii[i+1] - jrow; 2325 sum1 = 0.0; 2326 sum2 = 0.0; 2327 sum3 = 0.0; 2328 sum4 = 0.0; 2329 sum5 = 0.0; 2330 sum6 = 0.0; 2331 sum7 = 0.0; 2332 sum8 = 0.0; 2333 sum9 = 0.0; 2334 sum10 = 0.0; 2335 sum11 = 0.0; 2336 sum12 = 0.0; 2337 sum13 = 0.0; 2338 sum14 = 0.0; 2339 sum15 = 0.0; 2340 sum16 = 0.0; 2341 for (j=0; j<n; j++) { 2342 sum1 += v[jrow]*x[16*idx[jrow]]; 2343 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2344 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2345 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2346 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2347 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2348 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2349 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2350 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2351 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2352 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2353 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2354 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2355 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2356 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2357 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2358 jrow++; 2359 } 2360 y[16*i] += sum1; 2361 y[16*i+1] += sum2; 2362 y[16*i+2] += sum3; 2363 y[16*i+3] += sum4; 2364 y[16*i+4] += sum5; 2365 y[16*i+5] += sum6; 2366 y[16*i+6] += sum7; 2367 y[16*i+7] += sum8; 2368 y[16*i+8] += sum9; 2369 y[16*i+9] += sum10; 2370 y[16*i+10] += sum11; 2371 y[16*i+11] += sum12; 2372 y[16*i+12] += sum13; 2373 y[16*i+13] += sum14; 2374 y[16*i+14] += sum15; 2375 y[16*i+15] += sum16; 2376 } 2377 2378 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2379 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2380 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2386 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2387 { 2388 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2389 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2390 const PetscScalar *x,*v; 2391 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2392 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2393 PetscErrorCode ierr; 2394 const PetscInt m = b->AIJ->rmap->n,*idx; 2395 PetscInt n,i; 2396 2397 PetscFunctionBegin; 2398 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2399 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2400 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2401 for (i=0; i<m; i++) { 2402 idx = a->j + a->i[i] ; 2403 v = a->a + a->i[i] ; 2404 n = a->i[i+1] - a->i[i]; 2405 alpha1 = x[16*i]; 2406 alpha2 = x[16*i+1]; 2407 alpha3 = x[16*i+2]; 2408 alpha4 = x[16*i+3]; 2409 alpha5 = x[16*i+4]; 2410 alpha6 = x[16*i+5]; 2411 alpha7 = x[16*i+6]; 2412 alpha8 = x[16*i+7]; 2413 alpha9 = x[16*i+8]; 2414 alpha10 = x[16*i+9]; 2415 alpha11 = x[16*i+10]; 2416 alpha12 = x[16*i+11]; 2417 alpha13 = x[16*i+12]; 2418 alpha14 = x[16*i+13]; 2419 alpha15 = x[16*i+14]; 2420 alpha16 = x[16*i+15]; 2421 while (n-->0) { 2422 y[16*(*idx)] += alpha1*(*v); 2423 y[16*(*idx)+1] += alpha2*(*v); 2424 y[16*(*idx)+2] += alpha3*(*v); 2425 y[16*(*idx)+3] += alpha4*(*v); 2426 y[16*(*idx)+4] += alpha5*(*v); 2427 y[16*(*idx)+5] += alpha6*(*v); 2428 y[16*(*idx)+6] += alpha7*(*v); 2429 y[16*(*idx)+7] += alpha8*(*v); 2430 y[16*(*idx)+8] += alpha9*(*v); 2431 y[16*(*idx)+9] += alpha10*(*v); 2432 y[16*(*idx)+10] += alpha11*(*v); 2433 y[16*(*idx)+11] += alpha12*(*v); 2434 y[16*(*idx)+12] += alpha13*(*v); 2435 y[16*(*idx)+13] += alpha14*(*v); 2436 y[16*(*idx)+14] += alpha15*(*v); 2437 y[16*(*idx)+15] += alpha16*(*v); 2438 idx++; v++; 2439 } 2440 } 2441 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2442 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2443 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 /*--------------------------------------------------------------------------------------------*/ 2448 #undef __FUNCT__ 2449 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2450 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2451 { 2452 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2454 const PetscScalar *x,*v; 2455 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2456 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2457 PetscErrorCode ierr; 2458 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2459 PetscInt nonzerorow=0,n,i,jrow,j; 2460 2461 PetscFunctionBegin; 2462 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2463 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2464 idx = a->j; 2465 v = a->a; 2466 ii = a->i; 2467 2468 for (i=0; i<m; i++) { 2469 jrow = ii[i]; 2470 n = ii[i+1] - jrow; 2471 sum1 = 0.0; 2472 sum2 = 0.0; 2473 sum3 = 0.0; 2474 sum4 = 0.0; 2475 sum5 = 0.0; 2476 sum6 = 0.0; 2477 sum7 = 0.0; 2478 sum8 = 0.0; 2479 sum9 = 0.0; 2480 sum10 = 0.0; 2481 sum11 = 0.0; 2482 sum12 = 0.0; 2483 sum13 = 0.0; 2484 sum14 = 0.0; 2485 sum15 = 0.0; 2486 sum16 = 0.0; 2487 sum17 = 0.0; 2488 sum18 = 0.0; 2489 nonzerorow += (n>0); 2490 for (j=0; j<n; j++) { 2491 sum1 += v[jrow]*x[18*idx[jrow]]; 2492 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2493 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2494 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2495 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2496 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2497 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2498 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2499 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2500 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2501 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2502 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2503 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2504 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2505 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2506 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2507 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2508 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2509 jrow++; 2510 } 2511 y[18*i] = sum1; 2512 y[18*i+1] = sum2; 2513 y[18*i+2] = sum3; 2514 y[18*i+3] = sum4; 2515 y[18*i+4] = sum5; 2516 y[18*i+5] = sum6; 2517 y[18*i+6] = sum7; 2518 y[18*i+7] = sum8; 2519 y[18*i+8] = sum9; 2520 y[18*i+9] = sum10; 2521 y[18*i+10] = sum11; 2522 y[18*i+11] = sum12; 2523 y[18*i+12] = sum13; 2524 y[18*i+13] = sum14; 2525 y[18*i+14] = sum15; 2526 y[18*i+15] = sum16; 2527 y[18*i+16] = sum17; 2528 y[18*i+17] = sum18; 2529 } 2530 2531 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2532 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2533 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2539 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2540 { 2541 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2542 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2543 const PetscScalar *x,*v; 2544 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2545 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2546 PetscErrorCode ierr; 2547 const PetscInt m = b->AIJ->rmap->n,*idx; 2548 PetscInt n,i; 2549 2550 PetscFunctionBegin; 2551 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2552 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2553 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2554 2555 for (i=0; i<m; i++) { 2556 idx = a->j + a->i[i] ; 2557 v = a->a + a->i[i] ; 2558 n = a->i[i+1] - a->i[i]; 2559 alpha1 = x[18*i]; 2560 alpha2 = x[18*i+1]; 2561 alpha3 = x[18*i+2]; 2562 alpha4 = x[18*i+3]; 2563 alpha5 = x[18*i+4]; 2564 alpha6 = x[18*i+5]; 2565 alpha7 = x[18*i+6]; 2566 alpha8 = x[18*i+7]; 2567 alpha9 = x[18*i+8]; 2568 alpha10 = x[18*i+9]; 2569 alpha11 = x[18*i+10]; 2570 alpha12 = x[18*i+11]; 2571 alpha13 = x[18*i+12]; 2572 alpha14 = x[18*i+13]; 2573 alpha15 = x[18*i+14]; 2574 alpha16 = x[18*i+15]; 2575 alpha17 = x[18*i+16]; 2576 alpha18 = x[18*i+17]; 2577 while (n-->0) { 2578 y[18*(*idx)] += alpha1*(*v); 2579 y[18*(*idx)+1] += alpha2*(*v); 2580 y[18*(*idx)+2] += alpha3*(*v); 2581 y[18*(*idx)+3] += alpha4*(*v); 2582 y[18*(*idx)+4] += alpha5*(*v); 2583 y[18*(*idx)+5] += alpha6*(*v); 2584 y[18*(*idx)+6] += alpha7*(*v); 2585 y[18*(*idx)+7] += alpha8*(*v); 2586 y[18*(*idx)+8] += alpha9*(*v); 2587 y[18*(*idx)+9] += alpha10*(*v); 2588 y[18*(*idx)+10] += alpha11*(*v); 2589 y[18*(*idx)+11] += alpha12*(*v); 2590 y[18*(*idx)+12] += alpha13*(*v); 2591 y[18*(*idx)+13] += alpha14*(*v); 2592 y[18*(*idx)+14] += alpha15*(*v); 2593 y[18*(*idx)+15] += alpha16*(*v); 2594 y[18*(*idx)+16] += alpha17*(*v); 2595 y[18*(*idx)+17] += alpha18*(*v); 2596 idx++; v++; 2597 } 2598 } 2599 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2600 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2601 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2607 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2608 { 2609 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2610 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2611 const PetscScalar *x,*v; 2612 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2613 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2614 PetscErrorCode ierr; 2615 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2616 PetscInt n,i,jrow,j; 2617 2618 PetscFunctionBegin; 2619 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2620 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2621 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2622 idx = a->j; 2623 v = a->a; 2624 ii = a->i; 2625 2626 for (i=0; i<m; i++) { 2627 jrow = ii[i]; 2628 n = ii[i+1] - jrow; 2629 sum1 = 0.0; 2630 sum2 = 0.0; 2631 sum3 = 0.0; 2632 sum4 = 0.0; 2633 sum5 = 0.0; 2634 sum6 = 0.0; 2635 sum7 = 0.0; 2636 sum8 = 0.0; 2637 sum9 = 0.0; 2638 sum10 = 0.0; 2639 sum11 = 0.0; 2640 sum12 = 0.0; 2641 sum13 = 0.0; 2642 sum14 = 0.0; 2643 sum15 = 0.0; 2644 sum16 = 0.0; 2645 sum17 = 0.0; 2646 sum18 = 0.0; 2647 for (j=0; j<n; j++) { 2648 sum1 += v[jrow]*x[18*idx[jrow]]; 2649 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2650 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2651 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2652 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2653 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2654 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2655 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2656 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2657 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2658 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2659 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2660 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2661 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2662 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2663 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2664 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2665 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2666 jrow++; 2667 } 2668 y[18*i] += sum1; 2669 y[18*i+1] += sum2; 2670 y[18*i+2] += sum3; 2671 y[18*i+3] += sum4; 2672 y[18*i+4] += sum5; 2673 y[18*i+5] += sum6; 2674 y[18*i+6] += sum7; 2675 y[18*i+7] += sum8; 2676 y[18*i+8] += sum9; 2677 y[18*i+9] += sum10; 2678 y[18*i+10] += sum11; 2679 y[18*i+11] += sum12; 2680 y[18*i+12] += sum13; 2681 y[18*i+13] += sum14; 2682 y[18*i+14] += sum15; 2683 y[18*i+15] += sum16; 2684 y[18*i+16] += sum17; 2685 y[18*i+17] += sum18; 2686 } 2687 2688 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2689 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2690 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2696 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2697 { 2698 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2699 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2700 const PetscScalar *x,*v; 2701 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2702 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2703 PetscErrorCode ierr; 2704 const PetscInt m = b->AIJ->rmap->n,*idx; 2705 PetscInt n,i; 2706 2707 PetscFunctionBegin; 2708 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2709 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2710 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2711 for (i=0; i<m; i++) { 2712 idx = a->j + a->i[i] ; 2713 v = a->a + a->i[i] ; 2714 n = a->i[i+1] - a->i[i]; 2715 alpha1 = x[18*i]; 2716 alpha2 = x[18*i+1]; 2717 alpha3 = x[18*i+2]; 2718 alpha4 = x[18*i+3]; 2719 alpha5 = x[18*i+4]; 2720 alpha6 = x[18*i+5]; 2721 alpha7 = x[18*i+6]; 2722 alpha8 = x[18*i+7]; 2723 alpha9 = x[18*i+8]; 2724 alpha10 = x[18*i+9]; 2725 alpha11 = x[18*i+10]; 2726 alpha12 = x[18*i+11]; 2727 alpha13 = x[18*i+12]; 2728 alpha14 = x[18*i+13]; 2729 alpha15 = x[18*i+14]; 2730 alpha16 = x[18*i+15]; 2731 alpha17 = x[18*i+16]; 2732 alpha18 = x[18*i+17]; 2733 while (n-->0) { 2734 y[18*(*idx)] += alpha1*(*v); 2735 y[18*(*idx)+1] += alpha2*(*v); 2736 y[18*(*idx)+2] += alpha3*(*v); 2737 y[18*(*idx)+3] += alpha4*(*v); 2738 y[18*(*idx)+4] += alpha5*(*v); 2739 y[18*(*idx)+5] += alpha6*(*v); 2740 y[18*(*idx)+6] += alpha7*(*v); 2741 y[18*(*idx)+7] += alpha8*(*v); 2742 y[18*(*idx)+8] += alpha9*(*v); 2743 y[18*(*idx)+9] += alpha10*(*v); 2744 y[18*(*idx)+10] += alpha11*(*v); 2745 y[18*(*idx)+11] += alpha12*(*v); 2746 y[18*(*idx)+12] += alpha13*(*v); 2747 y[18*(*idx)+13] += alpha14*(*v); 2748 y[18*(*idx)+14] += alpha15*(*v); 2749 y[18*(*idx)+15] += alpha16*(*v); 2750 y[18*(*idx)+16] += alpha17*(*v); 2751 y[18*(*idx)+17] += alpha18*(*v); 2752 idx++; v++; 2753 } 2754 } 2755 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2756 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 2757 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2758 PetscFunctionReturn(0); 2759 } 2760 2761 /*===================================================================================*/ 2762 #undef __FUNCT__ 2763 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2764 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2765 { 2766 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2767 PetscErrorCode ierr; 2768 2769 PetscFunctionBegin; 2770 /* start the scatter */ 2771 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2772 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2773 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2774 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2775 PetscFunctionReturn(0); 2776 } 2777 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2780 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2781 { 2782 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2783 PetscErrorCode ierr; 2784 2785 PetscFunctionBegin; 2786 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2787 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2788 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2789 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2790 PetscFunctionReturn(0); 2791 } 2792 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2795 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2796 { 2797 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2798 PetscErrorCode ierr; 2799 2800 PetscFunctionBegin; 2801 /* start the scatter */ 2802 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2803 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2804 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2805 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2806 PetscFunctionReturn(0); 2807 } 2808 2809 #undef __FUNCT__ 2810 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2811 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2812 { 2813 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2814 PetscErrorCode ierr; 2815 2816 PetscFunctionBegin; 2817 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2818 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2819 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2820 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2821 PetscFunctionReturn(0); 2822 } 2823 2824 /* ----------------------------------------------------------------*/ 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2827 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2828 { 2829 /* This routine requires testing -- but it's getting better. */ 2830 PetscErrorCode ierr; 2831 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2832 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2833 Mat P=pp->AIJ; 2834 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2835 PetscInt *pti,*ptj,*ptJ; 2836 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2837 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2838 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2839 MatScalar *ca; 2840 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2841 2842 PetscFunctionBegin; 2843 /* Start timer */ 2844 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2845 2846 /* Get ij structure of P^T */ 2847 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2848 2849 cn = pn*ppdof; 2850 /* Allocate ci array, arrays for fill computation and */ 2851 /* free space for accumulating nonzero column info */ 2852 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2853 ci[0] = 0; 2854 2855 /* Work arrays for rows of P^T*A */ 2856 ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr); 2857 2858 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2859 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2860 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2861 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2862 current_space = free_space; 2863 2864 /* Determine symbolic info for each row of C: */ 2865 for (i=0;i<pn;i++) { 2866 ptnzi = pti[i+1] - pti[i]; 2867 ptJ = ptj + pti[i]; 2868 for (dof=0;dof<ppdof;dof++) { 2869 ptanzi = 0; 2870 /* Determine symbolic row of PtA: */ 2871 for (j=0;j<ptnzi;j++) { 2872 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2873 arow = ptJ[j]*ppdof + dof; 2874 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2875 anzj = ai[arow+1] - ai[arow]; 2876 ajj = aj + ai[arow]; 2877 for (k=0;k<anzj;k++) { 2878 if (!ptadenserow[ajj[k]]) { 2879 ptadenserow[ajj[k]] = -1; 2880 ptasparserow[ptanzi++] = ajj[k]; 2881 } 2882 } 2883 } 2884 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2885 ptaj = ptasparserow; 2886 cnzi = 0; 2887 for (j=0;j<ptanzi;j++) { 2888 /* Get offset within block of P */ 2889 pshift = *ptaj%ppdof; 2890 /* Get block row of P */ 2891 prow = (*ptaj++)/ppdof; /* integer division */ 2892 /* P has same number of nonzeros per row as the compressed form */ 2893 pnzj = pi[prow+1] - pi[prow]; 2894 pjj = pj + pi[prow]; 2895 for (k=0;k<pnzj;k++) { 2896 /* Locations in C are shifted by the offset within the block */ 2897 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2898 if (!denserow[pjj[k]*ppdof+pshift]) { 2899 denserow[pjj[k]*ppdof+pshift] = -1; 2900 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2901 } 2902 } 2903 } 2904 2905 /* sort sparserow */ 2906 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2907 2908 /* If free space is not available, make more free space */ 2909 /* Double the amount of total space in the list */ 2910 if (current_space->local_remaining<cnzi) { 2911 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2912 } 2913 2914 /* Copy data into free space, and zero out denserows */ 2915 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2916 current_space->array += cnzi; 2917 current_space->local_used += cnzi; 2918 current_space->local_remaining -= cnzi; 2919 2920 for (j=0;j<ptanzi;j++) { 2921 ptadenserow[ptasparserow[j]] = 0; 2922 } 2923 for (j=0;j<cnzi;j++) { 2924 denserow[sparserow[j]] = 0; 2925 } 2926 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2927 /* For now, we will recompute what is needed. */ 2928 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2929 } 2930 } 2931 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2932 /* Allocate space for cj, initialize cj, and */ 2933 /* destroy list of free space and other temporary array(s) */ 2934 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2935 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2936 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 2937 2938 /* Allocate space for ca */ 2939 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2940 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2941 2942 /* put together the new matrix */ 2943 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2944 2945 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2946 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2947 c = (Mat_SeqAIJ *)((*C)->data); 2948 c->free_a = PETSC_TRUE; 2949 c->free_ij = PETSC_TRUE; 2950 c->nonew = 0; 2951 2952 /* Clean up. */ 2953 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2954 2955 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2956 PetscFunctionReturn(0); 2957 } 2958 2959 #undef __FUNCT__ 2960 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2961 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2962 { 2963 /* This routine requires testing -- first draft only */ 2964 PetscErrorCode ierr; 2965 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2966 Mat P=pp->AIJ; 2967 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2968 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2969 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2970 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2971 const PetscInt *ci=c->i,*cj=c->j,*cjj; 2972 const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 2973 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 2974 const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj; 2975 MatScalar *ca=c->a,*caj,*apa; 2976 2977 PetscFunctionBegin; 2978 /* Allocate temporary array for storage of one row of A*P */ 2979 ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr); 2980 ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr); 2981 ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr); 2982 ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr); 2983 2984 /* Clear old values in C */ 2985 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2986 2987 for (i=0;i<am;i++) { 2988 /* Form sparse row of A*P */ 2989 anzi = ai[i+1] - ai[i]; 2990 apnzj = 0; 2991 for (j=0;j<anzi;j++) { 2992 /* Get offset within block of P */ 2993 pshift = *aj%ppdof; 2994 /* Get block row of P */ 2995 prow = *aj++/ppdof; /* integer division */ 2996 pnzj = pi[prow+1] - pi[prow]; 2997 pjj = pj + pi[prow]; 2998 paj = pa + pi[prow]; 2999 for (k=0;k<pnzj;k++) { 3000 poffset = pjj[k]*ppdof+pshift; 3001 if (!apjdense[poffset]) { 3002 apjdense[poffset] = -1; 3003 apj[apnzj++] = poffset; 3004 } 3005 apa[poffset] += (*aa)*paj[k]; 3006 } 3007 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3008 aa++; 3009 } 3010 3011 /* Sort the j index array for quick sparse axpy. */ 3012 /* Note: a array does not need sorting as it is in dense storage locations. */ 3013 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3014 3015 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3016 prow = i/ppdof; /* integer division */ 3017 pshift = i%ppdof; 3018 poffset = pi[prow]; 3019 pnzi = pi[prow+1] - poffset; 3020 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3021 pJ = pj+poffset; 3022 pA = pa+poffset; 3023 for (j=0;j<pnzi;j++) { 3024 crow = (*pJ)*ppdof+pshift; 3025 cjj = cj + ci[crow]; 3026 caj = ca + ci[crow]; 3027 pJ++; 3028 /* Perform sparse axpy operation. Note cjj includes apj. */ 3029 for (k=0,nextap=0;nextap<apnzj;k++) { 3030 if (cjj[k]==apj[nextap]) { 3031 caj[k] += (*pA)*apa[apj[nextap++]]; 3032 } 3033 } 3034 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3035 pA++; 3036 } 3037 3038 /* Zero the current row info for A*P */ 3039 for (j=0;j<apnzj;j++) { 3040 apa[apj[j]] = 0.; 3041 apjdense[apj[j]] = 0; 3042 } 3043 } 3044 3045 /* Assemble the final matrix and clean up */ 3046 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3047 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3048 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3049 PetscFunctionReturn(0); 3050 } 3051 3052 #undef __FUNCT__ 3053 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3054 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3055 { 3056 PetscErrorCode ierr; 3057 3058 PetscFunctionBegin; 3059 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3060 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 3061 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3062 PetscFunctionReturn(0); 3063 } 3064 3065 #undef __FUNCT__ 3066 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3067 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3068 { 3069 PetscFunctionBegin; 3070 SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3071 PetscFunctionReturn(0); 3072 } 3073 3074 EXTERN_C_BEGIN 3075 #undef __FUNCT__ 3076 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3077 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3078 { 3079 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3080 Mat a = b->AIJ,B; 3081 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3082 PetscErrorCode ierr; 3083 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3084 PetscInt *cols; 3085 PetscScalar *vals; 3086 3087 PetscFunctionBegin; 3088 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3089 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 3090 for (i=0; i<m; i++) { 3091 nmax = PetscMax(nmax,aij->ilen[i]); 3092 for (j=0; j<dof; j++) { 3093 ilen[dof*i+j] = aij->ilen[i]; 3094 } 3095 } 3096 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3097 ierr = PetscFree(ilen);CHKERRQ(ierr); 3098 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 3099 ii = 0; 3100 for (i=0; i<m; i++) { 3101 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3102 for (j=0; j<dof; j++) { 3103 for (k=0; k<ncols; k++) { 3104 icols[k] = dof*cols[k]+j; 3105 } 3106 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3107 ii++; 3108 } 3109 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3110 } 3111 ierr = PetscFree(icols);CHKERRQ(ierr); 3112 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3114 3115 if (reuse == MAT_REUSE_MATRIX) { 3116 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3117 } else { 3118 *newmat = B; 3119 } 3120 PetscFunctionReturn(0); 3121 } 3122 EXTERN_C_END 3123 3124 #include "../src/mat/impls/aij/mpi/mpiaij.h" 3125 3126 EXTERN_C_BEGIN 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3129 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3130 { 3131 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3132 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3133 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3134 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3135 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3136 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3137 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3138 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 3139 PetscInt rstart,cstart,*garray,ii,k; 3140 PetscErrorCode ierr; 3141 PetscScalar *vals,*ovals; 3142 3143 PetscFunctionBegin; 3144 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3145 for (i=0; i<A->rmap->n/dof; i++) { 3146 nmax = PetscMax(nmax,AIJ->ilen[i]); 3147 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3148 for (j=0; j<dof; j++) { 3149 dnz[dof*i+j] = AIJ->ilen[i]; 3150 onz[dof*i+j] = OAIJ->ilen[i]; 3151 } 3152 } 3153 ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3154 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3155 3156 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3157 rstart = dof*maij->A->rmap->rstart; 3158 cstart = dof*maij->A->cmap->rstart; 3159 garray = mpiaij->garray; 3160 3161 ii = rstart; 3162 for (i=0; i<A->rmap->n/dof; i++) { 3163 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3164 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3165 for (j=0; j<dof; j++) { 3166 for (k=0; k<ncols; k++) { 3167 icols[k] = cstart + dof*cols[k]+j; 3168 } 3169 for (k=0; k<oncols; k++) { 3170 oicols[k] = dof*garray[ocols[k]]+j; 3171 } 3172 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3173 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3174 ii++; 3175 } 3176 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3177 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3178 } 3179 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3180 3181 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3182 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3183 3184 if (reuse == MAT_REUSE_MATRIX) { 3185 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3186 ((PetscObject)A)->refct = 1; 3187 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3188 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3189 } else { 3190 *newmat = B; 3191 } 3192 PetscFunctionReturn(0); 3193 } 3194 EXTERN_C_END 3195 3196 3197 /* ---------------------------------------------------------------------------------- */ 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "MatCreateMAIJ" 3200 /*@C 3201 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3202 operations for multicomponent problems. It interpolates each component the same 3203 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3204 and MATMPIAIJ for distributed matrices. 3205 3206 Collective 3207 3208 Input Parameters: 3209 + A - the AIJ matrix describing the action on blocks 3210 - dof - the block size (number of components per node) 3211 3212 Output Parameter: 3213 . maij - the new MAIJ matrix 3214 3215 Operations provided: 3216 + MatMult 3217 . MatMultTranspose 3218 . MatMultAdd 3219 . MatMultTransposeAdd 3220 - MatView 3221 3222 Level: advanced 3223 3224 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension() 3225 @*/ 3226 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3227 { 3228 PetscErrorCode ierr; 3229 PetscMPIInt size; 3230 PetscInt n; 3231 Mat B; 3232 3233 PetscFunctionBegin; 3234 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3235 3236 if (dof == 1) { 3237 *maij = A; 3238 } else { 3239 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3240 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3241 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3242 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3243 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3244 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3245 B->assembled = PETSC_TRUE; 3246 3247 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3248 if (size == 1) { 3249 Mat_SeqMAIJ *b; 3250 3251 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3252 B->ops->destroy = MatDestroy_SeqMAIJ; 3253 B->ops->view = MatView_SeqMAIJ; 3254 b = (Mat_SeqMAIJ*)B->data; 3255 b->dof = dof; 3256 b->AIJ = A; 3257 if (dof == 2) { 3258 B->ops->mult = MatMult_SeqMAIJ_2; 3259 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3260 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3261 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3262 } else if (dof == 3) { 3263 B->ops->mult = MatMult_SeqMAIJ_3; 3264 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3265 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3266 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3267 } else if (dof == 4) { 3268 B->ops->mult = MatMult_SeqMAIJ_4; 3269 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3270 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3271 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3272 } else if (dof == 5) { 3273 B->ops->mult = MatMult_SeqMAIJ_5; 3274 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3275 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3276 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3277 } else if (dof == 6) { 3278 B->ops->mult = MatMult_SeqMAIJ_6; 3279 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3280 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3281 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3282 } else if (dof == 7) { 3283 B->ops->mult = MatMult_SeqMAIJ_7; 3284 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3285 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3286 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3287 } else if (dof == 8) { 3288 B->ops->mult = MatMult_SeqMAIJ_8; 3289 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3290 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3291 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3292 } else if (dof == 9) { 3293 B->ops->mult = MatMult_SeqMAIJ_9; 3294 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3295 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3296 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3297 } else if (dof == 10) { 3298 B->ops->mult = MatMult_SeqMAIJ_10; 3299 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3300 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3301 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3302 } else if (dof == 11) { 3303 B->ops->mult = MatMult_SeqMAIJ_11; 3304 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3305 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3306 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3307 } else if (dof == 16) { 3308 B->ops->mult = MatMult_SeqMAIJ_16; 3309 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3310 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3311 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3312 } else if (dof == 18) { 3313 B->ops->mult = MatMult_SeqMAIJ_18; 3314 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3315 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3316 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3317 } else { 3318 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 3319 } 3320 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 3321 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3322 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3323 } else { 3324 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3325 Mat_MPIMAIJ *b; 3326 IS from,to; 3327 Vec gvec; 3328 PetscInt *garray,i; 3329 3330 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3331 B->ops->destroy = MatDestroy_MPIMAIJ; 3332 B->ops->view = MatView_MPIMAIJ; 3333 b = (Mat_MPIMAIJ*)B->data; 3334 b->dof = dof; 3335 b->A = A; 3336 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3337 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3338 3339 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3340 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 3341 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3342 3343 /* create two temporary Index sets for build scatter gather */ 3344 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 3345 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 3346 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 3347 ierr = PetscFree(garray);CHKERRQ(ierr); 3348 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3349 3350 /* create temporary global vector to generate scatter context */ 3351 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3352 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 3353 3354 /* generate the scatter context */ 3355 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3356 3357 ierr = ISDestroy(from);CHKERRQ(ierr); 3358 ierr = ISDestroy(to);CHKERRQ(ierr); 3359 ierr = VecDestroy(gvec);CHKERRQ(ierr); 3360 3361 B->ops->mult = MatMult_MPIMAIJ_dof; 3362 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3363 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3364 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3365 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3366 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3367 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3368 } 3369 *maij = B; 3370 ierr = MatView_Private(B);CHKERRQ(ierr); 3371 } 3372 PetscFunctionReturn(0); 3373 } 3374