1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/maij/maij.h" 20 #include "src/vec/vecimpl.h" 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatMAIJGetAIJ" 24 int MatMAIJGetAIJ(Mat A,Mat *B) 25 { 26 int ierr; 27 PetscTruth ismpimaij,isseqmaij; 28 29 PetscFunctionBegin; 30 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 31 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 32 if (ismpimaij) { 33 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 34 35 *B = b->A; 36 } else if (isseqmaij) { 37 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 38 39 *B = b->AIJ; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatMAIJRedimension" 48 int MatMAIJRedimension(Mat A,int dof,Mat *B) 49 { 50 int ierr; 51 Mat Aij; 52 53 PetscFunctionBegin; 54 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 55 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatDestroy_SeqMAIJ" 61 int MatDestroy_SeqMAIJ(Mat A) 62 { 63 int ierr; 64 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 65 66 PetscFunctionBegin; 67 if (b->AIJ) { 68 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 69 } 70 ierr = PetscFree(b);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatDestroy_MPIMAIJ" 76 int MatDestroy_MPIMAIJ(Mat A) 77 { 78 int ierr; 79 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 80 81 PetscFunctionBegin; 82 if (b->AIJ) { 83 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 84 } 85 if (b->OAIJ) { 86 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 87 } 88 if (b->A) { 89 ierr = MatDestroy(b->A);CHKERRQ(ierr); 90 } 91 if (b->ctx) { 92 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 93 } 94 if (b->w) { 95 ierr = VecDestroy(b->w);CHKERRQ(ierr); 96 } 97 ierr = PetscFree(b);CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 /*MC 102 MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 103 multicomponent problems, interpolating or restricting each component the same way independently. 104 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 105 106 Operations provided: 107 . MatMult 108 . MatMultTranspose 109 . MatMultAdd 110 . MatMultTransposeAdd 111 112 Level: advanced 113 114 .seealso: MatCreateSeqDense 115 M*/ 116 117 EXTERN_C_BEGIN 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatCreate_MAIJ" 120 int MatCreate_MAIJ(Mat A) 121 { 122 int ierr; 123 Mat_MPIMAIJ *b; 124 125 PetscFunctionBegin; 126 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 127 A->data = (void*)b; 128 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 129 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 130 A->factor = 0; 131 A->mapping = 0; 132 133 b->AIJ = 0; 134 b->dof = 0; 135 b->OAIJ = 0; 136 b->ctx = 0; 137 b->w = 0; 138 PetscFunctionReturn(0); 139 } 140 EXTERN_C_END 141 142 /* --------------------------------------------------------------------------------------*/ 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatMult_SeqMAIJ_2" 145 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 146 { 147 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 148 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 149 PetscScalar *x,*y,*v,sum1, sum2; 150 int ierr,m = b->AIJ->m,*idx,*ii; 151 int n,i,jrow,j; 152 153 PetscFunctionBegin; 154 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 155 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 156 idx = a->j; 157 v = a->a; 158 ii = a->i; 159 160 for (i=0; i<m; i++) { 161 jrow = ii[i]; 162 n = ii[i+1] - jrow; 163 sum1 = 0.0; 164 sum2 = 0.0; 165 for (j=0; j<n; j++) { 166 sum1 += v[jrow]*x[2*idx[jrow]]; 167 sum2 += v[jrow]*x[2*idx[jrow]+1]; 168 jrow++; 169 } 170 y[2*i] = sum1; 171 y[2*i+1] = sum2; 172 } 173 174 PetscLogFlops(4*a->nz - 2*m); 175 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 176 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 182 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 183 { 184 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 186 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 187 int ierr,m = b->AIJ->m,n,i,*idx; 188 189 PetscFunctionBegin; 190 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 191 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 192 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 193 194 for (i=0; i<m; i++) { 195 idx = a->j + a->i[i] ; 196 v = a->a + a->i[i] ; 197 n = a->i[i+1] - a->i[i]; 198 alpha1 = x[2*i]; 199 alpha2 = x[2*i+1]; 200 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 201 } 202 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 203 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 204 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 210 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 211 { 212 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 214 PetscScalar *x,*y,*v,sum1, sum2; 215 int ierr,m = b->AIJ->m,*idx,*ii; 216 int n,i,jrow,j; 217 218 PetscFunctionBegin; 219 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 220 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 221 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 222 idx = a->j; 223 v = a->a; 224 ii = a->i; 225 226 for (i=0; i<m; i++) { 227 jrow = ii[i]; 228 n = ii[i+1] - jrow; 229 sum1 = 0.0; 230 sum2 = 0.0; 231 for (j=0; j<n; j++) { 232 sum1 += v[jrow]*x[2*idx[jrow]]; 233 sum2 += v[jrow]*x[2*idx[jrow]+1]; 234 jrow++; 235 } 236 y[2*i] += sum1; 237 y[2*i+1] += sum2; 238 } 239 240 PetscLogFlops(4*a->nz - 2*m); 241 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 242 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 247 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 248 { 249 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 250 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 251 PetscScalar *x,*y,*v,alpha1,alpha2; 252 int ierr,m = b->AIJ->m,n,i,*idx; 253 254 PetscFunctionBegin; 255 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 256 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 257 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 258 259 for (i=0; i<m; i++) { 260 idx = a->j + a->i[i] ; 261 v = a->a + a->i[i] ; 262 n = a->i[i+1] - a->i[i]; 263 alpha1 = x[2*i]; 264 alpha2 = x[2*i+1]; 265 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 266 } 267 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 268 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 269 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 /* --------------------------------------------------------------------------------------*/ 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatMult_SeqMAIJ_3" 275 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 276 { 277 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 279 PetscScalar *x,*y,*v,sum1, sum2, sum3; 280 int ierr,m = b->AIJ->m,*idx,*ii; 281 int n,i,jrow,j; 282 283 PetscFunctionBegin; 284 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 285 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 286 idx = a->j; 287 v = a->a; 288 ii = a->i; 289 290 for (i=0; i<m; i++) { 291 jrow = ii[i]; 292 n = ii[i+1] - jrow; 293 sum1 = 0.0; 294 sum2 = 0.0; 295 sum3 = 0.0; 296 for (j=0; j<n; j++) { 297 sum1 += v[jrow]*x[3*idx[jrow]]; 298 sum2 += v[jrow]*x[3*idx[jrow]+1]; 299 sum3 += v[jrow]*x[3*idx[jrow]+2]; 300 jrow++; 301 } 302 y[3*i] = sum1; 303 y[3*i+1] = sum2; 304 y[3*i+2] = sum3; 305 } 306 307 PetscLogFlops(6*a->nz - 3*m); 308 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 309 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 315 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 316 { 317 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 318 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 319 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 320 int ierr,m = b->AIJ->m,n,i,*idx; 321 322 PetscFunctionBegin; 323 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 324 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 325 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 326 327 for (i=0; i<m; i++) { 328 idx = a->j + a->i[i]; 329 v = a->a + a->i[i]; 330 n = a->i[i+1] - a->i[i]; 331 alpha1 = x[3*i]; 332 alpha2 = x[3*i+1]; 333 alpha3 = x[3*i+2]; 334 while (n-->0) { 335 y[3*(*idx)] += alpha1*(*v); 336 y[3*(*idx)+1] += alpha2*(*v); 337 y[3*(*idx)+2] += alpha3*(*v); 338 idx++; v++; 339 } 340 } 341 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 342 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 343 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 349 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 350 { 351 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 353 PetscScalar *x,*y,*v,sum1, sum2, sum3; 354 int ierr,m = b->AIJ->m,*idx,*ii; 355 int n,i,jrow,j; 356 357 PetscFunctionBegin; 358 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 359 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 360 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 361 idx = a->j; 362 v = a->a; 363 ii = a->i; 364 365 for (i=0; i<m; i++) { 366 jrow = ii[i]; 367 n = ii[i+1] - jrow; 368 sum1 = 0.0; 369 sum2 = 0.0; 370 sum3 = 0.0; 371 for (j=0; j<n; j++) { 372 sum1 += v[jrow]*x[3*idx[jrow]]; 373 sum2 += v[jrow]*x[3*idx[jrow]+1]; 374 sum3 += v[jrow]*x[3*idx[jrow]+2]; 375 jrow++; 376 } 377 y[3*i] += sum1; 378 y[3*i+1] += sum2; 379 y[3*i+2] += sum3; 380 } 381 382 PetscLogFlops(6*a->nz); 383 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 384 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 389 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 390 { 391 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 393 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 394 int ierr,m = b->AIJ->m,n,i,*idx; 395 396 PetscFunctionBegin; 397 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 398 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 399 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 400 for (i=0; i<m; i++) { 401 idx = a->j + a->i[i] ; 402 v = a->a + a->i[i] ; 403 n = a->i[i+1] - a->i[i]; 404 alpha1 = x[3*i]; 405 alpha2 = x[3*i+1]; 406 alpha3 = x[3*i+2]; 407 while (n-->0) { 408 y[3*(*idx)] += alpha1*(*v); 409 y[3*(*idx)+1] += alpha2*(*v); 410 y[3*(*idx)+2] += alpha3*(*v); 411 idx++; v++; 412 } 413 } 414 PetscLogFlops(6*a->nz); 415 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 416 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* ------------------------------------------------------------------------------*/ 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatMult_SeqMAIJ_4" 423 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 424 { 425 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 426 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 427 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 428 int ierr,m = b->AIJ->m,*idx,*ii; 429 int n,i,jrow,j; 430 431 PetscFunctionBegin; 432 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 434 idx = a->j; 435 v = a->a; 436 ii = a->i; 437 438 for (i=0; i<m; i++) { 439 jrow = ii[i]; 440 n = ii[i+1] - jrow; 441 sum1 = 0.0; 442 sum2 = 0.0; 443 sum3 = 0.0; 444 sum4 = 0.0; 445 for (j=0; j<n; j++) { 446 sum1 += v[jrow]*x[4*idx[jrow]]; 447 sum2 += v[jrow]*x[4*idx[jrow]+1]; 448 sum3 += v[jrow]*x[4*idx[jrow]+2]; 449 sum4 += v[jrow]*x[4*idx[jrow]+3]; 450 jrow++; 451 } 452 y[4*i] = sum1; 453 y[4*i+1] = sum2; 454 y[4*i+2] = sum3; 455 y[4*i+3] = sum4; 456 } 457 458 PetscLogFlops(8*a->nz - 4*m); 459 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 460 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 466 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 467 { 468 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 469 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 470 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 471 int ierr,m = b->AIJ->m,n,i,*idx; 472 473 PetscFunctionBegin; 474 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 475 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 476 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 477 for (i=0; i<m; i++) { 478 idx = a->j + a->i[i] ; 479 v = a->a + a->i[i] ; 480 n = a->i[i+1] - a->i[i]; 481 alpha1 = x[4*i]; 482 alpha2 = x[4*i+1]; 483 alpha3 = x[4*i+2]; 484 alpha4 = x[4*i+3]; 485 while (n-->0) { 486 y[4*(*idx)] += alpha1*(*v); 487 y[4*(*idx)+1] += alpha2*(*v); 488 y[4*(*idx)+2] += alpha3*(*v); 489 y[4*(*idx)+3] += alpha4*(*v); 490 idx++; v++; 491 } 492 } 493 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 494 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 495 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNCT__ 500 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 501 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 502 { 503 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 505 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 506 int ierr,m = b->AIJ->m,*idx,*ii; 507 int n,i,jrow,j; 508 509 PetscFunctionBegin; 510 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 511 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 512 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 513 idx = a->j; 514 v = a->a; 515 ii = a->i; 516 517 for (i=0; i<m; i++) { 518 jrow = ii[i]; 519 n = ii[i+1] - jrow; 520 sum1 = 0.0; 521 sum2 = 0.0; 522 sum3 = 0.0; 523 sum4 = 0.0; 524 for (j=0; j<n; j++) { 525 sum1 += v[jrow]*x[4*idx[jrow]]; 526 sum2 += v[jrow]*x[4*idx[jrow]+1]; 527 sum3 += v[jrow]*x[4*idx[jrow]+2]; 528 sum4 += v[jrow]*x[4*idx[jrow]+3]; 529 jrow++; 530 } 531 y[4*i] += sum1; 532 y[4*i+1] += sum2; 533 y[4*i+2] += sum3; 534 y[4*i+3] += sum4; 535 } 536 537 PetscLogFlops(8*a->nz - 4*m); 538 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 539 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 540 PetscFunctionReturn(0); 541 } 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 544 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 545 { 546 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 548 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 549 int ierr,m = b->AIJ->m,n,i,*idx; 550 551 PetscFunctionBegin; 552 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 553 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 554 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 555 556 for (i=0; i<m; i++) { 557 idx = a->j + a->i[i] ; 558 v = a->a + a->i[i] ; 559 n = a->i[i+1] - a->i[i]; 560 alpha1 = x[4*i]; 561 alpha2 = x[4*i+1]; 562 alpha3 = x[4*i+2]; 563 alpha4 = x[4*i+3]; 564 while (n-->0) { 565 y[4*(*idx)] += alpha1*(*v); 566 y[4*(*idx)+1] += alpha2*(*v); 567 y[4*(*idx)+2] += alpha3*(*v); 568 y[4*(*idx)+3] += alpha4*(*v); 569 idx++; v++; 570 } 571 } 572 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 573 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 574 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 /* ------------------------------------------------------------------------------*/ 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatMult_SeqMAIJ_5" 581 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 582 { 583 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 585 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 586 int ierr,m = b->AIJ->m,*idx,*ii; 587 int n,i,jrow,j; 588 589 PetscFunctionBegin; 590 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 591 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 592 idx = a->j; 593 v = a->a; 594 ii = a->i; 595 596 for (i=0; i<m; i++) { 597 jrow = ii[i]; 598 n = ii[i+1] - jrow; 599 sum1 = 0.0; 600 sum2 = 0.0; 601 sum3 = 0.0; 602 sum4 = 0.0; 603 sum5 = 0.0; 604 for (j=0; j<n; j++) { 605 sum1 += v[jrow]*x[5*idx[jrow]]; 606 sum2 += v[jrow]*x[5*idx[jrow]+1]; 607 sum3 += v[jrow]*x[5*idx[jrow]+2]; 608 sum4 += v[jrow]*x[5*idx[jrow]+3]; 609 sum5 += v[jrow]*x[5*idx[jrow]+4]; 610 jrow++; 611 } 612 y[5*i] = sum1; 613 y[5*i+1] = sum2; 614 y[5*i+2] = sum3; 615 y[5*i+3] = sum4; 616 y[5*i+4] = sum5; 617 } 618 619 PetscLogFlops(10*a->nz - 5*m); 620 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 621 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 627 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 628 { 629 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 630 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 631 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 632 int ierr,m = b->AIJ->m,n,i,*idx; 633 634 PetscFunctionBegin; 635 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 636 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 637 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 638 639 for (i=0; i<m; i++) { 640 idx = a->j + a->i[i] ; 641 v = a->a + a->i[i] ; 642 n = a->i[i+1] - a->i[i]; 643 alpha1 = x[5*i]; 644 alpha2 = x[5*i+1]; 645 alpha3 = x[5*i+2]; 646 alpha4 = x[5*i+3]; 647 alpha5 = x[5*i+4]; 648 while (n-->0) { 649 y[5*(*idx)] += alpha1*(*v); 650 y[5*(*idx)+1] += alpha2*(*v); 651 y[5*(*idx)+2] += alpha3*(*v); 652 y[5*(*idx)+3] += alpha4*(*v); 653 y[5*(*idx)+4] += alpha5*(*v); 654 idx++; v++; 655 } 656 } 657 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 658 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 659 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 660 PetscFunctionReturn(0); 661 } 662 663 #undef __FUNCT__ 664 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 665 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 666 { 667 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 668 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 669 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 670 int ierr,m = b->AIJ->m,*idx,*ii; 671 int n,i,jrow,j; 672 673 PetscFunctionBegin; 674 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 675 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 676 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 677 idx = a->j; 678 v = a->a; 679 ii = a->i; 680 681 for (i=0; i<m; i++) { 682 jrow = ii[i]; 683 n = ii[i+1] - jrow; 684 sum1 = 0.0; 685 sum2 = 0.0; 686 sum3 = 0.0; 687 sum4 = 0.0; 688 sum5 = 0.0; 689 for (j=0; j<n; j++) { 690 sum1 += v[jrow]*x[5*idx[jrow]]; 691 sum2 += v[jrow]*x[5*idx[jrow]+1]; 692 sum3 += v[jrow]*x[5*idx[jrow]+2]; 693 sum4 += v[jrow]*x[5*idx[jrow]+3]; 694 sum5 += v[jrow]*x[5*idx[jrow]+4]; 695 jrow++; 696 } 697 y[5*i] += sum1; 698 y[5*i+1] += sum2; 699 y[5*i+2] += sum3; 700 y[5*i+3] += sum4; 701 y[5*i+4] += sum5; 702 } 703 704 PetscLogFlops(10*a->nz); 705 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 706 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 712 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 713 { 714 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 715 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 716 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 717 int ierr,m = b->AIJ->m,n,i,*idx; 718 719 PetscFunctionBegin; 720 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 721 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 722 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 723 724 for (i=0; i<m; i++) { 725 idx = a->j + a->i[i] ; 726 v = a->a + a->i[i] ; 727 n = a->i[i+1] - a->i[i]; 728 alpha1 = x[5*i]; 729 alpha2 = x[5*i+1]; 730 alpha3 = x[5*i+2]; 731 alpha4 = x[5*i+3]; 732 alpha5 = x[5*i+4]; 733 while (n-->0) { 734 y[5*(*idx)] += alpha1*(*v); 735 y[5*(*idx)+1] += alpha2*(*v); 736 y[5*(*idx)+2] += alpha3*(*v); 737 y[5*(*idx)+3] += alpha4*(*v); 738 y[5*(*idx)+4] += alpha5*(*v); 739 idx++; v++; 740 } 741 } 742 PetscLogFlops(10*a->nz); 743 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 744 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 745 PetscFunctionReturn(0); 746 } 747 748 /* ------------------------------------------------------------------------------*/ 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatMult_SeqMAIJ_6" 751 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 752 { 753 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 754 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 755 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 756 int ierr,m = b->AIJ->m,*idx,*ii; 757 int n,i,jrow,j; 758 759 PetscFunctionBegin; 760 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 761 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 762 idx = a->j; 763 v = a->a; 764 ii = a->i; 765 766 for (i=0; i<m; i++) { 767 jrow = ii[i]; 768 n = ii[i+1] - jrow; 769 sum1 = 0.0; 770 sum2 = 0.0; 771 sum3 = 0.0; 772 sum4 = 0.0; 773 sum5 = 0.0; 774 sum6 = 0.0; 775 for (j=0; j<n; j++) { 776 sum1 += v[jrow]*x[6*idx[jrow]]; 777 sum2 += v[jrow]*x[6*idx[jrow]+1]; 778 sum3 += v[jrow]*x[6*idx[jrow]+2]; 779 sum4 += v[jrow]*x[6*idx[jrow]+3]; 780 sum5 += v[jrow]*x[6*idx[jrow]+4]; 781 sum6 += v[jrow]*x[6*idx[jrow]+5]; 782 jrow++; 783 } 784 y[6*i] = sum1; 785 y[6*i+1] = sum2; 786 y[6*i+2] = sum3; 787 y[6*i+3] = sum4; 788 y[6*i+4] = sum5; 789 y[6*i+5] = sum6; 790 } 791 792 PetscLogFlops(12*a->nz - 6*m); 793 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 794 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 800 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 801 { 802 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 803 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 804 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 805 int ierr,m = b->AIJ->m,n,i,*idx; 806 807 PetscFunctionBegin; 808 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 809 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 810 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 811 812 for (i=0; i<m; i++) { 813 idx = a->j + a->i[i] ; 814 v = a->a + a->i[i] ; 815 n = a->i[i+1] - a->i[i]; 816 alpha1 = x[6*i]; 817 alpha2 = x[6*i+1]; 818 alpha3 = x[6*i+2]; 819 alpha4 = x[6*i+3]; 820 alpha5 = x[6*i+4]; 821 alpha6 = x[6*i+5]; 822 while (n-->0) { 823 y[6*(*idx)] += alpha1*(*v); 824 y[6*(*idx)+1] += alpha2*(*v); 825 y[6*(*idx)+2] += alpha3*(*v); 826 y[6*(*idx)+3] += alpha4*(*v); 827 y[6*(*idx)+4] += alpha5*(*v); 828 y[6*(*idx)+5] += alpha6*(*v); 829 idx++; v++; 830 } 831 } 832 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 833 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 834 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 840 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 841 { 842 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 843 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 844 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 845 int ierr,m = b->AIJ->m,*idx,*ii; 846 int n,i,jrow,j; 847 848 PetscFunctionBegin; 849 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 850 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 851 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 852 idx = a->j; 853 v = a->a; 854 ii = a->i; 855 856 for (i=0; i<m; i++) { 857 jrow = ii[i]; 858 n = ii[i+1] - jrow; 859 sum1 = 0.0; 860 sum2 = 0.0; 861 sum3 = 0.0; 862 sum4 = 0.0; 863 sum5 = 0.0; 864 sum6 = 0.0; 865 for (j=0; j<n; j++) { 866 sum1 += v[jrow]*x[6*idx[jrow]]; 867 sum2 += v[jrow]*x[6*idx[jrow]+1]; 868 sum3 += v[jrow]*x[6*idx[jrow]+2]; 869 sum4 += v[jrow]*x[6*idx[jrow]+3]; 870 sum5 += v[jrow]*x[6*idx[jrow]+4]; 871 sum6 += v[jrow]*x[6*idx[jrow]+5]; 872 jrow++; 873 } 874 y[6*i] += sum1; 875 y[6*i+1] += sum2; 876 y[6*i+2] += sum3; 877 y[6*i+3] += sum4; 878 y[6*i+4] += sum5; 879 y[6*i+5] += sum6; 880 } 881 882 PetscLogFlops(12*a->nz); 883 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 884 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 885 PetscFunctionReturn(0); 886 } 887 888 #undef __FUNCT__ 889 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 890 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 891 { 892 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 893 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 894 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 895 int ierr,m = b->AIJ->m,n,i,*idx; 896 897 PetscFunctionBegin; 898 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 899 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 900 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 901 902 for (i=0; i<m; i++) { 903 idx = a->j + a->i[i] ; 904 v = a->a + a->i[i] ; 905 n = a->i[i+1] - a->i[i]; 906 alpha1 = x[6*i]; 907 alpha2 = x[6*i+1]; 908 alpha3 = x[6*i+2]; 909 alpha4 = x[6*i+3]; 910 alpha5 = x[6*i+4]; 911 alpha6 = x[6*i+5]; 912 while (n-->0) { 913 y[6*(*idx)] += alpha1*(*v); 914 y[6*(*idx)+1] += alpha2*(*v); 915 y[6*(*idx)+2] += alpha3*(*v); 916 y[6*(*idx)+3] += alpha4*(*v); 917 y[6*(*idx)+4] += alpha5*(*v); 918 y[6*(*idx)+5] += alpha6*(*v); 919 idx++; v++; 920 } 921 } 922 PetscLogFlops(12*a->nz); 923 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 924 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 /* ------------------------------------------------------------------------------*/ 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatMult_SeqMAIJ_8" 931 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 932 { 933 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 934 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 935 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 936 int ierr,m = b->AIJ->m,*idx,*ii; 937 int n,i,jrow,j; 938 939 PetscFunctionBegin; 940 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 941 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 942 idx = a->j; 943 v = a->a; 944 ii = a->i; 945 946 for (i=0; i<m; i++) { 947 jrow = ii[i]; 948 n = ii[i+1] - jrow; 949 sum1 = 0.0; 950 sum2 = 0.0; 951 sum3 = 0.0; 952 sum4 = 0.0; 953 sum5 = 0.0; 954 sum6 = 0.0; 955 sum7 = 0.0; 956 sum8 = 0.0; 957 for (j=0; j<n; j++) { 958 sum1 += v[jrow]*x[8*idx[jrow]]; 959 sum2 += v[jrow]*x[8*idx[jrow]+1]; 960 sum3 += v[jrow]*x[8*idx[jrow]+2]; 961 sum4 += v[jrow]*x[8*idx[jrow]+3]; 962 sum5 += v[jrow]*x[8*idx[jrow]+4]; 963 sum6 += v[jrow]*x[8*idx[jrow]+5]; 964 sum7 += v[jrow]*x[8*idx[jrow]+6]; 965 sum8 += v[jrow]*x[8*idx[jrow]+7]; 966 jrow++; 967 } 968 y[8*i] = sum1; 969 y[8*i+1] = sum2; 970 y[8*i+2] = sum3; 971 y[8*i+3] = sum4; 972 y[8*i+4] = sum5; 973 y[8*i+5] = sum6; 974 y[8*i+6] = sum7; 975 y[8*i+7] = sum8; 976 } 977 978 PetscLogFlops(16*a->nz - 8*m); 979 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 980 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 986 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 987 { 988 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 989 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 990 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 991 int ierr,m = b->AIJ->m,n,i,*idx; 992 993 PetscFunctionBegin; 994 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 995 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 996 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 997 998 for (i=0; i<m; i++) { 999 idx = a->j + a->i[i] ; 1000 v = a->a + a->i[i] ; 1001 n = a->i[i+1] - a->i[i]; 1002 alpha1 = x[8*i]; 1003 alpha2 = x[8*i+1]; 1004 alpha3 = x[8*i+2]; 1005 alpha4 = x[8*i+3]; 1006 alpha5 = x[8*i+4]; 1007 alpha6 = x[8*i+5]; 1008 alpha7 = x[8*i+6]; 1009 alpha8 = x[8*i+7]; 1010 while (n-->0) { 1011 y[8*(*idx)] += alpha1*(*v); 1012 y[8*(*idx)+1] += alpha2*(*v); 1013 y[8*(*idx)+2] += alpha3*(*v); 1014 y[8*(*idx)+3] += alpha4*(*v); 1015 y[8*(*idx)+4] += alpha5*(*v); 1016 y[8*(*idx)+5] += alpha6*(*v); 1017 y[8*(*idx)+6] += alpha7*(*v); 1018 y[8*(*idx)+7] += alpha8*(*v); 1019 idx++; v++; 1020 } 1021 } 1022 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1023 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1024 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1030 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1031 { 1032 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1033 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1034 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1035 int ierr,m = b->AIJ->m,*idx,*ii; 1036 int n,i,jrow,j; 1037 1038 PetscFunctionBegin; 1039 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1040 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1041 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1042 idx = a->j; 1043 v = a->a; 1044 ii = a->i; 1045 1046 for (i=0; i<m; i++) { 1047 jrow = ii[i]; 1048 n = ii[i+1] - jrow; 1049 sum1 = 0.0; 1050 sum2 = 0.0; 1051 sum3 = 0.0; 1052 sum4 = 0.0; 1053 sum5 = 0.0; 1054 sum6 = 0.0; 1055 sum7 = 0.0; 1056 sum8 = 0.0; 1057 for (j=0; j<n; j++) { 1058 sum1 += v[jrow]*x[8*idx[jrow]]; 1059 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1060 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1061 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1062 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1063 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1064 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1065 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1066 jrow++; 1067 } 1068 y[8*i] += sum1; 1069 y[8*i+1] += sum2; 1070 y[8*i+2] += sum3; 1071 y[8*i+3] += sum4; 1072 y[8*i+4] += sum5; 1073 y[8*i+5] += sum6; 1074 y[8*i+6] += sum7; 1075 y[8*i+7] += sum8; 1076 } 1077 1078 PetscLogFlops(16*a->nz); 1079 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1080 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1086 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1087 { 1088 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1089 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1090 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1091 int ierr,m = b->AIJ->m,n,i,*idx; 1092 1093 PetscFunctionBegin; 1094 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1095 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1096 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1097 for (i=0; i<m; i++) { 1098 idx = a->j + a->i[i] ; 1099 v = a->a + a->i[i] ; 1100 n = a->i[i+1] - a->i[i]; 1101 alpha1 = x[8*i]; 1102 alpha2 = x[8*i+1]; 1103 alpha3 = x[8*i+2]; 1104 alpha4 = x[8*i+3]; 1105 alpha5 = x[8*i+4]; 1106 alpha6 = x[8*i+5]; 1107 alpha7 = x[8*i+6]; 1108 alpha8 = x[8*i+7]; 1109 while (n-->0) { 1110 y[8*(*idx)] += alpha1*(*v); 1111 y[8*(*idx)+1] += alpha2*(*v); 1112 y[8*(*idx)+2] += alpha3*(*v); 1113 y[8*(*idx)+3] += alpha4*(*v); 1114 y[8*(*idx)+4] += alpha5*(*v); 1115 y[8*(*idx)+5] += alpha6*(*v); 1116 y[8*(*idx)+6] += alpha7*(*v); 1117 y[8*(*idx)+7] += alpha8*(*v); 1118 idx++; v++; 1119 } 1120 } 1121 PetscLogFlops(16*a->nz); 1122 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1123 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 /*--------------------------------------------------------------------------------------------*/ 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1130 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1131 { 1132 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1133 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1134 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1135 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1136 int ierr,m = b->AIJ->m,*idx,*ii; 1137 int n,i,jrow,j; 1138 1139 PetscFunctionBegin; 1140 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1141 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1142 idx = a->j; 1143 v = a->a; 1144 ii = a->i; 1145 1146 for (i=0; i<m; i++) { 1147 jrow = ii[i]; 1148 n = ii[i+1] - jrow; 1149 sum1 = 0.0; 1150 sum2 = 0.0; 1151 sum3 = 0.0; 1152 sum4 = 0.0; 1153 sum5 = 0.0; 1154 sum6 = 0.0; 1155 sum7 = 0.0; 1156 sum8 = 0.0; 1157 sum9 = 0.0; 1158 sum10 = 0.0; 1159 sum11 = 0.0; 1160 sum12 = 0.0; 1161 sum13 = 0.0; 1162 sum14 = 0.0; 1163 sum15 = 0.0; 1164 sum16 = 0.0; 1165 for (j=0; j<n; j++) { 1166 sum1 += v[jrow]*x[16*idx[jrow]]; 1167 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1168 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1169 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1170 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1171 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1172 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1173 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1174 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1175 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1176 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1177 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1178 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1179 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1180 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1181 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1182 jrow++; 1183 } 1184 y[16*i] = sum1; 1185 y[16*i+1] = sum2; 1186 y[16*i+2] = sum3; 1187 y[16*i+3] = sum4; 1188 y[16*i+4] = sum5; 1189 y[16*i+5] = sum6; 1190 y[16*i+6] = sum7; 1191 y[16*i+7] = sum8; 1192 y[16*i+8] = sum9; 1193 y[16*i+9] = sum10; 1194 y[16*i+10] = sum11; 1195 y[16*i+11] = sum12; 1196 y[16*i+12] = sum13; 1197 y[16*i+13] = sum14; 1198 y[16*i+14] = sum15; 1199 y[16*i+15] = sum16; 1200 } 1201 1202 PetscLogFlops(32*a->nz - 16*m); 1203 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1204 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1210 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1211 { 1212 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1214 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1215 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1216 int ierr,m = b->AIJ->m,n,i,*idx; 1217 1218 PetscFunctionBegin; 1219 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1220 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1221 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1222 1223 for (i=0; i<m; i++) { 1224 idx = a->j + a->i[i] ; 1225 v = a->a + a->i[i] ; 1226 n = a->i[i+1] - a->i[i]; 1227 alpha1 = x[16*i]; 1228 alpha2 = x[16*i+1]; 1229 alpha3 = x[16*i+2]; 1230 alpha4 = x[16*i+3]; 1231 alpha5 = x[16*i+4]; 1232 alpha6 = x[16*i+5]; 1233 alpha7 = x[16*i+6]; 1234 alpha8 = x[16*i+7]; 1235 alpha9 = x[16*i+8]; 1236 alpha10 = x[16*i+9]; 1237 alpha11 = x[16*i+10]; 1238 alpha12 = x[16*i+11]; 1239 alpha13 = x[16*i+12]; 1240 alpha14 = x[16*i+13]; 1241 alpha15 = x[16*i+14]; 1242 alpha16 = x[16*i+15]; 1243 while (n-->0) { 1244 y[16*(*idx)] += alpha1*(*v); 1245 y[16*(*idx)+1] += alpha2*(*v); 1246 y[16*(*idx)+2] += alpha3*(*v); 1247 y[16*(*idx)+3] += alpha4*(*v); 1248 y[16*(*idx)+4] += alpha5*(*v); 1249 y[16*(*idx)+5] += alpha6*(*v); 1250 y[16*(*idx)+6] += alpha7*(*v); 1251 y[16*(*idx)+7] += alpha8*(*v); 1252 y[16*(*idx)+8] += alpha9*(*v); 1253 y[16*(*idx)+9] += alpha10*(*v); 1254 y[16*(*idx)+10] += alpha11*(*v); 1255 y[16*(*idx)+11] += alpha12*(*v); 1256 y[16*(*idx)+12] += alpha13*(*v); 1257 y[16*(*idx)+13] += alpha14*(*v); 1258 y[16*(*idx)+14] += alpha15*(*v); 1259 y[16*(*idx)+15] += alpha16*(*v); 1260 idx++; v++; 1261 } 1262 } 1263 PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1264 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1265 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1271 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1272 { 1273 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1274 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1275 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1276 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1277 int ierr,m = b->AIJ->m,*idx,*ii; 1278 int n,i,jrow,j; 1279 1280 PetscFunctionBegin; 1281 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1282 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1283 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1284 idx = a->j; 1285 v = a->a; 1286 ii = a->i; 1287 1288 for (i=0; i<m; i++) { 1289 jrow = ii[i]; 1290 n = ii[i+1] - jrow; 1291 sum1 = 0.0; 1292 sum2 = 0.0; 1293 sum3 = 0.0; 1294 sum4 = 0.0; 1295 sum5 = 0.0; 1296 sum6 = 0.0; 1297 sum7 = 0.0; 1298 sum8 = 0.0; 1299 sum9 = 0.0; 1300 sum10 = 0.0; 1301 sum11 = 0.0; 1302 sum12 = 0.0; 1303 sum13 = 0.0; 1304 sum14 = 0.0; 1305 sum15 = 0.0; 1306 sum16 = 0.0; 1307 for (j=0; j<n; j++) { 1308 sum1 += v[jrow]*x[16*idx[jrow]]; 1309 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1310 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1311 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1312 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1313 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1314 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1315 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1316 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1317 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1318 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1319 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1320 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1321 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1322 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1323 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1324 jrow++; 1325 } 1326 y[16*i] += sum1; 1327 y[16*i+1] += sum2; 1328 y[16*i+2] += sum3; 1329 y[16*i+3] += sum4; 1330 y[16*i+4] += sum5; 1331 y[16*i+5] += sum6; 1332 y[16*i+6] += sum7; 1333 y[16*i+7] += sum8; 1334 y[16*i+8] += sum9; 1335 y[16*i+9] += sum10; 1336 y[16*i+10] += sum11; 1337 y[16*i+11] += sum12; 1338 y[16*i+12] += sum13; 1339 y[16*i+13] += sum14; 1340 y[16*i+14] += sum15; 1341 y[16*i+15] += sum16; 1342 } 1343 1344 PetscLogFlops(32*a->nz); 1345 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1346 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1352 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1353 { 1354 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1355 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1356 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1357 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1358 int ierr,m = b->AIJ->m,n,i,*idx; 1359 1360 PetscFunctionBegin; 1361 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1362 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1363 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1364 for (i=0; i<m; i++) { 1365 idx = a->j + a->i[i] ; 1366 v = a->a + a->i[i] ; 1367 n = a->i[i+1] - a->i[i]; 1368 alpha1 = x[16*i]; 1369 alpha2 = x[16*i+1]; 1370 alpha3 = x[16*i+2]; 1371 alpha4 = x[16*i+3]; 1372 alpha5 = x[16*i+4]; 1373 alpha6 = x[16*i+5]; 1374 alpha7 = x[16*i+6]; 1375 alpha8 = x[16*i+7]; 1376 alpha9 = x[16*i+8]; 1377 alpha10 = x[16*i+9]; 1378 alpha11 = x[16*i+10]; 1379 alpha12 = x[16*i+11]; 1380 alpha13 = x[16*i+12]; 1381 alpha14 = x[16*i+13]; 1382 alpha15 = x[16*i+14]; 1383 alpha16 = x[16*i+15]; 1384 while (n-->0) { 1385 y[16*(*idx)] += alpha1*(*v); 1386 y[16*(*idx)+1] += alpha2*(*v); 1387 y[16*(*idx)+2] += alpha3*(*v); 1388 y[16*(*idx)+3] += alpha4*(*v); 1389 y[16*(*idx)+4] += alpha5*(*v); 1390 y[16*(*idx)+5] += alpha6*(*v); 1391 y[16*(*idx)+6] += alpha7*(*v); 1392 y[16*(*idx)+7] += alpha8*(*v); 1393 y[16*(*idx)+8] += alpha9*(*v); 1394 y[16*(*idx)+9] += alpha10*(*v); 1395 y[16*(*idx)+10] += alpha11*(*v); 1396 y[16*(*idx)+11] += alpha12*(*v); 1397 y[16*(*idx)+12] += alpha13*(*v); 1398 y[16*(*idx)+13] += alpha14*(*v); 1399 y[16*(*idx)+14] += alpha15*(*v); 1400 y[16*(*idx)+15] += alpha16*(*v); 1401 idx++; v++; 1402 } 1403 } 1404 PetscLogFlops(32*a->nz); 1405 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1406 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 /*===================================================================================*/ 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1413 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1414 { 1415 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1416 int ierr; 1417 PetscFunctionBegin; 1418 1419 /* start the scatter */ 1420 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1421 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1422 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1423 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1429 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1430 { 1431 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1432 int ierr; 1433 PetscFunctionBegin; 1434 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1435 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1436 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1437 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1443 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1444 { 1445 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1446 int ierr; 1447 PetscFunctionBegin; 1448 1449 /* start the scatter */ 1450 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1451 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1452 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1453 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1459 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1460 { 1461 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1462 int ierr; 1463 PetscFunctionBegin; 1464 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1465 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1466 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1467 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 /* ---------------------------------------------------------------------------------- */ 1472 /*@C 1473 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1474 operations for multicomponent problems. It interpolates each component the same 1475 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 1476 and MATMPIAIJ for distributed matrices. 1477 1478 Operations provided: 1479 . MatMult 1480 . MatMultTranspose 1481 . MatMultAdd 1482 . MatMultTransposeAdd 1483 1484 Level: advanced 1485 1486 M*/ 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatCreateMAIJ" 1489 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 1490 { 1491 int ierr,size,n; 1492 Mat_MPIMAIJ *b; 1493 Mat B; 1494 1495 PetscFunctionBegin; 1496 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1497 1498 if (dof == 1) { 1499 *maij = A; 1500 } else { 1501 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1502 B->assembled = PETSC_TRUE; 1503 1504 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1505 if (size == 1) { 1506 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1507 B->ops->destroy = MatDestroy_SeqMAIJ; 1508 b = (Mat_MPIMAIJ*)B->data; 1509 b->dof = dof; 1510 b->AIJ = A; 1511 if (dof == 2) { 1512 B->ops->mult = MatMult_SeqMAIJ_2; 1513 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1514 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1515 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1516 } else if (dof == 3) { 1517 B->ops->mult = MatMult_SeqMAIJ_3; 1518 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1519 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1520 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1521 } else if (dof == 4) { 1522 B->ops->mult = MatMult_SeqMAIJ_4; 1523 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1524 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1525 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1526 } else if (dof == 5) { 1527 B->ops->mult = MatMult_SeqMAIJ_5; 1528 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1529 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1530 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1531 } else if (dof == 6) { 1532 B->ops->mult = MatMult_SeqMAIJ_6; 1533 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1534 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1535 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1536 } else if (dof == 8) { 1537 B->ops->mult = MatMult_SeqMAIJ_8; 1538 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1539 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1540 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1541 } else if (dof == 16) { 1542 B->ops->mult = MatMult_SeqMAIJ_16; 1543 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1544 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1545 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1546 } else { 1547 SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1548 } 1549 } else { 1550 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1551 IS from,to; 1552 Vec gvec; 1553 int *garray,i; 1554 1555 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1556 B->ops->destroy = MatDestroy_MPIMAIJ; 1557 b = (Mat_MPIMAIJ*)B->data; 1558 b->dof = dof; 1559 b->A = A; 1560 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1561 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1562 1563 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1564 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1565 1566 /* create two temporary Index sets for build scatter gather */ 1567 ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1568 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1569 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1570 ierr = PetscFree(garray);CHKERRQ(ierr); 1571 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1572 1573 /* create temporary global vector to generate scatter context */ 1574 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1575 1576 /* generate the scatter context */ 1577 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1578 1579 ierr = ISDestroy(from);CHKERRQ(ierr); 1580 ierr = ISDestroy(to);CHKERRQ(ierr); 1581 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1582 1583 B->ops->mult = MatMult_MPIMAIJ_dof; 1584 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1585 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1586 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1587 } 1588 *maij = B; 1589 } 1590 PetscFunctionReturn(0); 1591 } 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604