1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/maij/maij.h" 20 #include "src/vec/vecimpl.h" 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatMAIJGetAIJ" 24 int MatMAIJGetAIJ(Mat A,Mat *B) 25 { 26 int ierr; 27 PetscTruth ismpimaij,isseqmaij; 28 29 PetscFunctionBegin; 30 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 31 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 32 if (ismpimaij) { 33 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 34 35 *B = b->A; 36 } else if (isseqmaij) { 37 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 38 39 *B = b->AIJ; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatMAIJRedimension" 48 int MatMAIJRedimension(Mat A,int dof,Mat *B) 49 { 50 int ierr; 51 Mat Aij; 52 53 PetscFunctionBegin; 54 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 55 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatDestroy_SeqMAIJ" 61 int MatDestroy_SeqMAIJ(Mat A) 62 { 63 int ierr; 64 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 65 66 PetscFunctionBegin; 67 if (b->AIJ) { 68 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 69 } 70 ierr = PetscFree(b);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatDestroy_MPIMAIJ" 76 int MatDestroy_MPIMAIJ(Mat A) 77 { 78 int ierr; 79 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 80 81 PetscFunctionBegin; 82 if (b->AIJ) { 83 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 84 } 85 if (b->OAIJ) { 86 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 87 } 88 if (b->A) { 89 ierr = MatDestroy(b->A);CHKERRQ(ierr); 90 } 91 if (b->ctx) { 92 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 93 } 94 if (b->w) { 95 ierr = VecDestroy(b->w);CHKERRQ(ierr); 96 } 97 ierr = PetscFree(b);CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 EXTERN_C_BEGIN 102 #undef __FUNCT__ 103 #define __FUNCT__ "MatCreate_MAIJ" 104 int MatCreate_MAIJ(Mat A) 105 { 106 int ierr; 107 Mat_MPIMAIJ *b; 108 109 PetscFunctionBegin; 110 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 111 A->data = (void*)b; 112 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 113 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 114 A->factor = 0; 115 A->mapping = 0; 116 117 b->AIJ = 0; 118 b->dof = 0; 119 b->OAIJ = 0; 120 b->ctx = 0; 121 b->w = 0; 122 PetscFunctionReturn(0); 123 } 124 EXTERN_C_END 125 126 /* --------------------------------------------------------------------------------------*/ 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatMult_SeqMAIJ_2" 129 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 130 { 131 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 133 PetscScalar *x,*y,*v,sum1, sum2; 134 int ierr,m = b->AIJ->m,*idx,*ii; 135 int n,i,jrow,j; 136 137 PetscFunctionBegin; 138 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 139 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 140 idx = a->j; 141 v = a->a; 142 ii = a->i; 143 144 for (i=0; i<m; i++) { 145 jrow = ii[i]; 146 n = ii[i+1] - jrow; 147 sum1 = 0.0; 148 sum2 = 0.0; 149 for (j=0; j<n; j++) { 150 sum1 += v[jrow]*x[2*idx[jrow]]; 151 sum2 += v[jrow]*x[2*idx[jrow]+1]; 152 jrow++; 153 } 154 y[2*i] = sum1; 155 y[2*i+1] = sum2; 156 } 157 158 PetscLogFlops(4*a->nz - 2*m); 159 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 160 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 161 PetscFunctionReturn(0); 162 } 163 164 #undef __FUNCT__ 165 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 166 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 167 { 168 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 169 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 170 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 171 int ierr,m = b->AIJ->m,n,i,*idx; 172 173 PetscFunctionBegin; 174 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 175 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 176 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 177 178 for (i=0; i<m; i++) { 179 idx = a->j + a->i[i] ; 180 v = a->a + a->i[i] ; 181 n = a->i[i+1] - a->i[i]; 182 alpha1 = x[2*i]; 183 alpha2 = x[2*i+1]; 184 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 185 } 186 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 187 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 188 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 194 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 195 { 196 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 198 PetscScalar *x,*y,*v,sum1, sum2; 199 int ierr,m = b->AIJ->m,*idx,*ii; 200 int n,i,jrow,j; 201 202 PetscFunctionBegin; 203 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 204 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 205 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 206 idx = a->j; 207 v = a->a; 208 ii = a->i; 209 210 for (i=0; i<m; i++) { 211 jrow = ii[i]; 212 n = ii[i+1] - jrow; 213 sum1 = 0.0; 214 sum2 = 0.0; 215 for (j=0; j<n; j++) { 216 sum1 += v[jrow]*x[2*idx[jrow]]; 217 sum2 += v[jrow]*x[2*idx[jrow]+1]; 218 jrow++; 219 } 220 y[2*i] += sum1; 221 y[2*i+1] += sum2; 222 } 223 224 PetscLogFlops(4*a->nz - 2*m); 225 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 226 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 227 PetscFunctionReturn(0); 228 } 229 #undef __FUNCT__ 230 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 231 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 232 { 233 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 234 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 235 PetscScalar *x,*y,*v,alpha1,alpha2; 236 int ierr,m = b->AIJ->m,n,i,*idx; 237 238 PetscFunctionBegin; 239 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 240 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 241 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 242 243 for (i=0; i<m; i++) { 244 idx = a->j + a->i[i] ; 245 v = a->a + a->i[i] ; 246 n = a->i[i+1] - a->i[i]; 247 alpha1 = x[2*i]; 248 alpha2 = x[2*i+1]; 249 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 250 } 251 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 252 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 253 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 254 PetscFunctionReturn(0); 255 } 256 /* --------------------------------------------------------------------------------------*/ 257 #undef __FUNCT__ 258 #define __FUNCT__ "MatMult_SeqMAIJ_3" 259 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 260 { 261 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 263 PetscScalar *x,*y,*v,sum1, sum2, sum3; 264 int ierr,m = b->AIJ->m,*idx,*ii; 265 int n,i,jrow,j; 266 267 PetscFunctionBegin; 268 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 269 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 270 idx = a->j; 271 v = a->a; 272 ii = a->i; 273 274 for (i=0; i<m; i++) { 275 jrow = ii[i]; 276 n = ii[i+1] - jrow; 277 sum1 = 0.0; 278 sum2 = 0.0; 279 sum3 = 0.0; 280 for (j=0; j<n; j++) { 281 sum1 += v[jrow]*x[3*idx[jrow]]; 282 sum2 += v[jrow]*x[3*idx[jrow]+1]; 283 sum3 += v[jrow]*x[3*idx[jrow]+2]; 284 jrow++; 285 } 286 y[3*i] = sum1; 287 y[3*i+1] = sum2; 288 y[3*i+2] = sum3; 289 } 290 291 PetscLogFlops(6*a->nz - 3*m); 292 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 293 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 299 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 300 { 301 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 302 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 303 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 304 int ierr,m = b->AIJ->m,n,i,*idx; 305 306 PetscFunctionBegin; 307 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 308 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 309 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 310 311 for (i=0; i<m; i++) { 312 idx = a->j + a->i[i]; 313 v = a->a + a->i[i]; 314 n = a->i[i+1] - a->i[i]; 315 alpha1 = x[3*i]; 316 alpha2 = x[3*i+1]; 317 alpha3 = x[3*i+2]; 318 while (n-->0) { 319 y[3*(*idx)] += alpha1*(*v); 320 y[3*(*idx)+1] += alpha2*(*v); 321 y[3*(*idx)+2] += alpha3*(*v); 322 idx++; v++; 323 } 324 } 325 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 326 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 327 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 333 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 334 { 335 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 336 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 337 PetscScalar *x,*y,*v,sum1, sum2, sum3; 338 int ierr,m = b->AIJ->m,*idx,*ii; 339 int n,i,jrow,j; 340 341 PetscFunctionBegin; 342 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 343 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 345 idx = a->j; 346 v = a->a; 347 ii = a->i; 348 349 for (i=0; i<m; i++) { 350 jrow = ii[i]; 351 n = ii[i+1] - jrow; 352 sum1 = 0.0; 353 sum2 = 0.0; 354 sum3 = 0.0; 355 for (j=0; j<n; j++) { 356 sum1 += v[jrow]*x[3*idx[jrow]]; 357 sum2 += v[jrow]*x[3*idx[jrow]+1]; 358 sum3 += v[jrow]*x[3*idx[jrow]+2]; 359 jrow++; 360 } 361 y[3*i] += sum1; 362 y[3*i+1] += sum2; 363 y[3*i+2] += sum3; 364 } 365 366 PetscLogFlops(6*a->nz); 367 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 368 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 #undef __FUNCT__ 372 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 373 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 374 { 375 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 376 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 377 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 378 int ierr,m = b->AIJ->m,n,i,*idx; 379 380 PetscFunctionBegin; 381 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 382 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 383 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 384 for (i=0; i<m; i++) { 385 idx = a->j + a->i[i] ; 386 v = a->a + a->i[i] ; 387 n = a->i[i+1] - a->i[i]; 388 alpha1 = x[3*i]; 389 alpha2 = x[3*i+1]; 390 alpha3 = x[3*i+2]; 391 while (n-->0) { 392 y[3*(*idx)] += alpha1*(*v); 393 y[3*(*idx)+1] += alpha2*(*v); 394 y[3*(*idx)+2] += alpha3*(*v); 395 idx++; v++; 396 } 397 } 398 PetscLogFlops(6*a->nz); 399 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 400 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 /* ------------------------------------------------------------------------------*/ 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatMult_SeqMAIJ_4" 407 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 408 { 409 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 410 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 411 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 412 int ierr,m = b->AIJ->m,*idx,*ii; 413 int n,i,jrow,j; 414 415 PetscFunctionBegin; 416 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 417 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 418 idx = a->j; 419 v = a->a; 420 ii = a->i; 421 422 for (i=0; i<m; i++) { 423 jrow = ii[i]; 424 n = ii[i+1] - jrow; 425 sum1 = 0.0; 426 sum2 = 0.0; 427 sum3 = 0.0; 428 sum4 = 0.0; 429 for (j=0; j<n; j++) { 430 sum1 += v[jrow]*x[4*idx[jrow]]; 431 sum2 += v[jrow]*x[4*idx[jrow]+1]; 432 sum3 += v[jrow]*x[4*idx[jrow]+2]; 433 sum4 += v[jrow]*x[4*idx[jrow]+3]; 434 jrow++; 435 } 436 y[4*i] = sum1; 437 y[4*i+1] = sum2; 438 y[4*i+2] = sum3; 439 y[4*i+3] = sum4; 440 } 441 442 PetscLogFlops(8*a->nz - 4*m); 443 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 444 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 450 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 451 { 452 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 454 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 455 int ierr,m = b->AIJ->m,n,i,*idx; 456 457 PetscFunctionBegin; 458 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 459 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 460 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 461 for (i=0; i<m; i++) { 462 idx = a->j + a->i[i] ; 463 v = a->a + a->i[i] ; 464 n = a->i[i+1] - a->i[i]; 465 alpha1 = x[4*i]; 466 alpha2 = x[4*i+1]; 467 alpha3 = x[4*i+2]; 468 alpha4 = x[4*i+3]; 469 while (n-->0) { 470 y[4*(*idx)] += alpha1*(*v); 471 y[4*(*idx)+1] += alpha2*(*v); 472 y[4*(*idx)+2] += alpha3*(*v); 473 y[4*(*idx)+3] += alpha4*(*v); 474 idx++; v++; 475 } 476 } 477 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 478 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 479 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 485 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 486 { 487 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 488 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 489 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 490 int ierr,m = b->AIJ->m,*idx,*ii; 491 int n,i,jrow,j; 492 493 PetscFunctionBegin; 494 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 495 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 496 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 497 idx = a->j; 498 v = a->a; 499 ii = a->i; 500 501 for (i=0; i<m; i++) { 502 jrow = ii[i]; 503 n = ii[i+1] - jrow; 504 sum1 = 0.0; 505 sum2 = 0.0; 506 sum3 = 0.0; 507 sum4 = 0.0; 508 for (j=0; j<n; j++) { 509 sum1 += v[jrow]*x[4*idx[jrow]]; 510 sum2 += v[jrow]*x[4*idx[jrow]+1]; 511 sum3 += v[jrow]*x[4*idx[jrow]+2]; 512 sum4 += v[jrow]*x[4*idx[jrow]+3]; 513 jrow++; 514 } 515 y[4*i] += sum1; 516 y[4*i+1] += sum2; 517 y[4*i+2] += sum3; 518 y[4*i+3] += sum4; 519 } 520 521 PetscLogFlops(8*a->nz - 4*m); 522 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 523 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 528 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 529 { 530 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 531 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 532 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 533 int ierr,m = b->AIJ->m,n,i,*idx; 534 535 PetscFunctionBegin; 536 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 537 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 538 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 539 540 for (i=0; i<m; i++) { 541 idx = a->j + a->i[i] ; 542 v = a->a + a->i[i] ; 543 n = a->i[i+1] - a->i[i]; 544 alpha1 = x[4*i]; 545 alpha2 = x[4*i+1]; 546 alpha3 = x[4*i+2]; 547 alpha4 = x[4*i+3]; 548 while (n-->0) { 549 y[4*(*idx)] += alpha1*(*v); 550 y[4*(*idx)+1] += alpha2*(*v); 551 y[4*(*idx)+2] += alpha3*(*v); 552 y[4*(*idx)+3] += alpha4*(*v); 553 idx++; v++; 554 } 555 } 556 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 557 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 558 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 559 PetscFunctionReturn(0); 560 } 561 /* ------------------------------------------------------------------------------*/ 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatMult_SeqMAIJ_5" 565 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 566 { 567 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 568 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 569 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 570 int ierr,m = b->AIJ->m,*idx,*ii; 571 int n,i,jrow,j; 572 573 PetscFunctionBegin; 574 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 575 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 576 idx = a->j; 577 v = a->a; 578 ii = a->i; 579 580 for (i=0; i<m; i++) { 581 jrow = ii[i]; 582 n = ii[i+1] - jrow; 583 sum1 = 0.0; 584 sum2 = 0.0; 585 sum3 = 0.0; 586 sum4 = 0.0; 587 sum5 = 0.0; 588 for (j=0; j<n; j++) { 589 sum1 += v[jrow]*x[5*idx[jrow]]; 590 sum2 += v[jrow]*x[5*idx[jrow]+1]; 591 sum3 += v[jrow]*x[5*idx[jrow]+2]; 592 sum4 += v[jrow]*x[5*idx[jrow]+3]; 593 sum5 += v[jrow]*x[5*idx[jrow]+4]; 594 jrow++; 595 } 596 y[5*i] = sum1; 597 y[5*i+1] = sum2; 598 y[5*i+2] = sum3; 599 y[5*i+3] = sum4; 600 y[5*i+4] = sum5; 601 } 602 603 PetscLogFlops(10*a->nz - 5*m); 604 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 605 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 606 PetscFunctionReturn(0); 607 } 608 609 #undef __FUNCT__ 610 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 611 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 612 { 613 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 614 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 615 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 616 int ierr,m = b->AIJ->m,n,i,*idx; 617 618 PetscFunctionBegin; 619 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 620 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 621 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 622 623 for (i=0; i<m; i++) { 624 idx = a->j + a->i[i] ; 625 v = a->a + a->i[i] ; 626 n = a->i[i+1] - a->i[i]; 627 alpha1 = x[5*i]; 628 alpha2 = x[5*i+1]; 629 alpha3 = x[5*i+2]; 630 alpha4 = x[5*i+3]; 631 alpha5 = x[5*i+4]; 632 while (n-->0) { 633 y[5*(*idx)] += alpha1*(*v); 634 y[5*(*idx)+1] += alpha2*(*v); 635 y[5*(*idx)+2] += alpha3*(*v); 636 y[5*(*idx)+3] += alpha4*(*v); 637 y[5*(*idx)+4] += alpha5*(*v); 638 idx++; v++; 639 } 640 } 641 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 642 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 643 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNCT__ 648 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 649 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 650 { 651 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 652 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 653 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 654 int ierr,m = b->AIJ->m,*idx,*ii; 655 int n,i,jrow,j; 656 657 PetscFunctionBegin; 658 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 659 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 660 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 661 idx = a->j; 662 v = a->a; 663 ii = a->i; 664 665 for (i=0; i<m; i++) { 666 jrow = ii[i]; 667 n = ii[i+1] - jrow; 668 sum1 = 0.0; 669 sum2 = 0.0; 670 sum3 = 0.0; 671 sum4 = 0.0; 672 sum5 = 0.0; 673 for (j=0; j<n; j++) { 674 sum1 += v[jrow]*x[5*idx[jrow]]; 675 sum2 += v[jrow]*x[5*idx[jrow]+1]; 676 sum3 += v[jrow]*x[5*idx[jrow]+2]; 677 sum4 += v[jrow]*x[5*idx[jrow]+3]; 678 sum5 += v[jrow]*x[5*idx[jrow]+4]; 679 jrow++; 680 } 681 y[5*i] += sum1; 682 y[5*i+1] += sum2; 683 y[5*i+2] += sum3; 684 y[5*i+3] += sum4; 685 y[5*i+4] += sum5; 686 } 687 688 PetscLogFlops(10*a->nz); 689 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 690 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 696 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 697 { 698 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 699 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 700 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 701 int ierr,m = b->AIJ->m,n,i,*idx; 702 703 PetscFunctionBegin; 704 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 705 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 706 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 707 708 for (i=0; i<m; i++) { 709 idx = a->j + a->i[i] ; 710 v = a->a + a->i[i] ; 711 n = a->i[i+1] - a->i[i]; 712 alpha1 = x[5*i]; 713 alpha2 = x[5*i+1]; 714 alpha3 = x[5*i+2]; 715 alpha4 = x[5*i+3]; 716 alpha5 = x[5*i+4]; 717 while (n-->0) { 718 y[5*(*idx)] += alpha1*(*v); 719 y[5*(*idx)+1] += alpha2*(*v); 720 y[5*(*idx)+2] += alpha3*(*v); 721 y[5*(*idx)+3] += alpha4*(*v); 722 y[5*(*idx)+4] += alpha5*(*v); 723 idx++; v++; 724 } 725 } 726 PetscLogFlops(10*a->nz); 727 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 728 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 /* ------------------------------------------------------------------------------*/ 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatMult_SeqMAIJ_6" 735 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 736 { 737 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 738 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 739 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 740 int ierr,m = b->AIJ->m,*idx,*ii; 741 int n,i,jrow,j; 742 743 PetscFunctionBegin; 744 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 745 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 746 idx = a->j; 747 v = a->a; 748 ii = a->i; 749 750 for (i=0; i<m; i++) { 751 jrow = ii[i]; 752 n = ii[i+1] - jrow; 753 sum1 = 0.0; 754 sum2 = 0.0; 755 sum3 = 0.0; 756 sum4 = 0.0; 757 sum5 = 0.0; 758 sum6 = 0.0; 759 for (j=0; j<n; j++) { 760 sum1 += v[jrow]*x[6*idx[jrow]]; 761 sum2 += v[jrow]*x[6*idx[jrow]+1]; 762 sum3 += v[jrow]*x[6*idx[jrow]+2]; 763 sum4 += v[jrow]*x[6*idx[jrow]+3]; 764 sum5 += v[jrow]*x[6*idx[jrow]+4]; 765 sum6 += v[jrow]*x[6*idx[jrow]+5]; 766 jrow++; 767 } 768 y[6*i] = sum1; 769 y[6*i+1] = sum2; 770 y[6*i+2] = sum3; 771 y[6*i+3] = sum4; 772 y[6*i+4] = sum5; 773 y[6*i+5] = sum6; 774 } 775 776 PetscLogFlops(12*a->nz - 6*m); 777 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 778 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 784 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 785 { 786 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 787 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 788 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 789 int ierr,m = b->AIJ->m,n,i,*idx; 790 791 PetscFunctionBegin; 792 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 793 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 794 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 795 796 for (i=0; i<m; i++) { 797 idx = a->j + a->i[i] ; 798 v = a->a + a->i[i] ; 799 n = a->i[i+1] - a->i[i]; 800 alpha1 = x[6*i]; 801 alpha2 = x[6*i+1]; 802 alpha3 = x[6*i+2]; 803 alpha4 = x[6*i+3]; 804 alpha5 = x[6*i+4]; 805 alpha6 = x[6*i+5]; 806 while (n-->0) { 807 y[6*(*idx)] += alpha1*(*v); 808 y[6*(*idx)+1] += alpha2*(*v); 809 y[6*(*idx)+2] += alpha3*(*v); 810 y[6*(*idx)+3] += alpha4*(*v); 811 y[6*(*idx)+4] += alpha5*(*v); 812 y[6*(*idx)+5] += alpha6*(*v); 813 idx++; v++; 814 } 815 } 816 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 817 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 818 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 824 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 827 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 828 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 829 int ierr,m = b->AIJ->m,*idx,*ii; 830 int n,i,jrow,j; 831 832 PetscFunctionBegin; 833 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 834 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 835 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 836 idx = a->j; 837 v = a->a; 838 ii = a->i; 839 840 for (i=0; i<m; i++) { 841 jrow = ii[i]; 842 n = ii[i+1] - jrow; 843 sum1 = 0.0; 844 sum2 = 0.0; 845 sum3 = 0.0; 846 sum4 = 0.0; 847 sum5 = 0.0; 848 sum6 = 0.0; 849 for (j=0; j<n; j++) { 850 sum1 += v[jrow]*x[6*idx[jrow]]; 851 sum2 += v[jrow]*x[6*idx[jrow]+1]; 852 sum3 += v[jrow]*x[6*idx[jrow]+2]; 853 sum4 += v[jrow]*x[6*idx[jrow]+3]; 854 sum5 += v[jrow]*x[6*idx[jrow]+4]; 855 sum6 += v[jrow]*x[6*idx[jrow]+5]; 856 jrow++; 857 } 858 y[6*i] += sum1; 859 y[6*i+1] += sum2; 860 y[6*i+2] += sum3; 861 y[6*i+3] += sum4; 862 y[6*i+4] += sum5; 863 y[6*i+5] += sum6; 864 } 865 866 PetscLogFlops(12*a->nz); 867 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 868 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 869 PetscFunctionReturn(0); 870 } 871 872 #undef __FUNCT__ 873 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 874 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 875 { 876 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 877 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 878 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 879 int ierr,m = b->AIJ->m,n,i,*idx; 880 881 PetscFunctionBegin; 882 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 883 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 884 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 885 886 for (i=0; i<m; i++) { 887 idx = a->j + a->i[i] ; 888 v = a->a + a->i[i] ; 889 n = a->i[i+1] - a->i[i]; 890 alpha1 = x[6*i]; 891 alpha2 = x[6*i+1]; 892 alpha3 = x[6*i+2]; 893 alpha4 = x[6*i+3]; 894 alpha5 = x[6*i+4]; 895 alpha6 = x[6*i+5]; 896 while (n-->0) { 897 y[6*(*idx)] += alpha1*(*v); 898 y[6*(*idx)+1] += alpha2*(*v); 899 y[6*(*idx)+2] += alpha3*(*v); 900 y[6*(*idx)+3] += alpha4*(*v); 901 y[6*(*idx)+4] += alpha5*(*v); 902 y[6*(*idx)+5] += alpha6*(*v); 903 idx++; v++; 904 } 905 } 906 PetscLogFlops(12*a->nz); 907 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 908 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 /* ------------------------------------------------------------------------------*/ 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatMult_SeqMAIJ_8" 915 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 916 { 917 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 918 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 919 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 920 int ierr,m = b->AIJ->m,*idx,*ii; 921 int n,i,jrow,j; 922 923 PetscFunctionBegin; 924 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 925 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 926 idx = a->j; 927 v = a->a; 928 ii = a->i; 929 930 for (i=0; i<m; i++) { 931 jrow = ii[i]; 932 n = ii[i+1] - jrow; 933 sum1 = 0.0; 934 sum2 = 0.0; 935 sum3 = 0.0; 936 sum4 = 0.0; 937 sum5 = 0.0; 938 sum6 = 0.0; 939 sum7 = 0.0; 940 sum8 = 0.0; 941 for (j=0; j<n; j++) { 942 sum1 += v[jrow]*x[8*idx[jrow]]; 943 sum2 += v[jrow]*x[8*idx[jrow]+1]; 944 sum3 += v[jrow]*x[8*idx[jrow]+2]; 945 sum4 += v[jrow]*x[8*idx[jrow]+3]; 946 sum5 += v[jrow]*x[8*idx[jrow]+4]; 947 sum6 += v[jrow]*x[8*idx[jrow]+5]; 948 sum7 += v[jrow]*x[8*idx[jrow]+6]; 949 sum8 += v[jrow]*x[8*idx[jrow]+7]; 950 jrow++; 951 } 952 y[8*i] = sum1; 953 y[8*i+1] = sum2; 954 y[8*i+2] = sum3; 955 y[8*i+3] = sum4; 956 y[8*i+4] = sum5; 957 y[8*i+5] = sum6; 958 y[8*i+6] = sum7; 959 y[8*i+7] = sum8; 960 } 961 962 PetscLogFlops(16*a->nz - 8*m); 963 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 964 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 970 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 971 { 972 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 973 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 974 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 975 int ierr,m = b->AIJ->m,n,i,*idx; 976 977 PetscFunctionBegin; 978 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 979 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 980 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 981 982 for (i=0; i<m; i++) { 983 idx = a->j + a->i[i] ; 984 v = a->a + a->i[i] ; 985 n = a->i[i+1] - a->i[i]; 986 alpha1 = x[8*i]; 987 alpha2 = x[8*i+1]; 988 alpha3 = x[8*i+2]; 989 alpha4 = x[8*i+3]; 990 alpha5 = x[8*i+4]; 991 alpha6 = x[8*i+5]; 992 alpha7 = x[8*i+6]; 993 alpha8 = x[8*i+7]; 994 while (n-->0) { 995 y[8*(*idx)] += alpha1*(*v); 996 y[8*(*idx)+1] += alpha2*(*v); 997 y[8*(*idx)+2] += alpha3*(*v); 998 y[8*(*idx)+3] += alpha4*(*v); 999 y[8*(*idx)+4] += alpha5*(*v); 1000 y[8*(*idx)+5] += alpha6*(*v); 1001 y[8*(*idx)+6] += alpha7*(*v); 1002 y[8*(*idx)+7] += alpha8*(*v); 1003 idx++; v++; 1004 } 1005 } 1006 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1007 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1008 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1014 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1015 { 1016 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1017 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1018 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1019 int ierr,m = b->AIJ->m,*idx,*ii; 1020 int n,i,jrow,j; 1021 1022 PetscFunctionBegin; 1023 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1024 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1025 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1026 idx = a->j; 1027 v = a->a; 1028 ii = a->i; 1029 1030 for (i=0; i<m; i++) { 1031 jrow = ii[i]; 1032 n = ii[i+1] - jrow; 1033 sum1 = 0.0; 1034 sum2 = 0.0; 1035 sum3 = 0.0; 1036 sum4 = 0.0; 1037 sum5 = 0.0; 1038 sum6 = 0.0; 1039 sum7 = 0.0; 1040 sum8 = 0.0; 1041 for (j=0; j<n; j++) { 1042 sum1 += v[jrow]*x[8*idx[jrow]]; 1043 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1044 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1045 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1046 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1047 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1048 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1049 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1050 jrow++; 1051 } 1052 y[8*i] += sum1; 1053 y[8*i+1] += sum2; 1054 y[8*i+2] += sum3; 1055 y[8*i+3] += sum4; 1056 y[8*i+4] += sum5; 1057 y[8*i+5] += sum6; 1058 y[8*i+6] += sum7; 1059 y[8*i+7] += sum8; 1060 } 1061 1062 PetscLogFlops(16*a->nz); 1063 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1064 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1070 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1071 { 1072 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1073 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1074 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1075 int ierr,m = b->AIJ->m,n,i,*idx; 1076 1077 PetscFunctionBegin; 1078 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1079 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1080 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1081 for (i=0; i<m; i++) { 1082 idx = a->j + a->i[i] ; 1083 v = a->a + a->i[i] ; 1084 n = a->i[i+1] - a->i[i]; 1085 alpha1 = x[8*i]; 1086 alpha2 = x[8*i+1]; 1087 alpha3 = x[8*i+2]; 1088 alpha4 = x[8*i+3]; 1089 alpha5 = x[8*i+4]; 1090 alpha6 = x[8*i+5]; 1091 alpha7 = x[8*i+6]; 1092 alpha8 = x[8*i+7]; 1093 while (n-->0) { 1094 y[8*(*idx)] += alpha1*(*v); 1095 y[8*(*idx)+1] += alpha2*(*v); 1096 y[8*(*idx)+2] += alpha3*(*v); 1097 y[8*(*idx)+3] += alpha4*(*v); 1098 y[8*(*idx)+4] += alpha5*(*v); 1099 y[8*(*idx)+5] += alpha6*(*v); 1100 y[8*(*idx)+6] += alpha7*(*v); 1101 y[8*(*idx)+7] += alpha8*(*v); 1102 idx++; v++; 1103 } 1104 } 1105 PetscLogFlops(16*a->nz); 1106 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1107 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1108 PetscFunctionReturn(0); 1109 } 1110 1111 /*--------------------------------------------------------------------------------------------*/ 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1114 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1115 { 1116 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1117 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1118 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1119 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1120 int ierr,m = b->AIJ->m,*idx,*ii; 1121 int n,i,jrow,j; 1122 1123 PetscFunctionBegin; 1124 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1125 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1126 idx = a->j; 1127 v = a->a; 1128 ii = a->i; 1129 1130 for (i=0; i<m; i++) { 1131 jrow = ii[i]; 1132 n = ii[i+1] - jrow; 1133 sum1 = 0.0; 1134 sum2 = 0.0; 1135 sum3 = 0.0; 1136 sum4 = 0.0; 1137 sum5 = 0.0; 1138 sum6 = 0.0; 1139 sum7 = 0.0; 1140 sum8 = 0.0; 1141 sum9 = 0.0; 1142 sum10 = 0.0; 1143 sum11 = 0.0; 1144 sum12 = 0.0; 1145 sum13 = 0.0; 1146 sum14 = 0.0; 1147 sum15 = 0.0; 1148 sum16 = 0.0; 1149 for (j=0; j<n; j++) { 1150 sum1 += v[jrow]*x[16*idx[jrow]]; 1151 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1152 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1153 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1154 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1155 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1156 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1157 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1158 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1159 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1160 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1161 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1162 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1163 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1164 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1165 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1166 jrow++; 1167 } 1168 y[16*i] = sum1; 1169 y[16*i+1] = sum2; 1170 y[16*i+2] = sum3; 1171 y[16*i+3] = sum4; 1172 y[16*i+4] = sum5; 1173 y[16*i+5] = sum6; 1174 y[16*i+6] = sum7; 1175 y[16*i+7] = sum8; 1176 y[16*i+8] = sum9; 1177 y[16*i+9] = sum10; 1178 y[16*i+10] = sum11; 1179 y[16*i+11] = sum12; 1180 y[16*i+12] = sum13; 1181 y[16*i+13] = sum14; 1182 y[16*i+14] = sum15; 1183 y[16*i+15] = sum16; 1184 } 1185 1186 PetscLogFlops(32*a->nz - 16*m); 1187 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1188 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1194 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1195 { 1196 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1198 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1199 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1200 int ierr,m = b->AIJ->m,n,i,*idx; 1201 1202 PetscFunctionBegin; 1203 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1204 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1205 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1206 1207 for (i=0; i<m; i++) { 1208 idx = a->j + a->i[i] ; 1209 v = a->a + a->i[i] ; 1210 n = a->i[i+1] - a->i[i]; 1211 alpha1 = x[16*i]; 1212 alpha2 = x[16*i+1]; 1213 alpha3 = x[16*i+2]; 1214 alpha4 = x[16*i+3]; 1215 alpha5 = x[16*i+4]; 1216 alpha6 = x[16*i+5]; 1217 alpha7 = x[16*i+6]; 1218 alpha8 = x[16*i+7]; 1219 alpha9 = x[16*i+8]; 1220 alpha10 = x[16*i+9]; 1221 alpha11 = x[16*i+10]; 1222 alpha12 = x[16*i+11]; 1223 alpha13 = x[16*i+12]; 1224 alpha14 = x[16*i+13]; 1225 alpha15 = x[16*i+14]; 1226 alpha16 = x[16*i+15]; 1227 while (n-->0) { 1228 y[16*(*idx)] += alpha1*(*v); 1229 y[16*(*idx)+1] += alpha2*(*v); 1230 y[16*(*idx)+2] += alpha3*(*v); 1231 y[16*(*idx)+3] += alpha4*(*v); 1232 y[16*(*idx)+4] += alpha5*(*v); 1233 y[16*(*idx)+5] += alpha6*(*v); 1234 y[16*(*idx)+6] += alpha7*(*v); 1235 y[16*(*idx)+7] += alpha8*(*v); 1236 y[16*(*idx)+8] += alpha9*(*v); 1237 y[16*(*idx)+9] += alpha10*(*v); 1238 y[16*(*idx)+10] += alpha11*(*v); 1239 y[16*(*idx)+11] += alpha12*(*v); 1240 y[16*(*idx)+12] += alpha13*(*v); 1241 y[16*(*idx)+13] += alpha14*(*v); 1242 y[16*(*idx)+14] += alpha15*(*v); 1243 y[16*(*idx)+15] += alpha16*(*v); 1244 idx++; v++; 1245 } 1246 } 1247 PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1248 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1249 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1255 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1256 { 1257 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1258 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1259 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1260 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1261 int ierr,m = b->AIJ->m,*idx,*ii; 1262 int n,i,jrow,j; 1263 1264 PetscFunctionBegin; 1265 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1266 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1267 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1268 idx = a->j; 1269 v = a->a; 1270 ii = a->i; 1271 1272 for (i=0; i<m; i++) { 1273 jrow = ii[i]; 1274 n = ii[i+1] - jrow; 1275 sum1 = 0.0; 1276 sum2 = 0.0; 1277 sum3 = 0.0; 1278 sum4 = 0.0; 1279 sum5 = 0.0; 1280 sum6 = 0.0; 1281 sum7 = 0.0; 1282 sum8 = 0.0; 1283 sum9 = 0.0; 1284 sum10 = 0.0; 1285 sum11 = 0.0; 1286 sum12 = 0.0; 1287 sum13 = 0.0; 1288 sum14 = 0.0; 1289 sum15 = 0.0; 1290 sum16 = 0.0; 1291 for (j=0; j<n; j++) { 1292 sum1 += v[jrow]*x[16*idx[jrow]]; 1293 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1294 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1295 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1296 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1297 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1298 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1299 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1300 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1301 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1302 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1303 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1304 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1305 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1306 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1307 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1308 jrow++; 1309 } 1310 y[16*i] += sum1; 1311 y[16*i+1] += sum2; 1312 y[16*i+2] += sum3; 1313 y[16*i+3] += sum4; 1314 y[16*i+4] += sum5; 1315 y[16*i+5] += sum6; 1316 y[16*i+6] += sum7; 1317 y[16*i+7] += sum8; 1318 y[16*i+8] += sum9; 1319 y[16*i+9] += sum10; 1320 y[16*i+10] += sum11; 1321 y[16*i+11] += sum12; 1322 y[16*i+12] += sum13; 1323 y[16*i+13] += sum14; 1324 y[16*i+14] += sum15; 1325 y[16*i+15] += sum16; 1326 } 1327 1328 PetscLogFlops(32*a->nz); 1329 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1330 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1336 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1337 { 1338 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1339 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1340 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1341 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1342 int ierr,m = b->AIJ->m,n,i,*idx; 1343 1344 PetscFunctionBegin; 1345 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1346 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1347 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1348 for (i=0; i<m; i++) { 1349 idx = a->j + a->i[i] ; 1350 v = a->a + a->i[i] ; 1351 n = a->i[i+1] - a->i[i]; 1352 alpha1 = x[16*i]; 1353 alpha2 = x[16*i+1]; 1354 alpha3 = x[16*i+2]; 1355 alpha4 = x[16*i+3]; 1356 alpha5 = x[16*i+4]; 1357 alpha6 = x[16*i+5]; 1358 alpha7 = x[16*i+6]; 1359 alpha8 = x[16*i+7]; 1360 alpha9 = x[16*i+8]; 1361 alpha10 = x[16*i+9]; 1362 alpha11 = x[16*i+10]; 1363 alpha12 = x[16*i+11]; 1364 alpha13 = x[16*i+12]; 1365 alpha14 = x[16*i+13]; 1366 alpha15 = x[16*i+14]; 1367 alpha16 = x[16*i+15]; 1368 while (n-->0) { 1369 y[16*(*idx)] += alpha1*(*v); 1370 y[16*(*idx)+1] += alpha2*(*v); 1371 y[16*(*idx)+2] += alpha3*(*v); 1372 y[16*(*idx)+3] += alpha4*(*v); 1373 y[16*(*idx)+4] += alpha5*(*v); 1374 y[16*(*idx)+5] += alpha6*(*v); 1375 y[16*(*idx)+6] += alpha7*(*v); 1376 y[16*(*idx)+7] += alpha8*(*v); 1377 y[16*(*idx)+8] += alpha9*(*v); 1378 y[16*(*idx)+9] += alpha10*(*v); 1379 y[16*(*idx)+10] += alpha11*(*v); 1380 y[16*(*idx)+11] += alpha12*(*v); 1381 y[16*(*idx)+12] += alpha13*(*v); 1382 y[16*(*idx)+13] += alpha14*(*v); 1383 y[16*(*idx)+14] += alpha15*(*v); 1384 y[16*(*idx)+15] += alpha16*(*v); 1385 idx++; v++; 1386 } 1387 } 1388 PetscLogFlops(32*a->nz); 1389 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1390 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 1394 /*===================================================================================*/ 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1397 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1398 { 1399 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1400 int ierr; 1401 PetscFunctionBegin; 1402 1403 /* start the scatter */ 1404 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1405 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1406 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1407 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1413 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1414 { 1415 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1416 int ierr; 1417 PetscFunctionBegin; 1418 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1419 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1420 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1421 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1427 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1428 { 1429 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1430 int ierr; 1431 PetscFunctionBegin; 1432 1433 /* start the scatter */ 1434 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1435 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1436 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1437 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1443 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1444 { 1445 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1446 int ierr; 1447 PetscFunctionBegin; 1448 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1449 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1450 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1451 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1452 PetscFunctionReturn(0); 1453 } 1454 1455 /* ---------------------------------------------------------------------------------- */ 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatCreateMAIJ" 1458 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 1459 { 1460 int ierr,size,n; 1461 Mat_MPIMAIJ *b; 1462 Mat B; 1463 1464 PetscFunctionBegin; 1465 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1466 1467 if (dof == 1) { 1468 *maij = A; 1469 } else { 1470 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1471 B->assembled = PETSC_TRUE; 1472 1473 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1474 if (size == 1) { 1475 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1476 B->ops->destroy = MatDestroy_SeqMAIJ; 1477 b = (Mat_MPIMAIJ*)B->data; 1478 b->dof = dof; 1479 b->AIJ = A; 1480 if (dof == 2) { 1481 B->ops->mult = MatMult_SeqMAIJ_2; 1482 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1483 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1484 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1485 } else if (dof == 3) { 1486 B->ops->mult = MatMult_SeqMAIJ_3; 1487 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1488 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1489 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1490 } else if (dof == 4) { 1491 B->ops->mult = MatMult_SeqMAIJ_4; 1492 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1493 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1494 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1495 } else if (dof == 5) { 1496 B->ops->mult = MatMult_SeqMAIJ_5; 1497 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1498 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1499 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1500 } else if (dof == 6) { 1501 B->ops->mult = MatMult_SeqMAIJ_6; 1502 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1503 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1504 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1505 } else if (dof == 8) { 1506 B->ops->mult = MatMult_SeqMAIJ_8; 1507 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1508 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1509 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1510 } else if (dof == 16) { 1511 B->ops->mult = MatMult_SeqMAIJ_16; 1512 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1513 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1514 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1515 } else { 1516 SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1517 } 1518 } else { 1519 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1520 IS from,to; 1521 Vec gvec; 1522 int *garray,i; 1523 1524 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1525 B->ops->destroy = MatDestroy_MPIMAIJ; 1526 b = (Mat_MPIMAIJ*)B->data; 1527 b->dof = dof; 1528 b->A = A; 1529 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1530 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1531 1532 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1533 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1534 1535 /* create two temporary Index sets for build scatter gather */ 1536 ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1537 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1538 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1539 ierr = PetscFree(garray);CHKERRQ(ierr); 1540 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1541 1542 /* create temporary global vector to generate scatter context */ 1543 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1544 1545 /* generate the scatter context */ 1546 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1547 1548 ierr = ISDestroy(from);CHKERRQ(ierr); 1549 ierr = ISDestroy(to);CHKERRQ(ierr); 1550 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1551 1552 B->ops->mult = MatMult_MPIMAIJ_dof; 1553 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1554 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1555 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1556 } 1557 *maij = B; 1558 } 1559 PetscFunctionReturn(0); 1560 } 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573