1 /* 2 Defines the basic matrix operations for the MAIJ matrix storage format. 3 This format is used for restriction and interpolation operations for 4 multicomponent problems. It interpolates each component the same way 5 independently. 6 7 We provide: 8 MatMult() 9 MatMultTranspose() 10 MatMultTransposeAdd() 11 MatMultAdd() 12 and 13 MatCreateMAIJ(Mat,dof,Mat*) 14 15 This single directory handles both the sequential and parallel codes 16 */ 17 18 #include "src/mat/impls/maij/maij.h" 19 #include "vecimpl.h" 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatMAIJGetAIJ" 23 int MatMAIJGetAIJ(Mat A,Mat *B) 24 { 25 int ierr; 26 PetscTruth ismpimaij,isseqmaij; 27 28 PetscFunctionBegin; 29 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 30 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 31 if (ismpimaij) { 32 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 33 34 *B = b->A; 35 } else if (isseqmaij) { 36 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 37 38 *B = b->AIJ; 39 } else { 40 *B = A; 41 } 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatMAIJRedimension" 47 int MatMAIJRedimension(Mat A,int dof,Mat *B) 48 { 49 int ierr; 50 Mat Aij; 51 52 PetscFunctionBegin; 53 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 54 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDestroy_SeqMAIJ" 60 int MatDestroy_SeqMAIJ(Mat A) 61 { 62 int ierr; 63 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 64 65 PetscFunctionBegin; 66 if (b->AIJ) { 67 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 68 } 69 ierr = PetscFree(b);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatDestroy_MPIMAIJ" 75 int MatDestroy_MPIMAIJ(Mat A) 76 { 77 int ierr; 78 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 79 80 PetscFunctionBegin; 81 if (b->AIJ) { 82 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 83 } 84 if (b->OAIJ) { 85 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 86 } 87 if (b->A) { 88 ierr = MatDestroy(b->A);CHKERRQ(ierr); 89 } 90 if (b->ctx) { 91 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 92 } 93 if (b->w) { 94 ierr = VecDestroy(b->w);CHKERRQ(ierr); 95 } 96 ierr = PetscFree(b);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 /*MC 101 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 102 multicomponent problems, interpolating or restricting each component the same way independently. 103 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 104 105 Operations provided: 106 . MatMult 107 . MatMultTranspose 108 . MatMultAdd 109 . MatMultTransposeAdd 110 111 Level: advanced 112 113 .seealso: MatCreateSeqDense 114 M*/ 115 116 EXTERN_C_BEGIN 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatCreate_MAIJ" 119 int MatCreate_MAIJ(Mat A) 120 { 121 int ierr; 122 Mat_MPIMAIJ *b; 123 124 PetscFunctionBegin; 125 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 126 A->data = (void*)b; 127 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 128 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 129 A->factor = 0; 130 A->mapping = 0; 131 132 b->AIJ = 0; 133 b->dof = 0; 134 b->OAIJ = 0; 135 b->ctx = 0; 136 b->w = 0; 137 PetscFunctionReturn(0); 138 } 139 EXTERN_C_END 140 141 /* --------------------------------------------------------------------------------------*/ 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatMult_SeqMAIJ_2" 144 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 145 { 146 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 147 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 148 PetscScalar *x,*y,*v,sum1, sum2; 149 int ierr,m = b->AIJ->m,*idx,*ii; 150 int n,i,jrow,j; 151 152 PetscFunctionBegin; 153 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 154 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 155 idx = a->j; 156 v = a->a; 157 ii = a->i; 158 159 for (i=0; i<m; i++) { 160 jrow = ii[i]; 161 n = ii[i+1] - jrow; 162 sum1 = 0.0; 163 sum2 = 0.0; 164 for (j=0; j<n; j++) { 165 sum1 += v[jrow]*x[2*idx[jrow]]; 166 sum2 += v[jrow]*x[2*idx[jrow]+1]; 167 jrow++; 168 } 169 y[2*i] = sum1; 170 y[2*i+1] = sum2; 171 } 172 173 PetscLogFlops(4*a->nz - 2*m); 174 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 181 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 182 { 183 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 185 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 186 int ierr,m = b->AIJ->m,n,i,*idx; 187 188 PetscFunctionBegin; 189 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 190 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 191 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 192 193 for (i=0; i<m; i++) { 194 idx = a->j + a->i[i] ; 195 v = a->a + a->i[i] ; 196 n = a->i[i+1] - a->i[i]; 197 alpha1 = x[2*i]; 198 alpha2 = x[2*i+1]; 199 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 200 } 201 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 202 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 203 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 209 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 210 { 211 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 212 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 213 PetscScalar *x,*y,*v,sum1, sum2; 214 int ierr,m = b->AIJ->m,*idx,*ii; 215 int n,i,jrow,j; 216 217 PetscFunctionBegin; 218 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 219 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 220 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 221 idx = a->j; 222 v = a->a; 223 ii = a->i; 224 225 for (i=0; i<m; i++) { 226 jrow = ii[i]; 227 n = ii[i+1] - jrow; 228 sum1 = 0.0; 229 sum2 = 0.0; 230 for (j=0; j<n; j++) { 231 sum1 += v[jrow]*x[2*idx[jrow]]; 232 sum2 += v[jrow]*x[2*idx[jrow]+1]; 233 jrow++; 234 } 235 y[2*i] += sum1; 236 y[2*i+1] += sum2; 237 } 238 239 PetscLogFlops(4*a->nz - 2*m); 240 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 241 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 242 PetscFunctionReturn(0); 243 } 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 246 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 247 { 248 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 249 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 250 PetscScalar *x,*y,*v,alpha1,alpha2; 251 int ierr,m = b->AIJ->m,n,i,*idx; 252 253 PetscFunctionBegin; 254 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 255 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 256 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 257 258 for (i=0; i<m; i++) { 259 idx = a->j + a->i[i] ; 260 v = a->a + a->i[i] ; 261 n = a->i[i+1] - a->i[i]; 262 alpha1 = x[2*i]; 263 alpha2 = x[2*i+1]; 264 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 265 } 266 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 267 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 268 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 269 PetscFunctionReturn(0); 270 } 271 /* --------------------------------------------------------------------------------------*/ 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatMult_SeqMAIJ_3" 274 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 275 { 276 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 278 PetscScalar *x,*y,*v,sum1, sum2, sum3; 279 int ierr,m = b->AIJ->m,*idx,*ii; 280 int n,i,jrow,j; 281 282 PetscFunctionBegin; 283 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 284 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 285 idx = a->j; 286 v = a->a; 287 ii = a->i; 288 289 for (i=0; i<m; i++) { 290 jrow = ii[i]; 291 n = ii[i+1] - jrow; 292 sum1 = 0.0; 293 sum2 = 0.0; 294 sum3 = 0.0; 295 for (j=0; j<n; j++) { 296 sum1 += v[jrow]*x[3*idx[jrow]]; 297 sum2 += v[jrow]*x[3*idx[jrow]+1]; 298 sum3 += v[jrow]*x[3*idx[jrow]+2]; 299 jrow++; 300 } 301 y[3*i] = sum1; 302 y[3*i+1] = sum2; 303 y[3*i+2] = sum3; 304 } 305 306 PetscLogFlops(6*a->nz - 3*m); 307 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 308 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 314 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 315 { 316 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 317 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 318 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 319 int ierr,m = b->AIJ->m,n,i,*idx; 320 321 PetscFunctionBegin; 322 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 323 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 325 326 for (i=0; i<m; i++) { 327 idx = a->j + a->i[i]; 328 v = a->a + a->i[i]; 329 n = a->i[i+1] - a->i[i]; 330 alpha1 = x[3*i]; 331 alpha2 = x[3*i+1]; 332 alpha3 = x[3*i+2]; 333 while (n-->0) { 334 y[3*(*idx)] += alpha1*(*v); 335 y[3*(*idx)+1] += alpha2*(*v); 336 y[3*(*idx)+2] += alpha3*(*v); 337 idx++; v++; 338 } 339 } 340 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 341 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 342 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 348 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 349 { 350 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 351 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 352 PetscScalar *x,*y,*v,sum1, sum2, sum3; 353 int ierr,m = b->AIJ->m,*idx,*ii; 354 int n,i,jrow,j; 355 356 PetscFunctionBegin; 357 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 358 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 359 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 360 idx = a->j; 361 v = a->a; 362 ii = a->i; 363 364 for (i=0; i<m; i++) { 365 jrow = ii[i]; 366 n = ii[i+1] - jrow; 367 sum1 = 0.0; 368 sum2 = 0.0; 369 sum3 = 0.0; 370 for (j=0; j<n; j++) { 371 sum1 += v[jrow]*x[3*idx[jrow]]; 372 sum2 += v[jrow]*x[3*idx[jrow]+1]; 373 sum3 += v[jrow]*x[3*idx[jrow]+2]; 374 jrow++; 375 } 376 y[3*i] += sum1; 377 y[3*i+1] += sum2; 378 y[3*i+2] += sum3; 379 } 380 381 PetscLogFlops(6*a->nz); 382 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 383 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 #undef __FUNCT__ 387 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 388 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 389 { 390 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 391 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 392 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 393 int ierr,m = b->AIJ->m,n,i,*idx; 394 395 PetscFunctionBegin; 396 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 397 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 398 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 399 for (i=0; i<m; i++) { 400 idx = a->j + a->i[i] ; 401 v = a->a + a->i[i] ; 402 n = a->i[i+1] - a->i[i]; 403 alpha1 = x[3*i]; 404 alpha2 = x[3*i+1]; 405 alpha3 = x[3*i+2]; 406 while (n-->0) { 407 y[3*(*idx)] += alpha1*(*v); 408 y[3*(*idx)+1] += alpha2*(*v); 409 y[3*(*idx)+2] += alpha3*(*v); 410 idx++; v++; 411 } 412 } 413 PetscLogFlops(6*a->nz); 414 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 415 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 /* ------------------------------------------------------------------------------*/ 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatMult_SeqMAIJ_4" 422 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 423 { 424 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 425 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 426 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 427 int ierr,m = b->AIJ->m,*idx,*ii; 428 int n,i,jrow,j; 429 430 PetscFunctionBegin; 431 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 432 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 433 idx = a->j; 434 v = a->a; 435 ii = a->i; 436 437 for (i=0; i<m; i++) { 438 jrow = ii[i]; 439 n = ii[i+1] - jrow; 440 sum1 = 0.0; 441 sum2 = 0.0; 442 sum3 = 0.0; 443 sum4 = 0.0; 444 for (j=0; j<n; j++) { 445 sum1 += v[jrow]*x[4*idx[jrow]]; 446 sum2 += v[jrow]*x[4*idx[jrow]+1]; 447 sum3 += v[jrow]*x[4*idx[jrow]+2]; 448 sum4 += v[jrow]*x[4*idx[jrow]+3]; 449 jrow++; 450 } 451 y[4*i] = sum1; 452 y[4*i+1] = sum2; 453 y[4*i+2] = sum3; 454 y[4*i+3] = sum4; 455 } 456 457 PetscLogFlops(8*a->nz - 4*m); 458 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 465 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 466 { 467 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 469 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 470 int ierr,m = b->AIJ->m,n,i,*idx; 471 472 PetscFunctionBegin; 473 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 474 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 475 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 476 for (i=0; i<m; i++) { 477 idx = a->j + a->i[i] ; 478 v = a->a + a->i[i] ; 479 n = a->i[i+1] - a->i[i]; 480 alpha1 = x[4*i]; 481 alpha2 = x[4*i+1]; 482 alpha3 = x[4*i+2]; 483 alpha4 = x[4*i+3]; 484 while (n-->0) { 485 y[4*(*idx)] += alpha1*(*v); 486 y[4*(*idx)+1] += alpha2*(*v); 487 y[4*(*idx)+2] += alpha3*(*v); 488 y[4*(*idx)+3] += alpha4*(*v); 489 idx++; v++; 490 } 491 } 492 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 493 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 494 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 500 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 501 { 502 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 504 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 505 int ierr,m = b->AIJ->m,*idx,*ii; 506 int n,i,jrow,j; 507 508 PetscFunctionBegin; 509 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 510 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 511 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 512 idx = a->j; 513 v = a->a; 514 ii = a->i; 515 516 for (i=0; i<m; i++) { 517 jrow = ii[i]; 518 n = ii[i+1] - jrow; 519 sum1 = 0.0; 520 sum2 = 0.0; 521 sum3 = 0.0; 522 sum4 = 0.0; 523 for (j=0; j<n; j++) { 524 sum1 += v[jrow]*x[4*idx[jrow]]; 525 sum2 += v[jrow]*x[4*idx[jrow]+1]; 526 sum3 += v[jrow]*x[4*idx[jrow]+2]; 527 sum4 += v[jrow]*x[4*idx[jrow]+3]; 528 jrow++; 529 } 530 y[4*i] += sum1; 531 y[4*i+1] += sum2; 532 y[4*i+2] += sum3; 533 y[4*i+3] += sum4; 534 } 535 536 PetscLogFlops(8*a->nz - 4*m); 537 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 538 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 543 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 544 { 545 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 546 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 547 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 548 int ierr,m = b->AIJ->m,n,i,*idx; 549 550 PetscFunctionBegin; 551 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 552 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 553 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 554 555 for (i=0; i<m; i++) { 556 idx = a->j + a->i[i] ; 557 v = a->a + a->i[i] ; 558 n = a->i[i+1] - a->i[i]; 559 alpha1 = x[4*i]; 560 alpha2 = x[4*i+1]; 561 alpha3 = x[4*i+2]; 562 alpha4 = x[4*i+3]; 563 while (n-->0) { 564 y[4*(*idx)] += alpha1*(*v); 565 y[4*(*idx)+1] += alpha2*(*v); 566 y[4*(*idx)+2] += alpha3*(*v); 567 y[4*(*idx)+3] += alpha4*(*v); 568 idx++; v++; 569 } 570 } 571 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 572 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 574 PetscFunctionReturn(0); 575 } 576 /* ------------------------------------------------------------------------------*/ 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatMult_SeqMAIJ_5" 580 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 581 { 582 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 583 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 584 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 585 int ierr,m = b->AIJ->m,*idx,*ii; 586 int n,i,jrow,j; 587 588 PetscFunctionBegin; 589 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 590 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 591 idx = a->j; 592 v = a->a; 593 ii = a->i; 594 595 for (i=0; i<m; i++) { 596 jrow = ii[i]; 597 n = ii[i+1] - jrow; 598 sum1 = 0.0; 599 sum2 = 0.0; 600 sum3 = 0.0; 601 sum4 = 0.0; 602 sum5 = 0.0; 603 for (j=0; j<n; j++) { 604 sum1 += v[jrow]*x[5*idx[jrow]]; 605 sum2 += v[jrow]*x[5*idx[jrow]+1]; 606 sum3 += v[jrow]*x[5*idx[jrow]+2]; 607 sum4 += v[jrow]*x[5*idx[jrow]+3]; 608 sum5 += v[jrow]*x[5*idx[jrow]+4]; 609 jrow++; 610 } 611 y[5*i] = sum1; 612 y[5*i+1] = sum2; 613 y[5*i+2] = sum3; 614 y[5*i+3] = sum4; 615 y[5*i+4] = sum5; 616 } 617 618 PetscLogFlops(10*a->nz - 5*m); 619 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 620 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 626 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 627 { 628 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 629 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 630 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 631 int ierr,m = b->AIJ->m,n,i,*idx; 632 633 PetscFunctionBegin; 634 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 635 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 636 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 637 638 for (i=0; i<m; i++) { 639 idx = a->j + a->i[i] ; 640 v = a->a + a->i[i] ; 641 n = a->i[i+1] - a->i[i]; 642 alpha1 = x[5*i]; 643 alpha2 = x[5*i+1]; 644 alpha3 = x[5*i+2]; 645 alpha4 = x[5*i+3]; 646 alpha5 = x[5*i+4]; 647 while (n-->0) { 648 y[5*(*idx)] += alpha1*(*v); 649 y[5*(*idx)+1] += alpha2*(*v); 650 y[5*(*idx)+2] += alpha3*(*v); 651 y[5*(*idx)+3] += alpha4*(*v); 652 y[5*(*idx)+4] += alpha5*(*v); 653 idx++; v++; 654 } 655 } 656 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 657 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 658 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 664 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 665 { 666 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 667 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 668 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 669 int ierr,m = b->AIJ->m,*idx,*ii; 670 int n,i,jrow,j; 671 672 PetscFunctionBegin; 673 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 674 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 675 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 676 idx = a->j; 677 v = a->a; 678 ii = a->i; 679 680 for (i=0; i<m; i++) { 681 jrow = ii[i]; 682 n = ii[i+1] - jrow; 683 sum1 = 0.0; 684 sum2 = 0.0; 685 sum3 = 0.0; 686 sum4 = 0.0; 687 sum5 = 0.0; 688 for (j=0; j<n; j++) { 689 sum1 += v[jrow]*x[5*idx[jrow]]; 690 sum2 += v[jrow]*x[5*idx[jrow]+1]; 691 sum3 += v[jrow]*x[5*idx[jrow]+2]; 692 sum4 += v[jrow]*x[5*idx[jrow]+3]; 693 sum5 += v[jrow]*x[5*idx[jrow]+4]; 694 jrow++; 695 } 696 y[5*i] += sum1; 697 y[5*i+1] += sum2; 698 y[5*i+2] += sum3; 699 y[5*i+3] += sum4; 700 y[5*i+4] += sum5; 701 } 702 703 PetscLogFlops(10*a->nz); 704 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 705 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 709 #undef __FUNCT__ 710 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 711 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 712 { 713 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 714 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 715 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 716 int ierr,m = b->AIJ->m,n,i,*idx; 717 718 PetscFunctionBegin; 719 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 720 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 721 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 722 723 for (i=0; i<m; i++) { 724 idx = a->j + a->i[i] ; 725 v = a->a + a->i[i] ; 726 n = a->i[i+1] - a->i[i]; 727 alpha1 = x[5*i]; 728 alpha2 = x[5*i+1]; 729 alpha3 = x[5*i+2]; 730 alpha4 = x[5*i+3]; 731 alpha5 = x[5*i+4]; 732 while (n-->0) { 733 y[5*(*idx)] += alpha1*(*v); 734 y[5*(*idx)+1] += alpha2*(*v); 735 y[5*(*idx)+2] += alpha3*(*v); 736 y[5*(*idx)+3] += alpha4*(*v); 737 y[5*(*idx)+4] += alpha5*(*v); 738 idx++; v++; 739 } 740 } 741 PetscLogFlops(10*a->nz); 742 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 743 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 744 PetscFunctionReturn(0); 745 } 746 747 /* ------------------------------------------------------------------------------*/ 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatMult_SeqMAIJ_6" 750 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 751 { 752 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 753 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 754 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 755 int ierr,m = b->AIJ->m,*idx,*ii; 756 int n,i,jrow,j; 757 758 PetscFunctionBegin; 759 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 760 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 761 idx = a->j; 762 v = a->a; 763 ii = a->i; 764 765 for (i=0; i<m; i++) { 766 jrow = ii[i]; 767 n = ii[i+1] - jrow; 768 sum1 = 0.0; 769 sum2 = 0.0; 770 sum3 = 0.0; 771 sum4 = 0.0; 772 sum5 = 0.0; 773 sum6 = 0.0; 774 for (j=0; j<n; j++) { 775 sum1 += v[jrow]*x[6*idx[jrow]]; 776 sum2 += v[jrow]*x[6*idx[jrow]+1]; 777 sum3 += v[jrow]*x[6*idx[jrow]+2]; 778 sum4 += v[jrow]*x[6*idx[jrow]+3]; 779 sum5 += v[jrow]*x[6*idx[jrow]+4]; 780 sum6 += v[jrow]*x[6*idx[jrow]+5]; 781 jrow++; 782 } 783 y[6*i] = sum1; 784 y[6*i+1] = sum2; 785 y[6*i+2] = sum3; 786 y[6*i+3] = sum4; 787 y[6*i+4] = sum5; 788 y[6*i+5] = sum6; 789 } 790 791 PetscLogFlops(12*a->nz - 6*m); 792 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 793 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 799 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 800 { 801 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 802 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 803 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 804 int ierr,m = b->AIJ->m,n,i,*idx; 805 806 PetscFunctionBegin; 807 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 808 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 809 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 810 811 for (i=0; i<m; i++) { 812 idx = a->j + a->i[i] ; 813 v = a->a + a->i[i] ; 814 n = a->i[i+1] - a->i[i]; 815 alpha1 = x[6*i]; 816 alpha2 = x[6*i+1]; 817 alpha3 = x[6*i+2]; 818 alpha4 = x[6*i+3]; 819 alpha5 = x[6*i+4]; 820 alpha6 = x[6*i+5]; 821 while (n-->0) { 822 y[6*(*idx)] += alpha1*(*v); 823 y[6*(*idx)+1] += alpha2*(*v); 824 y[6*(*idx)+2] += alpha3*(*v); 825 y[6*(*idx)+3] += alpha4*(*v); 826 y[6*(*idx)+4] += alpha5*(*v); 827 y[6*(*idx)+5] += alpha6*(*v); 828 idx++; v++; 829 } 830 } 831 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 832 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 833 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 839 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 840 { 841 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 842 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 843 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 844 int ierr,m = b->AIJ->m,*idx,*ii; 845 int n,i,jrow,j; 846 847 PetscFunctionBegin; 848 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 849 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 850 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 851 idx = a->j; 852 v = a->a; 853 ii = a->i; 854 855 for (i=0; i<m; i++) { 856 jrow = ii[i]; 857 n = ii[i+1] - jrow; 858 sum1 = 0.0; 859 sum2 = 0.0; 860 sum3 = 0.0; 861 sum4 = 0.0; 862 sum5 = 0.0; 863 sum6 = 0.0; 864 for (j=0; j<n; j++) { 865 sum1 += v[jrow]*x[6*idx[jrow]]; 866 sum2 += v[jrow]*x[6*idx[jrow]+1]; 867 sum3 += v[jrow]*x[6*idx[jrow]+2]; 868 sum4 += v[jrow]*x[6*idx[jrow]+3]; 869 sum5 += v[jrow]*x[6*idx[jrow]+4]; 870 sum6 += v[jrow]*x[6*idx[jrow]+5]; 871 jrow++; 872 } 873 y[6*i] += sum1; 874 y[6*i+1] += sum2; 875 y[6*i+2] += sum3; 876 y[6*i+3] += sum4; 877 y[6*i+4] += sum5; 878 y[6*i+5] += sum6; 879 } 880 881 PetscLogFlops(12*a->nz); 882 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 883 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 889 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 890 { 891 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 892 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 893 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 894 int ierr,m = b->AIJ->m,n,i,*idx; 895 896 PetscFunctionBegin; 897 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 898 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 899 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 900 901 for (i=0; i<m; i++) { 902 idx = a->j + a->i[i] ; 903 v = a->a + a->i[i] ; 904 n = a->i[i+1] - a->i[i]; 905 alpha1 = x[6*i]; 906 alpha2 = x[6*i+1]; 907 alpha3 = x[6*i+2]; 908 alpha4 = x[6*i+3]; 909 alpha5 = x[6*i+4]; 910 alpha6 = x[6*i+5]; 911 while (n-->0) { 912 y[6*(*idx)] += alpha1*(*v); 913 y[6*(*idx)+1] += alpha2*(*v); 914 y[6*(*idx)+2] += alpha3*(*v); 915 y[6*(*idx)+3] += alpha4*(*v); 916 y[6*(*idx)+4] += alpha5*(*v); 917 y[6*(*idx)+5] += alpha6*(*v); 918 idx++; v++; 919 } 920 } 921 PetscLogFlops(12*a->nz); 922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 923 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 927 /* ------------------------------------------------------------------------------*/ 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatMult_SeqMAIJ_8" 930 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 931 { 932 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 933 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 934 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 935 int ierr,m = b->AIJ->m,*idx,*ii; 936 int n,i,jrow,j; 937 938 PetscFunctionBegin; 939 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 940 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 941 idx = a->j; 942 v = a->a; 943 ii = a->i; 944 945 for (i=0; i<m; i++) { 946 jrow = ii[i]; 947 n = ii[i+1] - jrow; 948 sum1 = 0.0; 949 sum2 = 0.0; 950 sum3 = 0.0; 951 sum4 = 0.0; 952 sum5 = 0.0; 953 sum6 = 0.0; 954 sum7 = 0.0; 955 sum8 = 0.0; 956 for (j=0; j<n; j++) { 957 sum1 += v[jrow]*x[8*idx[jrow]]; 958 sum2 += v[jrow]*x[8*idx[jrow]+1]; 959 sum3 += v[jrow]*x[8*idx[jrow]+2]; 960 sum4 += v[jrow]*x[8*idx[jrow]+3]; 961 sum5 += v[jrow]*x[8*idx[jrow]+4]; 962 sum6 += v[jrow]*x[8*idx[jrow]+5]; 963 sum7 += v[jrow]*x[8*idx[jrow]+6]; 964 sum8 += v[jrow]*x[8*idx[jrow]+7]; 965 jrow++; 966 } 967 y[8*i] = sum1; 968 y[8*i+1] = sum2; 969 y[8*i+2] = sum3; 970 y[8*i+3] = sum4; 971 y[8*i+4] = sum5; 972 y[8*i+5] = sum6; 973 y[8*i+6] = sum7; 974 y[8*i+7] = sum8; 975 } 976 977 PetscLogFlops(16*a->nz - 8*m); 978 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 979 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 985 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 986 { 987 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 988 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 989 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 990 int ierr,m = b->AIJ->m,n,i,*idx; 991 992 PetscFunctionBegin; 993 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 994 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 995 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 996 997 for (i=0; i<m; i++) { 998 idx = a->j + a->i[i] ; 999 v = a->a + a->i[i] ; 1000 n = a->i[i+1] - a->i[i]; 1001 alpha1 = x[8*i]; 1002 alpha2 = x[8*i+1]; 1003 alpha3 = x[8*i+2]; 1004 alpha4 = x[8*i+3]; 1005 alpha5 = x[8*i+4]; 1006 alpha6 = x[8*i+5]; 1007 alpha7 = x[8*i+6]; 1008 alpha8 = x[8*i+7]; 1009 while (n-->0) { 1010 y[8*(*idx)] += alpha1*(*v); 1011 y[8*(*idx)+1] += alpha2*(*v); 1012 y[8*(*idx)+2] += alpha3*(*v); 1013 y[8*(*idx)+3] += alpha4*(*v); 1014 y[8*(*idx)+4] += alpha5*(*v); 1015 y[8*(*idx)+5] += alpha6*(*v); 1016 y[8*(*idx)+6] += alpha7*(*v); 1017 y[8*(*idx)+7] += alpha8*(*v); 1018 idx++; v++; 1019 } 1020 } 1021 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1022 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1023 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1029 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1030 { 1031 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1032 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1033 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1034 int ierr,m = b->AIJ->m,*idx,*ii; 1035 int n,i,jrow,j; 1036 1037 PetscFunctionBegin; 1038 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1039 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1040 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1041 idx = a->j; 1042 v = a->a; 1043 ii = a->i; 1044 1045 for (i=0; i<m; i++) { 1046 jrow = ii[i]; 1047 n = ii[i+1] - jrow; 1048 sum1 = 0.0; 1049 sum2 = 0.0; 1050 sum3 = 0.0; 1051 sum4 = 0.0; 1052 sum5 = 0.0; 1053 sum6 = 0.0; 1054 sum7 = 0.0; 1055 sum8 = 0.0; 1056 for (j=0; j<n; j++) { 1057 sum1 += v[jrow]*x[8*idx[jrow]]; 1058 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1059 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1060 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1061 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1062 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1063 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1064 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1065 jrow++; 1066 } 1067 y[8*i] += sum1; 1068 y[8*i+1] += sum2; 1069 y[8*i+2] += sum3; 1070 y[8*i+3] += sum4; 1071 y[8*i+4] += sum5; 1072 y[8*i+5] += sum6; 1073 y[8*i+6] += sum7; 1074 y[8*i+7] += sum8; 1075 } 1076 1077 PetscLogFlops(16*a->nz); 1078 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1079 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1085 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1086 { 1087 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1088 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1089 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1090 int ierr,m = b->AIJ->m,n,i,*idx; 1091 1092 PetscFunctionBegin; 1093 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1094 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1095 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1096 for (i=0; i<m; i++) { 1097 idx = a->j + a->i[i] ; 1098 v = a->a + a->i[i] ; 1099 n = a->i[i+1] - a->i[i]; 1100 alpha1 = x[8*i]; 1101 alpha2 = x[8*i+1]; 1102 alpha3 = x[8*i+2]; 1103 alpha4 = x[8*i+3]; 1104 alpha5 = x[8*i+4]; 1105 alpha6 = x[8*i+5]; 1106 alpha7 = x[8*i+6]; 1107 alpha8 = x[8*i+7]; 1108 while (n-->0) { 1109 y[8*(*idx)] += alpha1*(*v); 1110 y[8*(*idx)+1] += alpha2*(*v); 1111 y[8*(*idx)+2] += alpha3*(*v); 1112 y[8*(*idx)+3] += alpha4*(*v); 1113 y[8*(*idx)+4] += alpha5*(*v); 1114 y[8*(*idx)+5] += alpha6*(*v); 1115 y[8*(*idx)+6] += alpha7*(*v); 1116 y[8*(*idx)+7] += alpha8*(*v); 1117 idx++; v++; 1118 } 1119 } 1120 PetscLogFlops(16*a->nz); 1121 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1122 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 /* ------------------------------------------------------------------------------*/ 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1129 int MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1130 { 1131 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1133 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1134 int ierr,m = b->AIJ->m,*idx,*ii; 1135 int n,i,jrow,j; 1136 1137 PetscFunctionBegin; 1138 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1139 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1140 idx = a->j; 1141 v = a->a; 1142 ii = a->i; 1143 1144 for (i=0; i<m; i++) { 1145 jrow = ii[i]; 1146 n = ii[i+1] - jrow; 1147 sum1 = 0.0; 1148 sum2 = 0.0; 1149 sum3 = 0.0; 1150 sum4 = 0.0; 1151 sum5 = 0.0; 1152 sum6 = 0.0; 1153 sum7 = 0.0; 1154 sum8 = 0.0; 1155 sum9 = 0.0; 1156 for (j=0; j<n; j++) { 1157 sum1 += v[jrow]*x[9*idx[jrow]]; 1158 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1159 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1160 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1161 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1162 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1163 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1164 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1165 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1166 jrow++; 1167 } 1168 y[9*i] = sum1; 1169 y[9*i+1] = sum2; 1170 y[9*i+2] = sum3; 1171 y[9*i+3] = sum4; 1172 y[9*i+4] = sum5; 1173 y[9*i+5] = sum6; 1174 y[9*i+6] = sum7; 1175 y[9*i+7] = sum8; 1176 y[9*i+8] = sum9; 1177 } 1178 1179 PetscLogFlops(18*a->nz - 9*m); 1180 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1181 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1187 int MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1188 { 1189 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1190 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1191 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1192 int ierr,m = b->AIJ->m,n,i,*idx; 1193 1194 PetscFunctionBegin; 1195 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1196 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1197 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1198 1199 for (i=0; i<m; i++) { 1200 idx = a->j + a->i[i] ; 1201 v = a->a + a->i[i] ; 1202 n = a->i[i+1] - a->i[i]; 1203 alpha1 = x[9*i]; 1204 alpha2 = x[9*i+1]; 1205 alpha3 = x[9*i+2]; 1206 alpha4 = x[9*i+3]; 1207 alpha5 = x[9*i+4]; 1208 alpha6 = x[9*i+5]; 1209 alpha7 = x[9*i+6]; 1210 alpha8 = x[9*i+7]; 1211 alpha9 = x[9*i+8]; 1212 while (n-->0) { 1213 y[9*(*idx)] += alpha1*(*v); 1214 y[9*(*idx)+1] += alpha2*(*v); 1215 y[9*(*idx)+2] += alpha3*(*v); 1216 y[9*(*idx)+3] += alpha4*(*v); 1217 y[9*(*idx)+4] += alpha5*(*v); 1218 y[9*(*idx)+5] += alpha6*(*v); 1219 y[9*(*idx)+6] += alpha7*(*v); 1220 y[9*(*idx)+7] += alpha8*(*v); 1221 y[9*(*idx)+8] += alpha9*(*v); 1222 idx++; v++; 1223 } 1224 } 1225 PetscLogFlops(18*a->nz - 9*b->AIJ->n); 1226 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1227 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1233 int MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1234 { 1235 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1236 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1237 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1238 int ierr,m = b->AIJ->m,*idx,*ii; 1239 int n,i,jrow,j; 1240 1241 PetscFunctionBegin; 1242 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1244 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1245 idx = a->j; 1246 v = a->a; 1247 ii = a->i; 1248 1249 for (i=0; i<m; i++) { 1250 jrow = ii[i]; 1251 n = ii[i+1] - jrow; 1252 sum1 = 0.0; 1253 sum2 = 0.0; 1254 sum3 = 0.0; 1255 sum4 = 0.0; 1256 sum5 = 0.0; 1257 sum6 = 0.0; 1258 sum7 = 0.0; 1259 sum8 = 0.0; 1260 sum9 = 0.0; 1261 for (j=0; j<n; j++) { 1262 sum1 += v[jrow]*x[9*idx[jrow]]; 1263 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1264 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1265 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1266 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1267 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1268 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1269 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1270 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1271 jrow++; 1272 } 1273 y[9*i] += sum1; 1274 y[9*i+1] += sum2; 1275 y[9*i+2] += sum3; 1276 y[9*i+3] += sum4; 1277 y[9*i+4] += sum5; 1278 y[9*i+5] += sum6; 1279 y[9*i+6] += sum7; 1280 y[9*i+7] += sum8; 1281 y[9*i+8] += sum9; 1282 } 1283 1284 PetscLogFlops(18*a->nz); 1285 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1286 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1292 int MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1293 { 1294 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1295 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1296 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1297 int ierr,m = b->AIJ->m,n,i,*idx; 1298 1299 PetscFunctionBegin; 1300 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1301 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1302 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1303 for (i=0; i<m; i++) { 1304 idx = a->j + a->i[i] ; 1305 v = a->a + a->i[i] ; 1306 n = a->i[i+1] - a->i[i]; 1307 alpha1 = x[9*i]; 1308 alpha2 = x[9*i+1]; 1309 alpha3 = x[9*i+2]; 1310 alpha4 = x[9*i+3]; 1311 alpha5 = x[9*i+4]; 1312 alpha6 = x[9*i+5]; 1313 alpha7 = x[9*i+6]; 1314 alpha8 = x[9*i+7]; 1315 alpha9 = x[9*i+8]; 1316 while (n-->0) { 1317 y[9*(*idx)] += alpha1*(*v); 1318 y[9*(*idx)+1] += alpha2*(*v); 1319 y[9*(*idx)+2] += alpha3*(*v); 1320 y[9*(*idx)+3] += alpha4*(*v); 1321 y[9*(*idx)+4] += alpha5*(*v); 1322 y[9*(*idx)+5] += alpha6*(*v); 1323 y[9*(*idx)+6] += alpha7*(*v); 1324 y[9*(*idx)+7] += alpha8*(*v); 1325 y[9*(*idx)+8] += alpha9*(*v); 1326 idx++; v++; 1327 } 1328 } 1329 PetscLogFlops(18*a->nz); 1330 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1331 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 /*--------------------------------------------------------------------------------------------*/ 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1338 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1339 { 1340 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1341 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1342 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1343 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1344 int ierr,m = b->AIJ->m,*idx,*ii; 1345 int n,i,jrow,j; 1346 1347 PetscFunctionBegin; 1348 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1349 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1350 idx = a->j; 1351 v = a->a; 1352 ii = a->i; 1353 1354 for (i=0; i<m; i++) { 1355 jrow = ii[i]; 1356 n = ii[i+1] - jrow; 1357 sum1 = 0.0; 1358 sum2 = 0.0; 1359 sum3 = 0.0; 1360 sum4 = 0.0; 1361 sum5 = 0.0; 1362 sum6 = 0.0; 1363 sum7 = 0.0; 1364 sum8 = 0.0; 1365 sum9 = 0.0; 1366 sum10 = 0.0; 1367 sum11 = 0.0; 1368 sum12 = 0.0; 1369 sum13 = 0.0; 1370 sum14 = 0.0; 1371 sum15 = 0.0; 1372 sum16 = 0.0; 1373 for (j=0; j<n; j++) { 1374 sum1 += v[jrow]*x[16*idx[jrow]]; 1375 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1376 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1377 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1378 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1379 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1380 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1381 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1382 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1383 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1384 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1385 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1386 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1387 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1388 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1389 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1390 jrow++; 1391 } 1392 y[16*i] = sum1; 1393 y[16*i+1] = sum2; 1394 y[16*i+2] = sum3; 1395 y[16*i+3] = sum4; 1396 y[16*i+4] = sum5; 1397 y[16*i+5] = sum6; 1398 y[16*i+6] = sum7; 1399 y[16*i+7] = sum8; 1400 y[16*i+8] = sum9; 1401 y[16*i+9] = sum10; 1402 y[16*i+10] = sum11; 1403 y[16*i+11] = sum12; 1404 y[16*i+12] = sum13; 1405 y[16*i+13] = sum14; 1406 y[16*i+14] = sum15; 1407 y[16*i+15] = sum16; 1408 } 1409 1410 PetscLogFlops(32*a->nz - 16*m); 1411 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1412 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1418 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1419 { 1420 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1422 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1423 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1424 int ierr,m = b->AIJ->m,n,i,*idx; 1425 1426 PetscFunctionBegin; 1427 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1428 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1429 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1430 1431 for (i=0; i<m; i++) { 1432 idx = a->j + a->i[i] ; 1433 v = a->a + a->i[i] ; 1434 n = a->i[i+1] - a->i[i]; 1435 alpha1 = x[16*i]; 1436 alpha2 = x[16*i+1]; 1437 alpha3 = x[16*i+2]; 1438 alpha4 = x[16*i+3]; 1439 alpha5 = x[16*i+4]; 1440 alpha6 = x[16*i+5]; 1441 alpha7 = x[16*i+6]; 1442 alpha8 = x[16*i+7]; 1443 alpha9 = x[16*i+8]; 1444 alpha10 = x[16*i+9]; 1445 alpha11 = x[16*i+10]; 1446 alpha12 = x[16*i+11]; 1447 alpha13 = x[16*i+12]; 1448 alpha14 = x[16*i+13]; 1449 alpha15 = x[16*i+14]; 1450 alpha16 = x[16*i+15]; 1451 while (n-->0) { 1452 y[16*(*idx)] += alpha1*(*v); 1453 y[16*(*idx)+1] += alpha2*(*v); 1454 y[16*(*idx)+2] += alpha3*(*v); 1455 y[16*(*idx)+3] += alpha4*(*v); 1456 y[16*(*idx)+4] += alpha5*(*v); 1457 y[16*(*idx)+5] += alpha6*(*v); 1458 y[16*(*idx)+6] += alpha7*(*v); 1459 y[16*(*idx)+7] += alpha8*(*v); 1460 y[16*(*idx)+8] += alpha9*(*v); 1461 y[16*(*idx)+9] += alpha10*(*v); 1462 y[16*(*idx)+10] += alpha11*(*v); 1463 y[16*(*idx)+11] += alpha12*(*v); 1464 y[16*(*idx)+12] += alpha13*(*v); 1465 y[16*(*idx)+13] += alpha14*(*v); 1466 y[16*(*idx)+14] += alpha15*(*v); 1467 y[16*(*idx)+15] += alpha16*(*v); 1468 idx++; v++; 1469 } 1470 } 1471 PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1472 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1473 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1479 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1480 { 1481 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1482 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1483 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1484 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1485 int ierr,m = b->AIJ->m,*idx,*ii; 1486 int n,i,jrow,j; 1487 1488 PetscFunctionBegin; 1489 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1490 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1491 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1492 idx = a->j; 1493 v = a->a; 1494 ii = a->i; 1495 1496 for (i=0; i<m; i++) { 1497 jrow = ii[i]; 1498 n = ii[i+1] - jrow; 1499 sum1 = 0.0; 1500 sum2 = 0.0; 1501 sum3 = 0.0; 1502 sum4 = 0.0; 1503 sum5 = 0.0; 1504 sum6 = 0.0; 1505 sum7 = 0.0; 1506 sum8 = 0.0; 1507 sum9 = 0.0; 1508 sum10 = 0.0; 1509 sum11 = 0.0; 1510 sum12 = 0.0; 1511 sum13 = 0.0; 1512 sum14 = 0.0; 1513 sum15 = 0.0; 1514 sum16 = 0.0; 1515 for (j=0; j<n; j++) { 1516 sum1 += v[jrow]*x[16*idx[jrow]]; 1517 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1518 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1519 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1520 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1521 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1522 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1523 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1524 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1525 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1526 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1527 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1528 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1529 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1530 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1531 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1532 jrow++; 1533 } 1534 y[16*i] += sum1; 1535 y[16*i+1] += sum2; 1536 y[16*i+2] += sum3; 1537 y[16*i+3] += sum4; 1538 y[16*i+4] += sum5; 1539 y[16*i+5] += sum6; 1540 y[16*i+6] += sum7; 1541 y[16*i+7] += sum8; 1542 y[16*i+8] += sum9; 1543 y[16*i+9] += sum10; 1544 y[16*i+10] += sum11; 1545 y[16*i+11] += sum12; 1546 y[16*i+12] += sum13; 1547 y[16*i+13] += sum14; 1548 y[16*i+14] += sum15; 1549 y[16*i+15] += sum16; 1550 } 1551 1552 PetscLogFlops(32*a->nz); 1553 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1554 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1560 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1561 { 1562 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1564 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1565 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1566 int ierr,m = b->AIJ->m,n,i,*idx; 1567 1568 PetscFunctionBegin; 1569 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1570 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1571 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1572 for (i=0; i<m; i++) { 1573 idx = a->j + a->i[i] ; 1574 v = a->a + a->i[i] ; 1575 n = a->i[i+1] - a->i[i]; 1576 alpha1 = x[16*i]; 1577 alpha2 = x[16*i+1]; 1578 alpha3 = x[16*i+2]; 1579 alpha4 = x[16*i+3]; 1580 alpha5 = x[16*i+4]; 1581 alpha6 = x[16*i+5]; 1582 alpha7 = x[16*i+6]; 1583 alpha8 = x[16*i+7]; 1584 alpha9 = x[16*i+8]; 1585 alpha10 = x[16*i+9]; 1586 alpha11 = x[16*i+10]; 1587 alpha12 = x[16*i+11]; 1588 alpha13 = x[16*i+12]; 1589 alpha14 = x[16*i+13]; 1590 alpha15 = x[16*i+14]; 1591 alpha16 = x[16*i+15]; 1592 while (n-->0) { 1593 y[16*(*idx)] += alpha1*(*v); 1594 y[16*(*idx)+1] += alpha2*(*v); 1595 y[16*(*idx)+2] += alpha3*(*v); 1596 y[16*(*idx)+3] += alpha4*(*v); 1597 y[16*(*idx)+4] += alpha5*(*v); 1598 y[16*(*idx)+5] += alpha6*(*v); 1599 y[16*(*idx)+6] += alpha7*(*v); 1600 y[16*(*idx)+7] += alpha8*(*v); 1601 y[16*(*idx)+8] += alpha9*(*v); 1602 y[16*(*idx)+9] += alpha10*(*v); 1603 y[16*(*idx)+10] += alpha11*(*v); 1604 y[16*(*idx)+11] += alpha12*(*v); 1605 y[16*(*idx)+12] += alpha13*(*v); 1606 y[16*(*idx)+13] += alpha14*(*v); 1607 y[16*(*idx)+14] += alpha15*(*v); 1608 y[16*(*idx)+15] += alpha16*(*v); 1609 idx++; v++; 1610 } 1611 } 1612 PetscLogFlops(32*a->nz); 1613 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1614 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 /*===================================================================================*/ 1619 #undef __FUNCT__ 1620 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1621 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1622 { 1623 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1624 int ierr; 1625 PetscFunctionBegin; 1626 1627 /* start the scatter */ 1628 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1629 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1630 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1631 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1637 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1638 { 1639 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1640 int ierr; 1641 PetscFunctionBegin; 1642 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1643 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1644 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1646 PetscFunctionReturn(0); 1647 } 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1651 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1652 { 1653 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1654 int ierr; 1655 PetscFunctionBegin; 1656 1657 /* start the scatter */ 1658 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1659 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1661 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1667 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1668 { 1669 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1670 int ierr; 1671 PetscFunctionBegin; 1672 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1673 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1674 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1675 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1676 PetscFunctionReturn(0); 1677 } 1678 1679 /* ---------------------------------------------------------------------------------- */ 1680 /*MC 1681 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1682 operations for multicomponent problems. It interpolates each component the same 1683 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 1684 and MATMPIAIJ for distributed matrices. 1685 1686 Operations provided: 1687 . MatMult 1688 . MatMultTranspose 1689 . MatMultAdd 1690 . MatMultTransposeAdd 1691 1692 Level: advanced 1693 1694 M*/ 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatCreateMAIJ" 1697 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 1698 { 1699 int ierr,size,n; 1700 Mat_MPIMAIJ *b; 1701 Mat B; 1702 1703 PetscFunctionBegin; 1704 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1705 1706 if (dof == 1) { 1707 *maij = A; 1708 } else { 1709 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1710 B->assembled = PETSC_TRUE; 1711 1712 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1713 if (size == 1) { 1714 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1715 B->ops->destroy = MatDestroy_SeqMAIJ; 1716 b = (Mat_MPIMAIJ*)B->data; 1717 b->dof = dof; 1718 b->AIJ = A; 1719 if (dof == 2) { 1720 B->ops->mult = MatMult_SeqMAIJ_2; 1721 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1722 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1723 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1724 } else if (dof == 3) { 1725 B->ops->mult = MatMult_SeqMAIJ_3; 1726 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1727 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1728 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1729 } else if (dof == 4) { 1730 B->ops->mult = MatMult_SeqMAIJ_4; 1731 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1732 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1733 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1734 } else if (dof == 5) { 1735 B->ops->mult = MatMult_SeqMAIJ_5; 1736 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1737 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1738 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1739 } else if (dof == 6) { 1740 B->ops->mult = MatMult_SeqMAIJ_6; 1741 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1742 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1743 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1744 } else if (dof == 8) { 1745 B->ops->mult = MatMult_SeqMAIJ_8; 1746 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1747 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1748 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1749 } else if (dof == 9) { 1750 B->ops->mult = MatMult_SeqMAIJ_9; 1751 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1752 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1753 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1754 } else if (dof == 16) { 1755 B->ops->mult = MatMult_SeqMAIJ_16; 1756 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1757 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1758 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1759 } else { 1760 SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1761 } 1762 } else { 1763 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1764 IS from,to; 1765 Vec gvec; 1766 int *garray,i; 1767 1768 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1769 B->ops->destroy = MatDestroy_MPIMAIJ; 1770 b = (Mat_MPIMAIJ*)B->data; 1771 b->dof = dof; 1772 b->A = A; 1773 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1774 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1775 1776 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1777 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1778 1779 /* create two temporary Index sets for build scatter gather */ 1780 ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1781 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1782 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1783 ierr = PetscFree(garray);CHKERRQ(ierr); 1784 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1785 1786 /* create temporary global vector to generate scatter context */ 1787 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1788 1789 /* generate the scatter context */ 1790 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1791 1792 ierr = ISDestroy(from);CHKERRQ(ierr); 1793 ierr = ISDestroy(to);CHKERRQ(ierr); 1794 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1795 1796 B->ops->mult = MatMult_MPIMAIJ_dof; 1797 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1798 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1799 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1800 } 1801 *maij = B; 1802 } 1803 PetscFunctionReturn(0); 1804 } 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817