1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/maij/maij.h" 20 #include "src/vec/vecimpl.h" 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatMAIJGetAIJ" 24 int MatMAIJGetAIJ(Mat A,Mat *B) 25 { 26 int ierr; 27 PetscTruth ismpimaij,isseqmaij; 28 29 PetscFunctionBegin; 30 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 31 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 32 if (ismpimaij) { 33 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 34 35 *B = b->A; 36 } else if (isseqmaij) { 37 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 38 39 *B = b->AIJ; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatMAIJRedimension" 48 int MatMAIJRedimension(Mat A,int dof,Mat *B) 49 { 50 int ierr; 51 Mat Aij; 52 53 PetscFunctionBegin; 54 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 55 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatDestroy_SeqMAIJ" 61 int MatDestroy_SeqMAIJ(Mat A) 62 { 63 int ierr; 64 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 65 66 PetscFunctionBegin; 67 if (b->AIJ) { 68 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 69 } 70 ierr = PetscFree(b);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatDestroy_MPIMAIJ" 76 int MatDestroy_MPIMAIJ(Mat A) 77 { 78 int ierr; 79 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 80 81 PetscFunctionBegin; 82 if (b->AIJ) { 83 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 84 } 85 if (b->OAIJ) { 86 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 87 } 88 if (b->A) { 89 ierr = MatDestroy(b->A);CHKERRQ(ierr); 90 } 91 if (b->ctx) { 92 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 93 } 94 if (b->w) { 95 ierr = VecDestroy(b->w);CHKERRQ(ierr); 96 } 97 ierr = PetscFree(b);CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 /*MC 102 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 103 multicomponent problems, interpolating or restricting each component the same way independently. 104 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 105 106 Operations provided: 107 . MatMult 108 . MatMultTranspose 109 . MatMultAdd 110 . MatMultTransposeAdd 111 112 Level: advanced 113 114 .seealso: MatCreateSeqDense 115 M*/ 116 117 EXTERN_C_BEGIN 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatCreate_MAIJ" 120 int MatCreate_MAIJ(Mat A) 121 { 122 int ierr; 123 Mat_MPIMAIJ *b; 124 125 PetscFunctionBegin; 126 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 127 A->data = (void*)b; 128 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 129 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 130 A->factor = 0; 131 A->mapping = 0; 132 133 b->AIJ = 0; 134 b->dof = 0; 135 b->OAIJ = 0; 136 b->ctx = 0; 137 b->w = 0; 138 PetscFunctionReturn(0); 139 } 140 EXTERN_C_END 141 142 /* --------------------------------------------------------------------------------------*/ 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatMult_SeqMAIJ_2" 145 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 146 { 147 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 148 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 149 PetscScalar *x,*y,*v,sum1, sum2; 150 int ierr,m = b->AIJ->m,*idx,*ii; 151 int n,i,jrow,j; 152 153 PetscFunctionBegin; 154 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 155 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 156 idx = a->j; 157 v = a->a; 158 ii = a->i; 159 160 for (i=0; i<m; i++) { 161 jrow = ii[i]; 162 n = ii[i+1] - jrow; 163 sum1 = 0.0; 164 sum2 = 0.0; 165 for (j=0; j<n; j++) { 166 sum1 += v[jrow]*x[2*idx[jrow]]; 167 sum2 += v[jrow]*x[2*idx[jrow]+1]; 168 jrow++; 169 } 170 y[2*i] = sum1; 171 y[2*i+1] = sum2; 172 } 173 174 PetscLogFlops(4*a->nz - 2*m); 175 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 176 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 182 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 183 { 184 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 186 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 187 int ierr,m = b->AIJ->m,n,i,*idx; 188 189 PetscFunctionBegin; 190 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 191 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 192 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 193 194 for (i=0; i<m; i++) { 195 idx = a->j + a->i[i] ; 196 v = a->a + a->i[i] ; 197 n = a->i[i+1] - a->i[i]; 198 alpha1 = x[2*i]; 199 alpha2 = x[2*i+1]; 200 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 201 } 202 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 203 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 204 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 210 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 211 { 212 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 214 PetscScalar *x,*y,*v,sum1, sum2; 215 int ierr,m = b->AIJ->m,*idx,*ii; 216 int n,i,jrow,j; 217 218 PetscFunctionBegin; 219 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 220 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 221 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 222 idx = a->j; 223 v = a->a; 224 ii = a->i; 225 226 for (i=0; i<m; i++) { 227 jrow = ii[i]; 228 n = ii[i+1] - jrow; 229 sum1 = 0.0; 230 sum2 = 0.0; 231 for (j=0; j<n; j++) { 232 sum1 += v[jrow]*x[2*idx[jrow]]; 233 sum2 += v[jrow]*x[2*idx[jrow]+1]; 234 jrow++; 235 } 236 y[2*i] += sum1; 237 y[2*i+1] += sum2; 238 } 239 240 PetscLogFlops(4*a->nz - 2*m); 241 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 247 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 248 { 249 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 250 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 251 PetscScalar *x,*y,*v,alpha1,alpha2; 252 int ierr,m = b->AIJ->m,n,i,*idx; 253 254 PetscFunctionBegin; 255 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 256 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 257 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 258 259 for (i=0; i<m; i++) { 260 idx = a->j + a->i[i] ; 261 v = a->a + a->i[i] ; 262 n = a->i[i+1] - a->i[i]; 263 alpha1 = x[2*i]; 264 alpha2 = x[2*i+1]; 265 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 266 } 267 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 268 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 269 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 /* --------------------------------------------------------------------------------------*/ 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatMult_SeqMAIJ_3" 275 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 276 { 277 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 279 PetscScalar *x,*y,*v,sum1, sum2, sum3; 280 int ierr,m = b->AIJ->m,*idx,*ii; 281 int n,i,jrow,j; 282 283 PetscFunctionBegin; 284 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 285 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 286 idx = a->j; 287 v = a->a; 288 ii = a->i; 289 290 for (i=0; i<m; i++) { 291 jrow = ii[i]; 292 n = ii[i+1] - jrow; 293 sum1 = 0.0; 294 sum2 = 0.0; 295 sum3 = 0.0; 296 for (j=0; j<n; j++) { 297 sum1 += v[jrow]*x[3*idx[jrow]]; 298 sum2 += v[jrow]*x[3*idx[jrow]+1]; 299 sum3 += v[jrow]*x[3*idx[jrow]+2]; 300 jrow++; 301 } 302 y[3*i] = sum1; 303 y[3*i+1] = sum2; 304 y[3*i+2] = sum3; 305 } 306 307 PetscLogFlops(6*a->nz - 3*m); 308 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 309 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 315 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 316 { 317 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 318 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 319 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 320 int ierr,m = b->AIJ->m,n,i,*idx; 321 322 PetscFunctionBegin; 323 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 324 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 325 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 326 327 for (i=0; i<m; i++) { 328 idx = a->j + a->i[i]; 329 v = a->a + a->i[i]; 330 n = a->i[i+1] - a->i[i]; 331 alpha1 = x[3*i]; 332 alpha2 = x[3*i+1]; 333 alpha3 = x[3*i+2]; 334 while (n-->0) { 335 y[3*(*idx)] += alpha1*(*v); 336 y[3*(*idx)+1] += alpha2*(*v); 337 y[3*(*idx)+2] += alpha3*(*v); 338 idx++; v++; 339 } 340 } 341 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 342 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 343 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 349 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 350 { 351 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 353 PetscScalar *x,*y,*v,sum1, sum2, sum3; 354 int ierr,m = b->AIJ->m,*idx,*ii; 355 int n,i,jrow,j; 356 357 PetscFunctionBegin; 358 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 359 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 360 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 361 idx = a->j; 362 v = a->a; 363 ii = a->i; 364 365 for (i=0; i<m; i++) { 366 jrow = ii[i]; 367 n = ii[i+1] - jrow; 368 sum1 = 0.0; 369 sum2 = 0.0; 370 sum3 = 0.0; 371 for (j=0; j<n; j++) { 372 sum1 += v[jrow]*x[3*idx[jrow]]; 373 sum2 += v[jrow]*x[3*idx[jrow]+1]; 374 sum3 += v[jrow]*x[3*idx[jrow]+2]; 375 jrow++; 376 } 377 y[3*i] += sum1; 378 y[3*i+1] += sum2; 379 y[3*i+2] += sum3; 380 } 381 382 PetscLogFlops(6*a->nz); 383 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 384 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 389 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 390 { 391 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 393 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 394 int ierr,m = b->AIJ->m,n,i,*idx; 395 396 PetscFunctionBegin; 397 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 398 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 399 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 400 for (i=0; i<m; i++) { 401 idx = a->j + a->i[i] ; 402 v = a->a + a->i[i] ; 403 n = a->i[i+1] - a->i[i]; 404 alpha1 = x[3*i]; 405 alpha2 = x[3*i+1]; 406 alpha3 = x[3*i+2]; 407 while (n-->0) { 408 y[3*(*idx)] += alpha1*(*v); 409 y[3*(*idx)+1] += alpha2*(*v); 410 y[3*(*idx)+2] += alpha3*(*v); 411 idx++; v++; 412 } 413 } 414 PetscLogFlops(6*a->nz); 415 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 416 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* ------------------------------------------------------------------------------*/ 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatMult_SeqMAIJ_4" 423 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 424 { 425 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 426 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 427 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 428 int ierr,m = b->AIJ->m,*idx,*ii; 429 int n,i,jrow,j; 430 431 PetscFunctionBegin; 432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 434 idx = a->j; 435 v = a->a; 436 ii = a->i; 437 438 for (i=0; i<m; i++) { 439 jrow = ii[i]; 440 n = ii[i+1] - jrow; 441 sum1 = 0.0; 442 sum2 = 0.0; 443 sum3 = 0.0; 444 sum4 = 0.0; 445 for (j=0; j<n; j++) { 446 sum1 += v[jrow]*x[4*idx[jrow]]; 447 sum2 += v[jrow]*x[4*idx[jrow]+1]; 448 sum3 += v[jrow]*x[4*idx[jrow]+2]; 449 sum4 += v[jrow]*x[4*idx[jrow]+3]; 450 jrow++; 451 } 452 y[4*i] = sum1; 453 y[4*i+1] = sum2; 454 y[4*i+2] = sum3; 455 y[4*i+3] = sum4; 456 } 457 458 PetscLogFlops(8*a->nz - 4*m); 459 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 460 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 466 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 467 { 468 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 469 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 470 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 471 int ierr,m = b->AIJ->m,n,i,*idx; 472 473 PetscFunctionBegin; 474 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 475 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 476 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 477 for (i=0; i<m; i++) { 478 idx = a->j + a->i[i] ; 479 v = a->a + a->i[i] ; 480 n = a->i[i+1] - a->i[i]; 481 alpha1 = x[4*i]; 482 alpha2 = x[4*i+1]; 483 alpha3 = x[4*i+2]; 484 alpha4 = x[4*i+3]; 485 while (n-->0) { 486 y[4*(*idx)] += alpha1*(*v); 487 y[4*(*idx)+1] += alpha2*(*v); 488 y[4*(*idx)+2] += alpha3*(*v); 489 y[4*(*idx)+3] += alpha4*(*v); 490 idx++; v++; 491 } 492 } 493 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 494 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 495 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNCT__ 500 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 501 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 502 { 503 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 505 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 506 int ierr,m = b->AIJ->m,*idx,*ii; 507 int n,i,jrow,j; 508 509 PetscFunctionBegin; 510 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 511 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 512 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 513 idx = a->j; 514 v = a->a; 515 ii = a->i; 516 517 for (i=0; i<m; i++) { 518 jrow = ii[i]; 519 n = ii[i+1] - jrow; 520 sum1 = 0.0; 521 sum2 = 0.0; 522 sum3 = 0.0; 523 sum4 = 0.0; 524 for (j=0; j<n; j++) { 525 sum1 += v[jrow]*x[4*idx[jrow]]; 526 sum2 += v[jrow]*x[4*idx[jrow]+1]; 527 sum3 += v[jrow]*x[4*idx[jrow]+2]; 528 sum4 += v[jrow]*x[4*idx[jrow]+3]; 529 jrow++; 530 } 531 y[4*i] += sum1; 532 y[4*i+1] += sum2; 533 y[4*i+2] += sum3; 534 y[4*i+3] += sum4; 535 } 536 537 PetscLogFlops(8*a->nz - 4*m); 538 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 539 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 540 PetscFunctionReturn(0); 541 } 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 544 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 545 { 546 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 548 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 549 int ierr,m = b->AIJ->m,n,i,*idx; 550 551 PetscFunctionBegin; 552 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 553 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 554 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 555 556 for (i=0; i<m; i++) { 557 idx = a->j + a->i[i] ; 558 v = a->a + a->i[i] ; 559 n = a->i[i+1] - a->i[i]; 560 alpha1 = x[4*i]; 561 alpha2 = x[4*i+1]; 562 alpha3 = x[4*i+2]; 563 alpha4 = x[4*i+3]; 564 while (n-->0) { 565 y[4*(*idx)] += alpha1*(*v); 566 y[4*(*idx)+1] += alpha2*(*v); 567 y[4*(*idx)+2] += alpha3*(*v); 568 y[4*(*idx)+3] += alpha4*(*v); 569 idx++; v++; 570 } 571 } 572 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 573 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 574 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 /* ------------------------------------------------------------------------------*/ 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatMult_SeqMAIJ_5" 581 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 582 { 583 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 585 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 586 int ierr,m = b->AIJ->m,*idx,*ii; 587 int n,i,jrow,j; 588 589 PetscFunctionBegin; 590 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 591 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 592 idx = a->j; 593 v = a->a; 594 ii = a->i; 595 596 for (i=0; i<m; i++) { 597 jrow = ii[i]; 598 n = ii[i+1] - jrow; 599 sum1 = 0.0; 600 sum2 = 0.0; 601 sum3 = 0.0; 602 sum4 = 0.0; 603 sum5 = 0.0; 604 for (j=0; j<n; j++) { 605 sum1 += v[jrow]*x[5*idx[jrow]]; 606 sum2 += v[jrow]*x[5*idx[jrow]+1]; 607 sum3 += v[jrow]*x[5*idx[jrow]+2]; 608 sum4 += v[jrow]*x[5*idx[jrow]+3]; 609 sum5 += v[jrow]*x[5*idx[jrow]+4]; 610 jrow++; 611 } 612 y[5*i] = sum1; 613 y[5*i+1] = sum2; 614 y[5*i+2] = sum3; 615 y[5*i+3] = sum4; 616 y[5*i+4] = sum5; 617 } 618 619 PetscLogFlops(10*a->nz - 5*m); 620 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 621 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 627 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 628 { 629 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 630 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 631 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 632 int ierr,m = b->AIJ->m,n,i,*idx; 633 634 PetscFunctionBegin; 635 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 636 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 637 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 638 639 for (i=0; i<m; i++) { 640 idx = a->j + a->i[i] ; 641 v = a->a + a->i[i] ; 642 n = a->i[i+1] - a->i[i]; 643 alpha1 = x[5*i]; 644 alpha2 = x[5*i+1]; 645 alpha3 = x[5*i+2]; 646 alpha4 = x[5*i+3]; 647 alpha5 = x[5*i+4]; 648 while (n-->0) { 649 y[5*(*idx)] += alpha1*(*v); 650 y[5*(*idx)+1] += alpha2*(*v); 651 y[5*(*idx)+2] += alpha3*(*v); 652 y[5*(*idx)+3] += alpha4*(*v); 653 y[5*(*idx)+4] += alpha5*(*v); 654 idx++; v++; 655 } 656 } 657 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 658 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 659 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 660 PetscFunctionReturn(0); 661 } 662 663 #undef __FUNCT__ 664 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 665 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 666 { 667 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 668 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 669 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 670 int ierr,m = b->AIJ->m,*idx,*ii; 671 int n,i,jrow,j; 672 673 PetscFunctionBegin; 674 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 675 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 676 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 677 idx = a->j; 678 v = a->a; 679 ii = a->i; 680 681 for (i=0; i<m; i++) { 682 jrow = ii[i]; 683 n = ii[i+1] - jrow; 684 sum1 = 0.0; 685 sum2 = 0.0; 686 sum3 = 0.0; 687 sum4 = 0.0; 688 sum5 = 0.0; 689 for (j=0; j<n; j++) { 690 sum1 += v[jrow]*x[5*idx[jrow]]; 691 sum2 += v[jrow]*x[5*idx[jrow]+1]; 692 sum3 += v[jrow]*x[5*idx[jrow]+2]; 693 sum4 += v[jrow]*x[5*idx[jrow]+3]; 694 sum5 += v[jrow]*x[5*idx[jrow]+4]; 695 jrow++; 696 } 697 y[5*i] += sum1; 698 y[5*i+1] += sum2; 699 y[5*i+2] += sum3; 700 y[5*i+3] += sum4; 701 y[5*i+4] += sum5; 702 } 703 704 PetscLogFlops(10*a->nz); 705 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 706 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 712 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 713 { 714 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 715 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 716 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 717 int ierr,m = b->AIJ->m,n,i,*idx; 718 719 PetscFunctionBegin; 720 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 721 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 722 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 723 724 for (i=0; i<m; i++) { 725 idx = a->j + a->i[i] ; 726 v = a->a + a->i[i] ; 727 n = a->i[i+1] - a->i[i]; 728 alpha1 = x[5*i]; 729 alpha2 = x[5*i+1]; 730 alpha3 = x[5*i+2]; 731 alpha4 = x[5*i+3]; 732 alpha5 = x[5*i+4]; 733 while (n-->0) { 734 y[5*(*idx)] += alpha1*(*v); 735 y[5*(*idx)+1] += alpha2*(*v); 736 y[5*(*idx)+2] += alpha3*(*v); 737 y[5*(*idx)+3] += alpha4*(*v); 738 y[5*(*idx)+4] += alpha5*(*v); 739 idx++; v++; 740 } 741 } 742 PetscLogFlops(10*a->nz); 743 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 744 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 745 PetscFunctionReturn(0); 746 } 747 748 /* ------------------------------------------------------------------------------*/ 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatMult_SeqMAIJ_6" 751 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 752 { 753 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 754 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 755 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 756 int ierr,m = b->AIJ->m,*idx,*ii; 757 int n,i,jrow,j; 758 759 PetscFunctionBegin; 760 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 761 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 762 idx = a->j; 763 v = a->a; 764 ii = a->i; 765 766 for (i=0; i<m; i++) { 767 jrow = ii[i]; 768 n = ii[i+1] - jrow; 769 sum1 = 0.0; 770 sum2 = 0.0; 771 sum3 = 0.0; 772 sum4 = 0.0; 773 sum5 = 0.0; 774 sum6 = 0.0; 775 for (j=0; j<n; j++) { 776 sum1 += v[jrow]*x[6*idx[jrow]]; 777 sum2 += v[jrow]*x[6*idx[jrow]+1]; 778 sum3 += v[jrow]*x[6*idx[jrow]+2]; 779 sum4 += v[jrow]*x[6*idx[jrow]+3]; 780 sum5 += v[jrow]*x[6*idx[jrow]+4]; 781 sum6 += v[jrow]*x[6*idx[jrow]+5]; 782 jrow++; 783 } 784 y[6*i] = sum1; 785 y[6*i+1] = sum2; 786 y[6*i+2] = sum3; 787 y[6*i+3] = sum4; 788 y[6*i+4] = sum5; 789 y[6*i+5] = sum6; 790 } 791 792 PetscLogFlops(12*a->nz - 6*m); 793 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 794 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 800 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 801 { 802 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 803 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 804 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 805 int ierr,m = b->AIJ->m,n,i,*idx; 806 807 PetscFunctionBegin; 808 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 809 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 810 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 811 812 for (i=0; i<m; i++) { 813 idx = a->j + a->i[i] ; 814 v = a->a + a->i[i] ; 815 n = a->i[i+1] - a->i[i]; 816 alpha1 = x[6*i]; 817 alpha2 = x[6*i+1]; 818 alpha3 = x[6*i+2]; 819 alpha4 = x[6*i+3]; 820 alpha5 = x[6*i+4]; 821 alpha6 = x[6*i+5]; 822 while (n-->0) { 823 y[6*(*idx)] += alpha1*(*v); 824 y[6*(*idx)+1] += alpha2*(*v); 825 y[6*(*idx)+2] += alpha3*(*v); 826 y[6*(*idx)+3] += alpha4*(*v); 827 y[6*(*idx)+4] += alpha5*(*v); 828 y[6*(*idx)+5] += alpha6*(*v); 829 idx++; v++; 830 } 831 } 832 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 833 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 840 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 841 { 842 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 843 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 844 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 845 int ierr,m = b->AIJ->m,*idx,*ii; 846 int n,i,jrow,j; 847 848 PetscFunctionBegin; 849 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 850 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 851 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 852 idx = a->j; 853 v = a->a; 854 ii = a->i; 855 856 for (i=0; i<m; i++) { 857 jrow = ii[i]; 858 n = ii[i+1] - jrow; 859 sum1 = 0.0; 860 sum2 = 0.0; 861 sum3 = 0.0; 862 sum4 = 0.0; 863 sum5 = 0.0; 864 sum6 = 0.0; 865 for (j=0; j<n; j++) { 866 sum1 += v[jrow]*x[6*idx[jrow]]; 867 sum2 += v[jrow]*x[6*idx[jrow]+1]; 868 sum3 += v[jrow]*x[6*idx[jrow]+2]; 869 sum4 += v[jrow]*x[6*idx[jrow]+3]; 870 sum5 += v[jrow]*x[6*idx[jrow]+4]; 871 sum6 += v[jrow]*x[6*idx[jrow]+5]; 872 jrow++; 873 } 874 y[6*i] += sum1; 875 y[6*i+1] += sum2; 876 y[6*i+2] += sum3; 877 y[6*i+3] += sum4; 878 y[6*i+4] += sum5; 879 y[6*i+5] += sum6; 880 } 881 882 PetscLogFlops(12*a->nz); 883 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 884 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 885 PetscFunctionReturn(0); 886 } 887 888 #undef __FUNCT__ 889 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 890 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 891 { 892 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 893 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 894 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 895 int ierr,m = b->AIJ->m,n,i,*idx; 896 897 PetscFunctionBegin; 898 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 899 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 900 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 901 902 for (i=0; i<m; i++) { 903 idx = a->j + a->i[i] ; 904 v = a->a + a->i[i] ; 905 n = a->i[i+1] - a->i[i]; 906 alpha1 = x[6*i]; 907 alpha2 = x[6*i+1]; 908 alpha3 = x[6*i+2]; 909 alpha4 = x[6*i+3]; 910 alpha5 = x[6*i+4]; 911 alpha6 = x[6*i+5]; 912 while (n-->0) { 913 y[6*(*idx)] += alpha1*(*v); 914 y[6*(*idx)+1] += alpha2*(*v); 915 y[6*(*idx)+2] += alpha3*(*v); 916 y[6*(*idx)+3] += alpha4*(*v); 917 y[6*(*idx)+4] += alpha5*(*v); 918 y[6*(*idx)+5] += alpha6*(*v); 919 idx++; v++; 920 } 921 } 922 PetscLogFlops(12*a->nz); 923 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 924 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 /* ------------------------------------------------------------------------------*/ 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatMult_SeqMAIJ_8" 931 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 932 { 933 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 934 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 935 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 936 int ierr,m = b->AIJ->m,*idx,*ii; 937 int n,i,jrow,j; 938 939 PetscFunctionBegin; 940 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 941 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 942 idx = a->j; 943 v = a->a; 944 ii = a->i; 945 946 for (i=0; i<m; i++) { 947 jrow = ii[i]; 948 n = ii[i+1] - jrow; 949 sum1 = 0.0; 950 sum2 = 0.0; 951 sum3 = 0.0; 952 sum4 = 0.0; 953 sum5 = 0.0; 954 sum6 = 0.0; 955 sum7 = 0.0; 956 sum8 = 0.0; 957 for (j=0; j<n; j++) { 958 sum1 += v[jrow]*x[8*idx[jrow]]; 959 sum2 += v[jrow]*x[8*idx[jrow]+1]; 960 sum3 += v[jrow]*x[8*idx[jrow]+2]; 961 sum4 += v[jrow]*x[8*idx[jrow]+3]; 962 sum5 += v[jrow]*x[8*idx[jrow]+4]; 963 sum6 += v[jrow]*x[8*idx[jrow]+5]; 964 sum7 += v[jrow]*x[8*idx[jrow]+6]; 965 sum8 += v[jrow]*x[8*idx[jrow]+7]; 966 jrow++; 967 } 968 y[8*i] = sum1; 969 y[8*i+1] = sum2; 970 y[8*i+2] = sum3; 971 y[8*i+3] = sum4; 972 y[8*i+4] = sum5; 973 y[8*i+5] = sum6; 974 y[8*i+6] = sum7; 975 y[8*i+7] = sum8; 976 } 977 978 PetscLogFlops(16*a->nz - 8*m); 979 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 980 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 986 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 987 { 988 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 989 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 990 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 991 int ierr,m = b->AIJ->m,n,i,*idx; 992 993 PetscFunctionBegin; 994 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 995 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 996 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 997 998 for (i=0; i<m; i++) { 999 idx = a->j + a->i[i] ; 1000 v = a->a + a->i[i] ; 1001 n = a->i[i+1] - a->i[i]; 1002 alpha1 = x[8*i]; 1003 alpha2 = x[8*i+1]; 1004 alpha3 = x[8*i+2]; 1005 alpha4 = x[8*i+3]; 1006 alpha5 = x[8*i+4]; 1007 alpha6 = x[8*i+5]; 1008 alpha7 = x[8*i+6]; 1009 alpha8 = x[8*i+7]; 1010 while (n-->0) { 1011 y[8*(*idx)] += alpha1*(*v); 1012 y[8*(*idx)+1] += alpha2*(*v); 1013 y[8*(*idx)+2] += alpha3*(*v); 1014 y[8*(*idx)+3] += alpha4*(*v); 1015 y[8*(*idx)+4] += alpha5*(*v); 1016 y[8*(*idx)+5] += alpha6*(*v); 1017 y[8*(*idx)+6] += alpha7*(*v); 1018 y[8*(*idx)+7] += alpha8*(*v); 1019 idx++; v++; 1020 } 1021 } 1022 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1023 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1024 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1030 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1031 { 1032 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1033 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1034 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1035 int ierr,m = b->AIJ->m,*idx,*ii; 1036 int n,i,jrow,j; 1037 1038 PetscFunctionBegin; 1039 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1040 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1041 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1042 idx = a->j; 1043 v = a->a; 1044 ii = a->i; 1045 1046 for (i=0; i<m; i++) { 1047 jrow = ii[i]; 1048 n = ii[i+1] - jrow; 1049 sum1 = 0.0; 1050 sum2 = 0.0; 1051 sum3 = 0.0; 1052 sum4 = 0.0; 1053 sum5 = 0.0; 1054 sum6 = 0.0; 1055 sum7 = 0.0; 1056 sum8 = 0.0; 1057 for (j=0; j<n; j++) { 1058 sum1 += v[jrow]*x[8*idx[jrow]]; 1059 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1060 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1061 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1062 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1063 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1064 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1065 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1066 jrow++; 1067 } 1068 y[8*i] += sum1; 1069 y[8*i+1] += sum2; 1070 y[8*i+2] += sum3; 1071 y[8*i+3] += sum4; 1072 y[8*i+4] += sum5; 1073 y[8*i+5] += sum6; 1074 y[8*i+6] += sum7; 1075 y[8*i+7] += sum8; 1076 } 1077 1078 PetscLogFlops(16*a->nz); 1079 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1080 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1086 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1087 { 1088 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1089 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1090 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1091 int ierr,m = b->AIJ->m,n,i,*idx; 1092 1093 PetscFunctionBegin; 1094 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1095 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1096 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1097 for (i=0; i<m; i++) { 1098 idx = a->j + a->i[i] ; 1099 v = a->a + a->i[i] ; 1100 n = a->i[i+1] - a->i[i]; 1101 alpha1 = x[8*i]; 1102 alpha2 = x[8*i+1]; 1103 alpha3 = x[8*i+2]; 1104 alpha4 = x[8*i+3]; 1105 alpha5 = x[8*i+4]; 1106 alpha6 = x[8*i+5]; 1107 alpha7 = x[8*i+6]; 1108 alpha8 = x[8*i+7]; 1109 while (n-->0) { 1110 y[8*(*idx)] += alpha1*(*v); 1111 y[8*(*idx)+1] += alpha2*(*v); 1112 y[8*(*idx)+2] += alpha3*(*v); 1113 y[8*(*idx)+3] += alpha4*(*v); 1114 y[8*(*idx)+4] += alpha5*(*v); 1115 y[8*(*idx)+5] += alpha6*(*v); 1116 y[8*(*idx)+6] += alpha7*(*v); 1117 y[8*(*idx)+7] += alpha8*(*v); 1118 idx++; v++; 1119 } 1120 } 1121 PetscLogFlops(16*a->nz); 1122 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1123 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 /* ------------------------------------------------------------------------------*/ 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1130 int MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1131 { 1132 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1133 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1134 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1135 int ierr,m = b->AIJ->m,*idx,*ii; 1136 int n,i,jrow,j; 1137 1138 PetscFunctionBegin; 1139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1140 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1141 idx = a->j; 1142 v = a->a; 1143 ii = a->i; 1144 1145 for (i=0; i<m; i++) { 1146 jrow = ii[i]; 1147 n = ii[i+1] - jrow; 1148 sum1 = 0.0; 1149 sum2 = 0.0; 1150 sum3 = 0.0; 1151 sum4 = 0.0; 1152 sum5 = 0.0; 1153 sum6 = 0.0; 1154 sum7 = 0.0; 1155 sum8 = 0.0; 1156 sum9 = 0.0; 1157 for (j=0; j<n; j++) { 1158 sum1 += v[jrow]*x[9*idx[jrow]]; 1159 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1160 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1161 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1162 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1163 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1164 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1165 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1166 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1167 jrow++; 1168 } 1169 y[9*i] = sum1; 1170 y[9*i+1] = sum2; 1171 y[9*i+2] = sum3; 1172 y[9*i+3] = sum4; 1173 y[9*i+4] = sum5; 1174 y[9*i+5] = sum6; 1175 y[9*i+6] = sum7; 1176 y[9*i+7] = sum8; 1177 y[9*i+8] = sum9; 1178 } 1179 1180 PetscLogFlops(18*a->nz - 9*m); 1181 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1182 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1188 int MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1189 { 1190 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1191 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1192 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1193 int ierr,m = b->AIJ->m,n,i,*idx; 1194 1195 PetscFunctionBegin; 1196 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1197 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1198 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1199 1200 for (i=0; i<m; i++) { 1201 idx = a->j + a->i[i] ; 1202 v = a->a + a->i[i] ; 1203 n = a->i[i+1] - a->i[i]; 1204 alpha1 = x[9*i]; 1205 alpha2 = x[9*i+1]; 1206 alpha3 = x[9*i+2]; 1207 alpha4 = x[9*i+3]; 1208 alpha5 = x[9*i+4]; 1209 alpha6 = x[9*i+5]; 1210 alpha7 = x[9*i+6]; 1211 alpha8 = x[9*i+7]; 1212 alpha9 = x[9*i+8]; 1213 while (n-->0) { 1214 y[9*(*idx)] += alpha1*(*v); 1215 y[9*(*idx)+1] += alpha2*(*v); 1216 y[9*(*idx)+2] += alpha3*(*v); 1217 y[9*(*idx)+3] += alpha4*(*v); 1218 y[9*(*idx)+4] += alpha5*(*v); 1219 y[9*(*idx)+5] += alpha6*(*v); 1220 y[9*(*idx)+6] += alpha7*(*v); 1221 y[9*(*idx)+7] += alpha8*(*v); 1222 y[9*(*idx)+8] += alpha9*(*v); 1223 idx++; v++; 1224 } 1225 } 1226 PetscLogFlops(18*a->nz - 9*b->AIJ->n); 1227 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1228 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1234 int MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1235 { 1236 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1237 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1238 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1239 int ierr,m = b->AIJ->m,*idx,*ii; 1240 int n,i,jrow,j; 1241 1242 PetscFunctionBegin; 1243 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1244 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1245 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1246 idx = a->j; 1247 v = a->a; 1248 ii = a->i; 1249 1250 for (i=0; i<m; i++) { 1251 jrow = ii[i]; 1252 n = ii[i+1] - jrow; 1253 sum1 = 0.0; 1254 sum2 = 0.0; 1255 sum3 = 0.0; 1256 sum4 = 0.0; 1257 sum5 = 0.0; 1258 sum6 = 0.0; 1259 sum7 = 0.0; 1260 sum8 = 0.0; 1261 sum9 = 0.0; 1262 for (j=0; j<n; j++) { 1263 sum1 += v[jrow]*x[9*idx[jrow]]; 1264 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1265 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1266 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1267 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1268 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1269 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1270 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1271 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1272 jrow++; 1273 } 1274 y[9*i] += sum1; 1275 y[9*i+1] += sum2; 1276 y[9*i+2] += sum3; 1277 y[9*i+3] += sum4; 1278 y[9*i+4] += sum5; 1279 y[9*i+5] += sum6; 1280 y[9*i+6] += sum7; 1281 y[9*i+7] += sum8; 1282 y[9*i+8] += sum9; 1283 } 1284 1285 PetscLogFlops(18*a->nz); 1286 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1287 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1293 int MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1294 { 1295 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1296 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1297 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1298 int ierr,m = b->AIJ->m,n,i,*idx; 1299 1300 PetscFunctionBegin; 1301 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1302 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1303 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1304 for (i=0; i<m; i++) { 1305 idx = a->j + a->i[i] ; 1306 v = a->a + a->i[i] ; 1307 n = a->i[i+1] - a->i[i]; 1308 alpha1 = x[9*i]; 1309 alpha2 = x[9*i+1]; 1310 alpha3 = x[9*i+2]; 1311 alpha4 = x[9*i+3]; 1312 alpha5 = x[9*i+4]; 1313 alpha6 = x[9*i+5]; 1314 alpha7 = x[9*i+6]; 1315 alpha8 = x[9*i+7]; 1316 alpha9 = x[9*i+8]; 1317 while (n-->0) { 1318 y[9*(*idx)] += alpha1*(*v); 1319 y[9*(*idx)+1] += alpha2*(*v); 1320 y[9*(*idx)+2] += alpha3*(*v); 1321 y[9*(*idx)+3] += alpha4*(*v); 1322 y[9*(*idx)+4] += alpha5*(*v); 1323 y[9*(*idx)+5] += alpha6*(*v); 1324 y[9*(*idx)+6] += alpha7*(*v); 1325 y[9*(*idx)+7] += alpha8*(*v); 1326 y[9*(*idx)+8] += alpha9*(*v); 1327 idx++; v++; 1328 } 1329 } 1330 PetscLogFlops(18*a->nz); 1331 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1332 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 /*--------------------------------------------------------------------------------------------*/ 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1339 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1340 { 1341 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1342 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1343 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1344 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1345 int ierr,m = b->AIJ->m,*idx,*ii; 1346 int n,i,jrow,j; 1347 1348 PetscFunctionBegin; 1349 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1350 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1351 idx = a->j; 1352 v = a->a; 1353 ii = a->i; 1354 1355 for (i=0; i<m; i++) { 1356 jrow = ii[i]; 1357 n = ii[i+1] - jrow; 1358 sum1 = 0.0; 1359 sum2 = 0.0; 1360 sum3 = 0.0; 1361 sum4 = 0.0; 1362 sum5 = 0.0; 1363 sum6 = 0.0; 1364 sum7 = 0.0; 1365 sum8 = 0.0; 1366 sum9 = 0.0; 1367 sum10 = 0.0; 1368 sum11 = 0.0; 1369 sum12 = 0.0; 1370 sum13 = 0.0; 1371 sum14 = 0.0; 1372 sum15 = 0.0; 1373 sum16 = 0.0; 1374 for (j=0; j<n; j++) { 1375 sum1 += v[jrow]*x[16*idx[jrow]]; 1376 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1377 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1378 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1379 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1380 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1381 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1382 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1383 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1384 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1385 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1386 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1387 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1388 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1389 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1390 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1391 jrow++; 1392 } 1393 y[16*i] = sum1; 1394 y[16*i+1] = sum2; 1395 y[16*i+2] = sum3; 1396 y[16*i+3] = sum4; 1397 y[16*i+4] = sum5; 1398 y[16*i+5] = sum6; 1399 y[16*i+6] = sum7; 1400 y[16*i+7] = sum8; 1401 y[16*i+8] = sum9; 1402 y[16*i+9] = sum10; 1403 y[16*i+10] = sum11; 1404 y[16*i+11] = sum12; 1405 y[16*i+12] = sum13; 1406 y[16*i+13] = sum14; 1407 y[16*i+14] = sum15; 1408 y[16*i+15] = sum16; 1409 } 1410 1411 PetscLogFlops(32*a->nz - 16*m); 1412 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1413 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1419 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1420 { 1421 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1422 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1423 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1424 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1425 int ierr,m = b->AIJ->m,n,i,*idx; 1426 1427 PetscFunctionBegin; 1428 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1429 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1430 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1431 1432 for (i=0; i<m; i++) { 1433 idx = a->j + a->i[i] ; 1434 v = a->a + a->i[i] ; 1435 n = a->i[i+1] - a->i[i]; 1436 alpha1 = x[16*i]; 1437 alpha2 = x[16*i+1]; 1438 alpha3 = x[16*i+2]; 1439 alpha4 = x[16*i+3]; 1440 alpha5 = x[16*i+4]; 1441 alpha6 = x[16*i+5]; 1442 alpha7 = x[16*i+6]; 1443 alpha8 = x[16*i+7]; 1444 alpha9 = x[16*i+8]; 1445 alpha10 = x[16*i+9]; 1446 alpha11 = x[16*i+10]; 1447 alpha12 = x[16*i+11]; 1448 alpha13 = x[16*i+12]; 1449 alpha14 = x[16*i+13]; 1450 alpha15 = x[16*i+14]; 1451 alpha16 = x[16*i+15]; 1452 while (n-->0) { 1453 y[16*(*idx)] += alpha1*(*v); 1454 y[16*(*idx)+1] += alpha2*(*v); 1455 y[16*(*idx)+2] += alpha3*(*v); 1456 y[16*(*idx)+3] += alpha4*(*v); 1457 y[16*(*idx)+4] += alpha5*(*v); 1458 y[16*(*idx)+5] += alpha6*(*v); 1459 y[16*(*idx)+6] += alpha7*(*v); 1460 y[16*(*idx)+7] += alpha8*(*v); 1461 y[16*(*idx)+8] += alpha9*(*v); 1462 y[16*(*idx)+9] += alpha10*(*v); 1463 y[16*(*idx)+10] += alpha11*(*v); 1464 y[16*(*idx)+11] += alpha12*(*v); 1465 y[16*(*idx)+12] += alpha13*(*v); 1466 y[16*(*idx)+13] += alpha14*(*v); 1467 y[16*(*idx)+14] += alpha15*(*v); 1468 y[16*(*idx)+15] += alpha16*(*v); 1469 idx++; v++; 1470 } 1471 } 1472 PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1473 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1474 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1480 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1481 { 1482 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1483 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1484 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1485 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1486 int ierr,m = b->AIJ->m,*idx,*ii; 1487 int n,i,jrow,j; 1488 1489 PetscFunctionBegin; 1490 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1491 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1492 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1493 idx = a->j; 1494 v = a->a; 1495 ii = a->i; 1496 1497 for (i=0; i<m; i++) { 1498 jrow = ii[i]; 1499 n = ii[i+1] - jrow; 1500 sum1 = 0.0; 1501 sum2 = 0.0; 1502 sum3 = 0.0; 1503 sum4 = 0.0; 1504 sum5 = 0.0; 1505 sum6 = 0.0; 1506 sum7 = 0.0; 1507 sum8 = 0.0; 1508 sum9 = 0.0; 1509 sum10 = 0.0; 1510 sum11 = 0.0; 1511 sum12 = 0.0; 1512 sum13 = 0.0; 1513 sum14 = 0.0; 1514 sum15 = 0.0; 1515 sum16 = 0.0; 1516 for (j=0; j<n; j++) { 1517 sum1 += v[jrow]*x[16*idx[jrow]]; 1518 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1519 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1520 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1521 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1522 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1523 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1524 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1525 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1526 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1527 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1528 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1529 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1530 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1531 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1532 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1533 jrow++; 1534 } 1535 y[16*i] += sum1; 1536 y[16*i+1] += sum2; 1537 y[16*i+2] += sum3; 1538 y[16*i+3] += sum4; 1539 y[16*i+4] += sum5; 1540 y[16*i+5] += sum6; 1541 y[16*i+6] += sum7; 1542 y[16*i+7] += sum8; 1543 y[16*i+8] += sum9; 1544 y[16*i+9] += sum10; 1545 y[16*i+10] += sum11; 1546 y[16*i+11] += sum12; 1547 y[16*i+12] += sum13; 1548 y[16*i+13] += sum14; 1549 y[16*i+14] += sum15; 1550 y[16*i+15] += sum16; 1551 } 1552 1553 PetscLogFlops(32*a->nz); 1554 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1555 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1561 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1562 { 1563 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1564 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1565 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1566 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1567 int ierr,m = b->AIJ->m,n,i,*idx; 1568 1569 PetscFunctionBegin; 1570 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1571 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1572 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1573 for (i=0; i<m; i++) { 1574 idx = a->j + a->i[i] ; 1575 v = a->a + a->i[i] ; 1576 n = a->i[i+1] - a->i[i]; 1577 alpha1 = x[16*i]; 1578 alpha2 = x[16*i+1]; 1579 alpha3 = x[16*i+2]; 1580 alpha4 = x[16*i+3]; 1581 alpha5 = x[16*i+4]; 1582 alpha6 = x[16*i+5]; 1583 alpha7 = x[16*i+6]; 1584 alpha8 = x[16*i+7]; 1585 alpha9 = x[16*i+8]; 1586 alpha10 = x[16*i+9]; 1587 alpha11 = x[16*i+10]; 1588 alpha12 = x[16*i+11]; 1589 alpha13 = x[16*i+12]; 1590 alpha14 = x[16*i+13]; 1591 alpha15 = x[16*i+14]; 1592 alpha16 = x[16*i+15]; 1593 while (n-->0) { 1594 y[16*(*idx)] += alpha1*(*v); 1595 y[16*(*idx)+1] += alpha2*(*v); 1596 y[16*(*idx)+2] += alpha3*(*v); 1597 y[16*(*idx)+3] += alpha4*(*v); 1598 y[16*(*idx)+4] += alpha5*(*v); 1599 y[16*(*idx)+5] += alpha6*(*v); 1600 y[16*(*idx)+6] += alpha7*(*v); 1601 y[16*(*idx)+7] += alpha8*(*v); 1602 y[16*(*idx)+8] += alpha9*(*v); 1603 y[16*(*idx)+9] += alpha10*(*v); 1604 y[16*(*idx)+10] += alpha11*(*v); 1605 y[16*(*idx)+11] += alpha12*(*v); 1606 y[16*(*idx)+12] += alpha13*(*v); 1607 y[16*(*idx)+13] += alpha14*(*v); 1608 y[16*(*idx)+14] += alpha15*(*v); 1609 y[16*(*idx)+15] += alpha16*(*v); 1610 idx++; v++; 1611 } 1612 } 1613 PetscLogFlops(32*a->nz); 1614 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1615 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 1619 /*===================================================================================*/ 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1622 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1623 { 1624 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1625 int ierr; 1626 PetscFunctionBegin; 1627 1628 /* start the scatter */ 1629 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1630 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1631 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1632 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1633 PetscFunctionReturn(0); 1634 } 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1638 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1639 { 1640 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1641 int ierr; 1642 PetscFunctionBegin; 1643 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1644 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1645 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1646 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1652 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1653 { 1654 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1655 int ierr; 1656 PetscFunctionBegin; 1657 1658 /* start the scatter */ 1659 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1660 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1662 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 1663 PetscFunctionReturn(0); 1664 } 1665 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1668 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1669 { 1670 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1671 int ierr; 1672 PetscFunctionBegin; 1673 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1674 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1675 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1676 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 /* ---------------------------------------------------------------------------------- */ 1681 /*MC 1682 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1683 operations for multicomponent problems. It interpolates each component the same 1684 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 1685 and MATMPIAIJ for distributed matrices. 1686 1687 Operations provided: 1688 . MatMult 1689 . MatMultTranspose 1690 . MatMultAdd 1691 . MatMultTransposeAdd 1692 1693 Level: advanced 1694 1695 M*/ 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatCreateMAIJ" 1698 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 1699 { 1700 int ierr,size,n; 1701 Mat_MPIMAIJ *b; 1702 Mat B; 1703 1704 PetscFunctionBegin; 1705 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1706 1707 if (dof == 1) { 1708 *maij = A; 1709 } else { 1710 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1711 B->assembled = PETSC_TRUE; 1712 1713 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1714 if (size == 1) { 1715 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1716 B->ops->destroy = MatDestroy_SeqMAIJ; 1717 b = (Mat_MPIMAIJ*)B->data; 1718 b->dof = dof; 1719 b->AIJ = A; 1720 if (dof == 2) { 1721 B->ops->mult = MatMult_SeqMAIJ_2; 1722 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1723 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1724 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1725 } else if (dof == 3) { 1726 B->ops->mult = MatMult_SeqMAIJ_3; 1727 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1728 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1729 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1730 } else if (dof == 4) { 1731 B->ops->mult = MatMult_SeqMAIJ_4; 1732 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1733 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1734 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1735 } else if (dof == 5) { 1736 B->ops->mult = MatMult_SeqMAIJ_5; 1737 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1738 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1739 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1740 } else if (dof == 6) { 1741 B->ops->mult = MatMult_SeqMAIJ_6; 1742 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1743 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1744 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1745 } else if (dof == 8) { 1746 B->ops->mult = MatMult_SeqMAIJ_8; 1747 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1748 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1749 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1750 } else if (dof == 9) { 1751 B->ops->mult = MatMult_SeqMAIJ_9; 1752 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1753 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1754 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1755 } else if (dof == 16) { 1756 B->ops->mult = MatMult_SeqMAIJ_16; 1757 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1758 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1759 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1760 } else { 1761 SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1762 } 1763 } else { 1764 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1765 IS from,to; 1766 Vec gvec; 1767 int *garray,i; 1768 1769 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1770 B->ops->destroy = MatDestroy_MPIMAIJ; 1771 b = (Mat_MPIMAIJ*)B->data; 1772 b->dof = dof; 1773 b->A = A; 1774 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1775 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1776 1777 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1778 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1779 1780 /* create two temporary Index sets for build scatter gather */ 1781 ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1782 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1783 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1784 ierr = PetscFree(garray);CHKERRQ(ierr); 1785 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1786 1787 /* create temporary global vector to generate scatter context */ 1788 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1789 1790 /* generate the scatter context */ 1791 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1792 1793 ierr = ISDestroy(from);CHKERRQ(ierr); 1794 ierr = ISDestroy(to);CHKERRQ(ierr); 1795 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1796 1797 B->ops->mult = MatMult_MPIMAIJ_dof; 1798 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1799 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1800 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1801 } 1802 *maij = B; 1803 } 1804 PetscFunctionReturn(0); 1805 } 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818