1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/maij/maij.h" 20 #include "src/vec/vecimpl.h" 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatMAIJGetAIJ" 24 int MatMAIJGetAIJ(Mat A,Mat *B) 25 { 26 int ierr; 27 PetscTruth ismpimaij,isseqmaij; 28 29 PetscFunctionBegin; 30 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 31 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 32 if (ismpimaij) { 33 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 34 35 *B = b->A; 36 } else if (isseqmaij) { 37 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 38 39 *B = b->AIJ; 40 } else { 41 *B = A; 42 } 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatMAIJRedimension" 48 int MatMAIJRedimension(Mat A,int dof,Mat *B) 49 { 50 int ierr; 51 Mat Aij; 52 53 PetscFunctionBegin; 54 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 55 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatDestroy_SeqMAIJ" 61 int MatDestroy_SeqMAIJ(Mat A) 62 { 63 int ierr; 64 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 65 66 PetscFunctionBegin; 67 if (b->AIJ) { 68 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 69 } 70 ierr = PetscFree(b);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatDestroy_MPIMAIJ" 76 int MatDestroy_MPIMAIJ(Mat A) 77 { 78 int ierr; 79 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 80 81 PetscFunctionBegin; 82 if (b->AIJ) { 83 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 84 } 85 if (b->OAIJ) { 86 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 87 } 88 if (b->A) { 89 ierr = MatDestroy(b->A);CHKERRQ(ierr); 90 } 91 if (b->ctx) { 92 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 93 } 94 if (b->w) { 95 ierr = VecDestroy(b->w);CHKERRQ(ierr); 96 } 97 ierr = PetscFree(b);CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 EXTERN_C_BEGIN 102 #undef __FUNCT__ 103 #define __FUNCT__ "MatCreate_MAIJ" 104 int MatCreate_MAIJ(Mat A) 105 { 106 int ierr; 107 Mat_MPIMAIJ *b; 108 109 PetscFunctionBegin; 110 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 111 A->data = (void*)b; 112 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 113 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 114 A->factor = 0; 115 A->mapping = 0; 116 117 b->AIJ = 0; 118 b->dof = 0; 119 b->OAIJ = 0; 120 b->ctx = 0; 121 b->w = 0; 122 PetscFunctionReturn(0); 123 } 124 EXTERN_C_END 125 126 /* --------------------------------------------------------------------------------------*/ 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatMult_SeqMAIJ_2" 129 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 130 { 131 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 133 PetscScalar *x,*y,*v,sum1, sum2; 134 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 135 int n,i,jrow,j; 136 137 PetscFunctionBegin; 138 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 139 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 140 x = x + shift; /* shift for Fortran start by 1 indexing */ 141 idx = a->j; 142 v = a->a; 143 ii = a->i; 144 145 v += shift; /* shift for Fortran start by 1 indexing */ 146 idx += shift; 147 for (i=0; i<m; i++) { 148 jrow = ii[i]; 149 n = ii[i+1] - jrow; 150 sum1 = 0.0; 151 sum2 = 0.0; 152 for (j=0; j<n; j++) { 153 sum1 += v[jrow]*x[2*idx[jrow]]; 154 sum2 += v[jrow]*x[2*idx[jrow]+1]; 155 jrow++; 156 } 157 y[2*i] = sum1; 158 y[2*i+1] = sum2; 159 } 160 161 PetscLogFlops(4*a->nz - 2*m); 162 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 163 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 169 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 170 { 171 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 172 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 173 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 174 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 175 176 PetscFunctionBegin; 177 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 178 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 179 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 180 y = y + shift; /* shift for Fortran start by 1 indexing */ 181 for (i=0; i<m; i++) { 182 idx = a->j + a->i[i] + shift; 183 v = a->a + a->i[i] + shift; 184 n = a->i[i+1] - a->i[i]; 185 alpha1 = x[2*i]; 186 alpha2 = x[2*i+1]; 187 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 188 } 189 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 190 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 191 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 192 PetscFunctionReturn(0); 193 } 194 195 #undef __FUNCT__ 196 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 197 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 198 { 199 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 200 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 201 PetscScalar *x,*y,*v,sum1, sum2; 202 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 203 int n,i,jrow,j; 204 205 PetscFunctionBegin; 206 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 207 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 208 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 209 x = x + shift; /* shift for Fortran start by 1 indexing */ 210 idx = a->j; 211 v = a->a; 212 ii = a->i; 213 214 v += shift; /* shift for Fortran start by 1 indexing */ 215 idx += shift; 216 for (i=0; i<m; i++) { 217 jrow = ii[i]; 218 n = ii[i+1] - jrow; 219 sum1 = 0.0; 220 sum2 = 0.0; 221 for (j=0; j<n; j++) { 222 sum1 += v[jrow]*x[2*idx[jrow]]; 223 sum2 += v[jrow]*x[2*idx[jrow]+1]; 224 jrow++; 225 } 226 y[2*i] += sum1; 227 y[2*i+1] += sum2; 228 } 229 230 PetscLogFlops(4*a->nz - 2*m); 231 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 232 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 233 PetscFunctionReturn(0); 234 } 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 237 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 238 { 239 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 240 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 241 PetscScalar *x,*y,*v,alpha1,alpha2; 242 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 243 244 PetscFunctionBegin; 245 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 246 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 247 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 248 y = y + shift; /* shift for Fortran start by 1 indexing */ 249 for (i=0; i<m; i++) { 250 idx = a->j + a->i[i] + shift; 251 v = a->a + a->i[i] + shift; 252 n = a->i[i+1] - a->i[i]; 253 alpha1 = x[2*i]; 254 alpha2 = x[2*i+1]; 255 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 256 } 257 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 258 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 259 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 /* --------------------------------------------------------------------------------------*/ 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatMult_SeqMAIJ_3" 265 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 266 { 267 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 269 PetscScalar *x,*y,*v,sum1, sum2, sum3; 270 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 271 int n,i,jrow,j; 272 273 PetscFunctionBegin; 274 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 275 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 276 x = x + shift; /* shift for Fortran start by 1 indexing */ 277 idx = a->j; 278 v = a->a; 279 ii = a->i; 280 281 v += shift; /* shift for Fortran start by 1 indexing */ 282 idx += shift; 283 for (i=0; i<m; i++) { 284 jrow = ii[i]; 285 n = ii[i+1] - jrow; 286 sum1 = 0.0; 287 sum2 = 0.0; 288 sum3 = 0.0; 289 for (j=0; j<n; j++) { 290 sum1 += v[jrow]*x[3*idx[jrow]]; 291 sum2 += v[jrow]*x[3*idx[jrow]+1]; 292 sum3 += v[jrow]*x[3*idx[jrow]+2]; 293 jrow++; 294 } 295 y[3*i] = sum1; 296 y[3*i+1] = sum2; 297 y[3*i+2] = sum3; 298 } 299 300 PetscLogFlops(6*a->nz - 3*m); 301 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 302 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 308 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 309 { 310 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 311 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 312 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 313 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 314 315 PetscFunctionBegin; 316 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 317 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 318 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 319 y = y + shift; /* shift for Fortran start by 1 indexing */ 320 for (i=0; i<m; i++) { 321 idx = a->j + a->i[i] + shift; 322 v = a->a + a->i[i] + shift; 323 n = a->i[i+1] - a->i[i]; 324 alpha1 = x[3*i]; 325 alpha2 = x[3*i+1]; 326 alpha3 = x[3*i+2]; 327 while (n-->0) { 328 y[3*(*idx)] += alpha1*(*v); 329 y[3*(*idx)+1] += alpha2*(*v); 330 y[3*(*idx)+2] += alpha3*(*v); 331 idx++; v++; 332 } 333 } 334 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 335 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 336 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 337 PetscFunctionReturn(0); 338 } 339 340 #undef __FUNCT__ 341 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 342 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 343 { 344 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 345 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 346 PetscScalar *x,*y,*v,sum1, sum2, sum3; 347 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 348 int n,i,jrow,j; 349 350 PetscFunctionBegin; 351 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 352 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 353 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 354 x = x + shift; /* shift for Fortran start by 1 indexing */ 355 idx = a->j; 356 v = a->a; 357 ii = a->i; 358 359 v += shift; /* shift for Fortran start by 1 indexing */ 360 idx += shift; 361 for (i=0; i<m; i++) { 362 jrow = ii[i]; 363 n = ii[i+1] - jrow; 364 sum1 = 0.0; 365 sum2 = 0.0; 366 sum3 = 0.0; 367 for (j=0; j<n; j++) { 368 sum1 += v[jrow]*x[3*idx[jrow]]; 369 sum2 += v[jrow]*x[3*idx[jrow]+1]; 370 sum3 += v[jrow]*x[3*idx[jrow]+2]; 371 jrow++; 372 } 373 y[3*i] += sum1; 374 y[3*i+1] += sum2; 375 y[3*i+2] += sum3; 376 } 377 378 PetscLogFlops(6*a->nz); 379 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 380 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 381 PetscFunctionReturn(0); 382 } 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 385 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 386 { 387 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 388 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 389 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 390 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 391 392 PetscFunctionBegin; 393 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 394 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 395 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 396 y = y + shift; /* shift for Fortran start by 1 indexing */ 397 for (i=0; i<m; i++) { 398 idx = a->j + a->i[i] + shift; 399 v = a->a + a->i[i] + shift; 400 n = a->i[i+1] - a->i[i]; 401 alpha1 = x[3*i]; 402 alpha2 = x[3*i+1]; 403 alpha3 = x[3*i+2]; 404 while (n-->0) { 405 y[3*(*idx)] += alpha1*(*v); 406 y[3*(*idx)+1] += alpha2*(*v); 407 y[3*(*idx)+2] += alpha3*(*v); 408 idx++; v++; 409 } 410 } 411 PetscLogFlops(6*a->nz); 412 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 413 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 /* ------------------------------------------------------------------------------*/ 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatMult_SeqMAIJ_4" 420 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 421 { 422 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 423 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 424 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 425 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 426 int n,i,jrow,j; 427 428 PetscFunctionBegin; 429 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 430 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 431 x = x + shift; /* shift for Fortran start by 1 indexing */ 432 idx = a->j; 433 v = a->a; 434 ii = a->i; 435 436 v += shift; /* shift for Fortran start by 1 indexing */ 437 idx += shift; 438 for (i=0; i<m; i++) { 439 jrow = ii[i]; 440 n = ii[i+1] - jrow; 441 sum1 = 0.0; 442 sum2 = 0.0; 443 sum3 = 0.0; 444 sum4 = 0.0; 445 for (j=0; j<n; j++) { 446 sum1 += v[jrow]*x[4*idx[jrow]]; 447 sum2 += v[jrow]*x[4*idx[jrow]+1]; 448 sum3 += v[jrow]*x[4*idx[jrow]+2]; 449 sum4 += v[jrow]*x[4*idx[jrow]+3]; 450 jrow++; 451 } 452 y[4*i] = sum1; 453 y[4*i+1] = sum2; 454 y[4*i+2] = sum3; 455 y[4*i+3] = sum4; 456 } 457 458 PetscLogFlops(8*a->nz - 4*m); 459 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 460 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 466 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 467 { 468 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 469 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 470 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 471 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 472 473 PetscFunctionBegin; 474 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 475 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 476 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 477 y = y + shift; /* shift for Fortran start by 1 indexing */ 478 for (i=0; i<m; i++) { 479 idx = a->j + a->i[i] + shift; 480 v = a->a + a->i[i] + shift; 481 n = a->i[i+1] - a->i[i]; 482 alpha1 = x[4*i]; 483 alpha2 = x[4*i+1]; 484 alpha3 = x[4*i+2]; 485 alpha4 = x[4*i+3]; 486 while (n-->0) { 487 y[4*(*idx)] += alpha1*(*v); 488 y[4*(*idx)+1] += alpha2*(*v); 489 y[4*(*idx)+2] += alpha3*(*v); 490 y[4*(*idx)+3] += alpha4*(*v); 491 idx++; v++; 492 } 493 } 494 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 495 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 496 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 502 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 505 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 506 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 507 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 508 int n,i,jrow,j; 509 510 PetscFunctionBegin; 511 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 512 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 513 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 514 x = x + shift; /* shift for Fortran start by 1 indexing */ 515 idx = a->j; 516 v = a->a; 517 ii = a->i; 518 519 v += shift; /* shift for Fortran start by 1 indexing */ 520 idx += shift; 521 for (i=0; i<m; i++) { 522 jrow = ii[i]; 523 n = ii[i+1] - jrow; 524 sum1 = 0.0; 525 sum2 = 0.0; 526 sum3 = 0.0; 527 sum4 = 0.0; 528 for (j=0; j<n; j++) { 529 sum1 += v[jrow]*x[4*idx[jrow]]; 530 sum2 += v[jrow]*x[4*idx[jrow]+1]; 531 sum3 += v[jrow]*x[4*idx[jrow]+2]; 532 sum4 += v[jrow]*x[4*idx[jrow]+3]; 533 jrow++; 534 } 535 y[4*i] += sum1; 536 y[4*i+1] += sum2; 537 y[4*i+2] += sum3; 538 y[4*i+3] += sum4; 539 } 540 541 PetscLogFlops(8*a->nz - 4*m); 542 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 548 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 549 { 550 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 551 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 552 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 553 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 554 555 PetscFunctionBegin; 556 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 557 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 558 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 559 y = y + shift; /* shift for Fortran start by 1 indexing */ 560 for (i=0; i<m; i++) { 561 idx = a->j + a->i[i] + shift; 562 v = a->a + a->i[i] + shift; 563 n = a->i[i+1] - a->i[i]; 564 alpha1 = x[4*i]; 565 alpha2 = x[4*i+1]; 566 alpha3 = x[4*i+2]; 567 alpha4 = x[4*i+3]; 568 while (n-->0) { 569 y[4*(*idx)] += alpha1*(*v); 570 y[4*(*idx)+1] += alpha2*(*v); 571 y[4*(*idx)+2] += alpha3*(*v); 572 y[4*(*idx)+3] += alpha4*(*v); 573 idx++; v++; 574 } 575 } 576 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 577 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 578 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 /* ------------------------------------------------------------------------------*/ 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMult_SeqMAIJ_5" 585 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 586 { 587 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 588 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 589 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 590 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 591 int n,i,jrow,j; 592 593 PetscFunctionBegin; 594 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 595 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 596 x = x + shift; /* shift for Fortran start by 1 indexing */ 597 idx = a->j; 598 v = a->a; 599 ii = a->i; 600 601 v += shift; /* shift for Fortran start by 1 indexing */ 602 idx += shift; 603 for (i=0; i<m; i++) { 604 jrow = ii[i]; 605 n = ii[i+1] - jrow; 606 sum1 = 0.0; 607 sum2 = 0.0; 608 sum3 = 0.0; 609 sum4 = 0.0; 610 sum5 = 0.0; 611 for (j=0; j<n; j++) { 612 sum1 += v[jrow]*x[5*idx[jrow]]; 613 sum2 += v[jrow]*x[5*idx[jrow]+1]; 614 sum3 += v[jrow]*x[5*idx[jrow]+2]; 615 sum4 += v[jrow]*x[5*idx[jrow]+3]; 616 sum5 += v[jrow]*x[5*idx[jrow]+4]; 617 jrow++; 618 } 619 y[5*i] = sum1; 620 y[5*i+1] = sum2; 621 y[5*i+2] = sum3; 622 y[5*i+3] = sum4; 623 y[5*i+4] = sum5; 624 } 625 626 PetscLogFlops(10*a->nz - 5*m); 627 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 628 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 629 PetscFunctionReturn(0); 630 } 631 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 634 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 635 { 636 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 637 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 638 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 639 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 640 641 PetscFunctionBegin; 642 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 643 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 644 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 645 y = y + shift; /* shift for Fortran start by 1 indexing */ 646 for (i=0; i<m; i++) { 647 idx = a->j + a->i[i] + shift; 648 v = a->a + a->i[i] + shift; 649 n = a->i[i+1] - a->i[i]; 650 alpha1 = x[5*i]; 651 alpha2 = x[5*i+1]; 652 alpha3 = x[5*i+2]; 653 alpha4 = x[5*i+3]; 654 alpha5 = x[5*i+4]; 655 while (n-->0) { 656 y[5*(*idx)] += alpha1*(*v); 657 y[5*(*idx)+1] += alpha2*(*v); 658 y[5*(*idx)+2] += alpha3*(*v); 659 y[5*(*idx)+3] += alpha4*(*v); 660 y[5*(*idx)+4] += alpha5*(*v); 661 idx++; v++; 662 } 663 } 664 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 665 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 666 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 #undef __FUNCT__ 671 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 672 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 673 { 674 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 675 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 676 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 677 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 678 int n,i,jrow,j; 679 680 PetscFunctionBegin; 681 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 682 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 683 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 684 x = x + shift; /* shift for Fortran start by 1 indexing */ 685 idx = a->j; 686 v = a->a; 687 ii = a->i; 688 689 v += shift; /* shift for Fortran start by 1 indexing */ 690 idx += shift; 691 for (i=0; i<m; i++) { 692 jrow = ii[i]; 693 n = ii[i+1] - jrow; 694 sum1 = 0.0; 695 sum2 = 0.0; 696 sum3 = 0.0; 697 sum4 = 0.0; 698 sum5 = 0.0; 699 for (j=0; j<n; j++) { 700 sum1 += v[jrow]*x[5*idx[jrow]]; 701 sum2 += v[jrow]*x[5*idx[jrow]+1]; 702 sum3 += v[jrow]*x[5*idx[jrow]+2]; 703 sum4 += v[jrow]*x[5*idx[jrow]+3]; 704 sum5 += v[jrow]*x[5*idx[jrow]+4]; 705 jrow++; 706 } 707 y[5*i] += sum1; 708 y[5*i+1] += sum2; 709 y[5*i+2] += sum3; 710 y[5*i+3] += sum4; 711 y[5*i+4] += sum5; 712 } 713 714 PetscLogFlops(10*a->nz); 715 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 716 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 722 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 723 { 724 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 725 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 726 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 727 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 728 729 PetscFunctionBegin; 730 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 731 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 732 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 733 y = y + shift; /* shift for Fortran start by 1 indexing */ 734 for (i=0; i<m; i++) { 735 idx = a->j + a->i[i] + shift; 736 v = a->a + a->i[i] + shift; 737 n = a->i[i+1] - a->i[i]; 738 alpha1 = x[5*i]; 739 alpha2 = x[5*i+1]; 740 alpha3 = x[5*i+2]; 741 alpha4 = x[5*i+3]; 742 alpha5 = x[5*i+4]; 743 while (n-->0) { 744 y[5*(*idx)] += alpha1*(*v); 745 y[5*(*idx)+1] += alpha2*(*v); 746 y[5*(*idx)+2] += alpha3*(*v); 747 y[5*(*idx)+3] += alpha4*(*v); 748 y[5*(*idx)+4] += alpha5*(*v); 749 idx++; v++; 750 } 751 } 752 PetscLogFlops(10*a->nz); 753 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 754 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 /* ------------------------------------------------------------------------------*/ 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatMult_SeqMAIJ_6" 761 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 762 { 763 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 764 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 765 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 766 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 767 int n,i,jrow,j; 768 769 PetscFunctionBegin; 770 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 771 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 772 x = x + shift; /* shift for Fortran start by 1 indexing */ 773 idx = a->j; 774 v = a->a; 775 ii = a->i; 776 777 v += shift; /* shift for Fortran start by 1 indexing */ 778 idx += shift; 779 for (i=0; i<m; i++) { 780 jrow = ii[i]; 781 n = ii[i+1] - jrow; 782 sum1 = 0.0; 783 sum2 = 0.0; 784 sum3 = 0.0; 785 sum4 = 0.0; 786 sum5 = 0.0; 787 sum6 = 0.0; 788 for (j=0; j<n; j++) { 789 sum1 += v[jrow]*x[6*idx[jrow]]; 790 sum2 += v[jrow]*x[6*idx[jrow]+1]; 791 sum3 += v[jrow]*x[6*idx[jrow]+2]; 792 sum4 += v[jrow]*x[6*idx[jrow]+3]; 793 sum5 += v[jrow]*x[6*idx[jrow]+4]; 794 sum6 += v[jrow]*x[6*idx[jrow]+5]; 795 jrow++; 796 } 797 y[6*i] = sum1; 798 y[6*i+1] = sum2; 799 y[6*i+2] = sum3; 800 y[6*i+3] = sum4; 801 y[6*i+4] = sum5; 802 y[6*i+5] = sum6; 803 } 804 805 PetscLogFlops(12*a->nz - 6*m); 806 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 807 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 813 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 814 { 815 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 816 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 817 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 818 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 819 820 PetscFunctionBegin; 821 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 822 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 823 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 824 y = y + shift; /* shift for Fortran start by 1 indexing */ 825 for (i=0; i<m; i++) { 826 idx = a->j + a->i[i] + shift; 827 v = a->a + a->i[i] + shift; 828 n = a->i[i+1] - a->i[i]; 829 alpha1 = x[6*i]; 830 alpha2 = x[6*i+1]; 831 alpha3 = x[6*i+2]; 832 alpha4 = x[6*i+3]; 833 alpha5 = x[6*i+4]; 834 alpha6 = x[6*i+5]; 835 while (n-->0) { 836 y[6*(*idx)] += alpha1*(*v); 837 y[6*(*idx)+1] += alpha2*(*v); 838 y[6*(*idx)+2] += alpha3*(*v); 839 y[6*(*idx)+3] += alpha4*(*v); 840 y[6*(*idx)+4] += alpha5*(*v); 841 y[6*(*idx)+5] += alpha6*(*v); 842 idx++; v++; 843 } 844 } 845 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 846 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 847 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 853 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 854 { 855 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 856 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 857 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 858 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 859 int n,i,jrow,j; 860 861 PetscFunctionBegin; 862 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 863 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 864 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 865 x = x + shift; /* shift for Fortran start by 1 indexing */ 866 idx = a->j; 867 v = a->a; 868 ii = a->i; 869 870 v += shift; /* shift for Fortran start by 1 indexing */ 871 idx += shift; 872 for (i=0; i<m; i++) { 873 jrow = ii[i]; 874 n = ii[i+1] - jrow; 875 sum1 = 0.0; 876 sum2 = 0.0; 877 sum3 = 0.0; 878 sum4 = 0.0; 879 sum5 = 0.0; 880 sum6 = 0.0; 881 for (j=0; j<n; j++) { 882 sum1 += v[jrow]*x[6*idx[jrow]]; 883 sum2 += v[jrow]*x[6*idx[jrow]+1]; 884 sum3 += v[jrow]*x[6*idx[jrow]+2]; 885 sum4 += v[jrow]*x[6*idx[jrow]+3]; 886 sum5 += v[jrow]*x[6*idx[jrow]+4]; 887 sum6 += v[jrow]*x[6*idx[jrow]+5]; 888 jrow++; 889 } 890 y[6*i] += sum1; 891 y[6*i+1] += sum2; 892 y[6*i+2] += sum3; 893 y[6*i+3] += sum4; 894 y[6*i+4] += sum5; 895 y[6*i+5] += sum6; 896 } 897 898 PetscLogFlops(12*a->nz); 899 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 900 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 906 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 907 { 908 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 909 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 910 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 911 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 912 913 PetscFunctionBegin; 914 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 915 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 916 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 917 y = y + shift; /* shift for Fortran start by 1 indexing */ 918 for (i=0; i<m; i++) { 919 idx = a->j + a->i[i] + shift; 920 v = a->a + a->i[i] + shift; 921 n = a->i[i+1] - a->i[i]; 922 alpha1 = x[6*i]; 923 alpha2 = x[6*i+1]; 924 alpha3 = x[6*i+2]; 925 alpha4 = x[6*i+3]; 926 alpha5 = x[6*i+4]; 927 alpha6 = x[6*i+5]; 928 while (n-->0) { 929 y[6*(*idx)] += alpha1*(*v); 930 y[6*(*idx)+1] += alpha2*(*v); 931 y[6*(*idx)+2] += alpha3*(*v); 932 y[6*(*idx)+3] += alpha4*(*v); 933 y[6*(*idx)+4] += alpha5*(*v); 934 y[6*(*idx)+5] += alpha6*(*v); 935 idx++; v++; 936 } 937 } 938 PetscLogFlops(12*a->nz); 939 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 940 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 944 /* ------------------------------------------------------------------------------*/ 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatMult_SeqMAIJ_8" 947 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 948 { 949 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 950 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 951 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 952 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 953 int n,i,jrow,j; 954 955 PetscFunctionBegin; 956 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 957 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 958 x = x + shift; /* shift for Fortran start by 1 indexing */ 959 idx = a->j; 960 v = a->a; 961 ii = a->i; 962 963 v += shift; /* shift for Fortran start by 1 indexing */ 964 idx += shift; 965 for (i=0; i<m; i++) { 966 jrow = ii[i]; 967 n = ii[i+1] - jrow; 968 sum1 = 0.0; 969 sum2 = 0.0; 970 sum3 = 0.0; 971 sum4 = 0.0; 972 sum5 = 0.0; 973 sum6 = 0.0; 974 sum7 = 0.0; 975 sum8 = 0.0; 976 for (j=0; j<n; j++) { 977 sum1 += v[jrow]*x[8*idx[jrow]]; 978 sum2 += v[jrow]*x[8*idx[jrow]+1]; 979 sum3 += v[jrow]*x[8*idx[jrow]+2]; 980 sum4 += v[jrow]*x[8*idx[jrow]+3]; 981 sum5 += v[jrow]*x[8*idx[jrow]+4]; 982 sum6 += v[jrow]*x[8*idx[jrow]+5]; 983 sum7 += v[jrow]*x[8*idx[jrow]+6]; 984 sum8 += v[jrow]*x[8*idx[jrow]+7]; 985 jrow++; 986 } 987 y[8*i] = sum1; 988 y[8*i+1] = sum2; 989 y[8*i+2] = sum3; 990 y[8*i+3] = sum4; 991 y[8*i+4] = sum5; 992 y[8*i+5] = sum6; 993 y[8*i+6] = sum7; 994 y[8*i+7] = sum8; 995 } 996 997 PetscLogFlops(16*a->nz - 8*m); 998 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 999 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1005 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1006 { 1007 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1008 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1009 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1010 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1011 1012 PetscFunctionBegin; 1013 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1014 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1015 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1016 y = y + shift; /* shift for Fortran start by 1 indexing */ 1017 for (i=0; i<m; i++) { 1018 idx = a->j + a->i[i] + shift; 1019 v = a->a + a->i[i] + shift; 1020 n = a->i[i+1] - a->i[i]; 1021 alpha1 = x[8*i]; 1022 alpha2 = x[8*i+1]; 1023 alpha3 = x[8*i+2]; 1024 alpha4 = x[8*i+3]; 1025 alpha5 = x[8*i+4]; 1026 alpha6 = x[8*i+5]; 1027 alpha7 = x[8*i+6]; 1028 alpha8 = x[8*i+7]; 1029 while (n-->0) { 1030 y[8*(*idx)] += alpha1*(*v); 1031 y[8*(*idx)+1] += alpha2*(*v); 1032 y[8*(*idx)+2] += alpha3*(*v); 1033 y[8*(*idx)+3] += alpha4*(*v); 1034 y[8*(*idx)+4] += alpha5*(*v); 1035 y[8*(*idx)+5] += alpha6*(*v); 1036 y[8*(*idx)+6] += alpha7*(*v); 1037 y[8*(*idx)+7] += alpha8*(*v); 1038 idx++; v++; 1039 } 1040 } 1041 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1042 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1043 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1044 PetscFunctionReturn(0); 1045 } 1046 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1049 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1050 { 1051 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1052 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1053 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1054 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 1055 int n,i,jrow,j; 1056 1057 PetscFunctionBegin; 1058 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1059 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1060 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1061 x = x + shift; /* shift for Fortran start by 1 indexing */ 1062 idx = a->j; 1063 v = a->a; 1064 ii = a->i; 1065 1066 v += shift; /* shift for Fortran start by 1 indexing */ 1067 idx += shift; 1068 for (i=0; i<m; i++) { 1069 jrow = ii[i]; 1070 n = ii[i+1] - jrow; 1071 sum1 = 0.0; 1072 sum2 = 0.0; 1073 sum3 = 0.0; 1074 sum4 = 0.0; 1075 sum5 = 0.0; 1076 sum6 = 0.0; 1077 sum7 = 0.0; 1078 sum8 = 0.0; 1079 for (j=0; j<n; j++) { 1080 sum1 += v[jrow]*x[8*idx[jrow]]; 1081 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1082 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1083 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1084 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1085 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1086 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1087 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1088 jrow++; 1089 } 1090 y[8*i] += sum1; 1091 y[8*i+1] += sum2; 1092 y[8*i+2] += sum3; 1093 y[8*i+3] += sum4; 1094 y[8*i+4] += sum5; 1095 y[8*i+5] += sum6; 1096 y[8*i+6] += sum7; 1097 y[8*i+7] += sum8; 1098 } 1099 1100 PetscLogFlops(16*a->nz); 1101 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1102 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1108 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1109 { 1110 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1111 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1112 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1113 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1114 1115 PetscFunctionBegin; 1116 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1117 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1118 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1119 y = y + shift; /* shift for Fortran start by 1 indexing */ 1120 for (i=0; i<m; i++) { 1121 idx = a->j + a->i[i] + shift; 1122 v = a->a + a->i[i] + shift; 1123 n = a->i[i+1] - a->i[i]; 1124 alpha1 = x[8*i]; 1125 alpha2 = x[8*i+1]; 1126 alpha3 = x[8*i+2]; 1127 alpha4 = x[8*i+3]; 1128 alpha5 = x[8*i+4]; 1129 alpha6 = x[8*i+5]; 1130 alpha7 = x[8*i+6]; 1131 alpha8 = x[8*i+7]; 1132 while (n-->0) { 1133 y[8*(*idx)] += alpha1*(*v); 1134 y[8*(*idx)+1] += alpha2*(*v); 1135 y[8*(*idx)+2] += alpha3*(*v); 1136 y[8*(*idx)+3] += alpha4*(*v); 1137 y[8*(*idx)+4] += alpha5*(*v); 1138 y[8*(*idx)+5] += alpha6*(*v); 1139 y[8*(*idx)+6] += alpha7*(*v); 1140 y[8*(*idx)+7] += alpha8*(*v); 1141 idx++; v++; 1142 } 1143 } 1144 PetscLogFlops(16*a->nz); 1145 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1146 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 /*--------------------------------------------------------------------------------------------*/ 1151 #undef __FUNCT__ 1152 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1153 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1154 { 1155 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1156 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1157 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1158 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1159 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 1160 int n,i,jrow,j; 1161 1162 PetscFunctionBegin; 1163 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1164 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1165 x = x + shift; /* shift for Fortran start by 1 indexing */ 1166 idx = a->j; 1167 v = a->a; 1168 ii = a->i; 1169 1170 v += shift; /* shift for Fortran start by 1 indexing */ 1171 idx += shift; 1172 for (i=0; i<m; i++) { 1173 jrow = ii[i]; 1174 n = ii[i+1] - jrow; 1175 sum1 = 0.0; 1176 sum2 = 0.0; 1177 sum3 = 0.0; 1178 sum4 = 0.0; 1179 sum5 = 0.0; 1180 sum6 = 0.0; 1181 sum7 = 0.0; 1182 sum8 = 0.0; 1183 sum9 = 0.0; 1184 sum10 = 0.0; 1185 sum11 = 0.0; 1186 sum12 = 0.0; 1187 sum13 = 0.0; 1188 sum14 = 0.0; 1189 sum15 = 0.0; 1190 sum16 = 0.0; 1191 for (j=0; j<n; j++) { 1192 sum1 += v[jrow]*x[16*idx[jrow]]; 1193 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1194 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1195 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1196 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1197 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1198 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1199 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1200 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1201 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1202 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1203 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1204 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1205 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1206 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1207 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1208 jrow++; 1209 } 1210 y[16*i] = sum1; 1211 y[16*i+1] = sum2; 1212 y[16*i+2] = sum3; 1213 y[16*i+3] = sum4; 1214 y[16*i+4] = sum5; 1215 y[16*i+5] = sum6; 1216 y[16*i+6] = sum7; 1217 y[16*i+7] = sum8; 1218 y[16*i+8] = sum9; 1219 y[16*i+9] = sum10; 1220 y[16*i+10] = sum11; 1221 y[16*i+11] = sum12; 1222 y[16*i+12] = sum13; 1223 y[16*i+13] = sum14; 1224 y[16*i+14] = sum15; 1225 y[16*i+15] = sum16; 1226 } 1227 1228 PetscLogFlops(32*a->nz - 16*m); 1229 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1230 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1236 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1237 { 1238 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1239 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1240 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1241 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1242 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1243 1244 PetscFunctionBegin; 1245 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1246 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1247 ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr); 1248 y = y + shift; /* shift for Fortran start by 1 indexing */ 1249 for (i=0; i<m; i++) { 1250 idx = a->j + a->i[i] + shift; 1251 v = a->a + a->i[i] + shift; 1252 n = a->i[i+1] - a->i[i]; 1253 alpha1 = x[16*i]; 1254 alpha2 = x[16*i+1]; 1255 alpha3 = x[16*i+2]; 1256 alpha4 = x[16*i+3]; 1257 alpha5 = x[16*i+4]; 1258 alpha6 = x[16*i+5]; 1259 alpha7 = x[16*i+6]; 1260 alpha8 = x[16*i+7]; 1261 alpha9 = x[16*i+8]; 1262 alpha10 = x[16*i+9]; 1263 alpha11 = x[16*i+10]; 1264 alpha12 = x[16*i+11]; 1265 alpha13 = x[16*i+12]; 1266 alpha14 = x[16*i+13]; 1267 alpha15 = x[16*i+14]; 1268 alpha16 = x[16*i+15]; 1269 while (n-->0) { 1270 y[16*(*idx)] += alpha1*(*v); 1271 y[16*(*idx)+1] += alpha2*(*v); 1272 y[16*(*idx)+2] += alpha3*(*v); 1273 y[16*(*idx)+3] += alpha4*(*v); 1274 y[16*(*idx)+4] += alpha5*(*v); 1275 y[16*(*idx)+5] += alpha6*(*v); 1276 y[16*(*idx)+6] += alpha7*(*v); 1277 y[16*(*idx)+7] += alpha8*(*v); 1278 y[16*(*idx)+8] += alpha9*(*v); 1279 y[16*(*idx)+9] += alpha10*(*v); 1280 y[16*(*idx)+10] += alpha11*(*v); 1281 y[16*(*idx)+11] += alpha12*(*v); 1282 y[16*(*idx)+12] += alpha13*(*v); 1283 y[16*(*idx)+13] += alpha14*(*v); 1284 y[16*(*idx)+14] += alpha15*(*v); 1285 y[16*(*idx)+15] += alpha16*(*v); 1286 idx++; v++; 1287 } 1288 } 1289 PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1290 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1291 ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNCT__ 1296 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1297 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1298 { 1299 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1300 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1301 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1302 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1303 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 1304 int n,i,jrow,j; 1305 1306 PetscFunctionBegin; 1307 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1308 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1309 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1310 x = x + shift; /* shift for Fortran start by 1 indexing */ 1311 idx = a->j; 1312 v = a->a; 1313 ii = a->i; 1314 1315 v += shift; /* shift for Fortran start by 1 indexing */ 1316 idx += shift; 1317 for (i=0; i<m; i++) { 1318 jrow = ii[i]; 1319 n = ii[i+1] - jrow; 1320 sum1 = 0.0; 1321 sum2 = 0.0; 1322 sum3 = 0.0; 1323 sum4 = 0.0; 1324 sum5 = 0.0; 1325 sum6 = 0.0; 1326 sum7 = 0.0; 1327 sum8 = 0.0; 1328 sum9 = 0.0; 1329 sum10 = 0.0; 1330 sum11 = 0.0; 1331 sum12 = 0.0; 1332 sum13 = 0.0; 1333 sum14 = 0.0; 1334 sum15 = 0.0; 1335 sum16 = 0.0; 1336 for (j=0; j<n; j++) { 1337 sum1 += v[jrow]*x[16*idx[jrow]]; 1338 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1339 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1340 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1341 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1342 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1343 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1344 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1345 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1346 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1347 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1348 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1349 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1350 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1351 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1352 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1353 jrow++; 1354 } 1355 y[16*i] += sum1; 1356 y[16*i+1] += sum2; 1357 y[16*i+2] += sum3; 1358 y[16*i+3] += sum4; 1359 y[16*i+4] += sum5; 1360 y[16*i+5] += sum6; 1361 y[16*i+6] += sum7; 1362 y[16*i+7] += sum8; 1363 y[16*i+8] += sum9; 1364 y[16*i+9] += sum10; 1365 y[16*i+10] += sum11; 1366 y[16*i+11] += sum12; 1367 y[16*i+12] += sum13; 1368 y[16*i+13] += sum14; 1369 y[16*i+14] += sum15; 1370 y[16*i+15] += sum16; 1371 } 1372 1373 PetscLogFlops(32*a->nz); 1374 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1375 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1381 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1382 { 1383 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1385 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1386 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1387 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1388 1389 PetscFunctionBegin; 1390 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1391 ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr); 1392 ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr); 1393 y = y + shift; /* shift for Fortran start by 1 indexing */ 1394 for (i=0; i<m; i++) { 1395 idx = a->j + a->i[i] + shift; 1396 v = a->a + a->i[i] + shift; 1397 n = a->i[i+1] - a->i[i]; 1398 alpha1 = x[16*i]; 1399 alpha2 = x[16*i+1]; 1400 alpha3 = x[16*i+2]; 1401 alpha4 = x[16*i+3]; 1402 alpha5 = x[16*i+4]; 1403 alpha6 = x[16*i+5]; 1404 alpha7 = x[16*i+6]; 1405 alpha8 = x[16*i+7]; 1406 alpha9 = x[16*i+8]; 1407 alpha10 = x[16*i+9]; 1408 alpha11 = x[16*i+10]; 1409 alpha12 = x[16*i+11]; 1410 alpha13 = x[16*i+12]; 1411 alpha14 = x[16*i+13]; 1412 alpha15 = x[16*i+14]; 1413 alpha16 = x[16*i+15]; 1414 while (n-->0) { 1415 y[16*(*idx)] += alpha1*(*v); 1416 y[16*(*idx)+1] += alpha2*(*v); 1417 y[16*(*idx)+2] += alpha3*(*v); 1418 y[16*(*idx)+3] += alpha4*(*v); 1419 y[16*(*idx)+4] += alpha5*(*v); 1420 y[16*(*idx)+5] += alpha6*(*v); 1421 y[16*(*idx)+6] += alpha7*(*v); 1422 y[16*(*idx)+7] += alpha8*(*v); 1423 y[16*(*idx)+8] += alpha9*(*v); 1424 y[16*(*idx)+9] += alpha10*(*v); 1425 y[16*(*idx)+10] += alpha11*(*v); 1426 y[16*(*idx)+11] += alpha12*(*v); 1427 y[16*(*idx)+12] += alpha13*(*v); 1428 y[16*(*idx)+13] += alpha14*(*v); 1429 y[16*(*idx)+14] += alpha15*(*v); 1430 y[16*(*idx)+15] += alpha16*(*v); 1431 idx++; v++; 1432 } 1433 } 1434 PetscLogFlops(32*a->nz); 1435 ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr); 1436 ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 /*===================================================================================*/ 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1443 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1444 { 1445 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1446 int ierr; 1447 PetscFunctionBegin; 1448 1449 /* start the scatter */ 1450 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1451 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1452 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1453 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1459 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1460 { 1461 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1462 int ierr; 1463 PetscFunctionBegin; 1464 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1465 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1466 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1467 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1473 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1474 { 1475 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1476 int ierr; 1477 PetscFunctionBegin; 1478 1479 /* start the scatter */ 1480 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1481 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1482 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1483 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1489 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1490 { 1491 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1492 int ierr; 1493 PetscFunctionBegin; 1494 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1495 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1496 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } 1500 1501 /* ---------------------------------------------------------------------------------- */ 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatCreateMAIJ" 1504 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 1505 { 1506 int ierr,size,n; 1507 Mat_MPIMAIJ *b; 1508 Mat B; 1509 1510 PetscFunctionBegin; 1511 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1512 1513 if (dof == 1) { 1514 *maij = A; 1515 } else { 1516 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1517 B->assembled = PETSC_TRUE; 1518 1519 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1520 if (size == 1) { 1521 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1522 B->ops->destroy = MatDestroy_SeqMAIJ; 1523 b = (Mat_MPIMAIJ*)B->data; 1524 b->dof = dof; 1525 b->AIJ = A; 1526 if (dof == 2) { 1527 B->ops->mult = MatMult_SeqMAIJ_2; 1528 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1529 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1530 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1531 } else if (dof == 3) { 1532 B->ops->mult = MatMult_SeqMAIJ_3; 1533 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1534 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1535 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1536 } else if (dof == 4) { 1537 B->ops->mult = MatMult_SeqMAIJ_4; 1538 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1539 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1540 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1541 } else if (dof == 5) { 1542 B->ops->mult = MatMult_SeqMAIJ_5; 1543 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1544 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1545 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1546 } else if (dof == 6) { 1547 B->ops->mult = MatMult_SeqMAIJ_6; 1548 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1549 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1550 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1551 } else if (dof == 8) { 1552 B->ops->mult = MatMult_SeqMAIJ_8; 1553 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1554 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1555 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1556 } else if (dof == 16) { 1557 B->ops->mult = MatMult_SeqMAIJ_16; 1558 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1559 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1560 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1561 } else { 1562 SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1563 } 1564 } else { 1565 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1566 IS from,to; 1567 Vec gvec; 1568 int *garray,i; 1569 1570 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1571 B->ops->destroy = MatDestroy_MPIMAIJ; 1572 b = (Mat_MPIMAIJ*)B->data; 1573 b->dof = dof; 1574 b->A = A; 1575 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1576 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1577 1578 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1579 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1580 1581 /* create two temporary Index sets for build scatter gather */ 1582 ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1583 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1584 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1585 ierr = PetscFree(garray);CHKERRQ(ierr); 1586 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1587 1588 /* create temporary global vector to generate scatter context */ 1589 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1590 1591 /* generate the scatter context */ 1592 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1593 1594 ierr = ISDestroy(from);CHKERRQ(ierr); 1595 ierr = ISDestroy(to);CHKERRQ(ierr); 1596 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1597 1598 B->ops->mult = MatMult_MPIMAIJ_dof; 1599 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1600 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1601 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1602 } 1603 *maij = B; 1604 } 1605 PetscFunctionReturn(0); 1606 } 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619