1 /*$Id: maij.c,v 1.18 2001/08/06 21:15:46 bsmith Exp balay $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/aij/mpi/mpiaij.h" 20 21 typedef struct { 22 int dof; /* number of components */ 23 Mat AIJ; /* representation of interpolation for one component */ 24 } Mat_SeqMAIJ; 25 26 typedef struct { 27 int dof; /* number of components */ 28 Mat AIJ,OAIJ; /* representation of interpolation for one component */ 29 Mat A; 30 VecScatter ctx; /* update ghost points for parallel case */ 31 Vec w; /* work space for ghost values for parallel case */ 32 } Mat_MPIMAIJ; 33 34 #undef __FUNCT__ 35 #define __FUNCT__ "MatMAIJGetAIJ" 36 int MatMAIJGetAIJ(Mat A,Mat *B) 37 { 38 int ierr; 39 PetscTruth ismpimaij,isseqmaij; 40 41 PetscFunctionBegin; 42 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 43 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 44 if (ismpimaij) { 45 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 46 47 *B = b->A; 48 } else if (isseqmaij) { 49 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 50 51 *B = b->AIJ; 52 } else { 53 *B = A; 54 } 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatMAIJRedimension" 60 int MatMAIJRedimension(Mat A,int dof,Mat *B) 61 { 62 int ierr; 63 Mat Aij; 64 65 PetscFunctionBegin; 66 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 67 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 68 PetscFunctionReturn(0); 69 } 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "MatDestroy_SeqMAIJ" 73 int MatDestroy_SeqMAIJ(Mat A) 74 { 75 int ierr; 76 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 77 78 PetscFunctionBegin; 79 if (b->AIJ) { 80 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 81 } 82 ierr = PetscFree(b);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 #undef __FUNCT__ 87 #define __FUNCT__ "MatDestroy_MPIMAIJ" 88 int MatDestroy_MPIMAIJ(Mat A) 89 { 90 int ierr; 91 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 92 93 PetscFunctionBegin; 94 if (b->AIJ) { 95 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 96 } 97 if (b->OAIJ) { 98 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 99 } 100 if (b->A) { 101 ierr = MatDestroy(b->A);CHKERRQ(ierr); 102 } 103 if (b->ctx) { 104 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 105 } 106 if (b->w) { 107 ierr = VecDestroy(b->w);CHKERRQ(ierr); 108 } 109 ierr = PetscFree(b);CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 EXTERN_C_BEGIN 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatCreate_MAIJ" 116 int MatCreate_MAIJ(Mat A) 117 { 118 int ierr; 119 Mat_MPIMAIJ *b; 120 121 PetscFunctionBegin; 122 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 123 A->data = (void*)b; 124 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 125 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 126 A->factor = 0; 127 A->mapping = 0; 128 129 b->AIJ = 0; 130 b->dof = 0; 131 b->OAIJ = 0; 132 b->ctx = 0; 133 b->w = 0; 134 PetscFunctionReturn(0); 135 } 136 EXTERN_C_END 137 138 /* --------------------------------------------------------------------------------------*/ 139 #undef __FUNCT__ 140 #define __FUNCT__ "MatMult_SeqMAIJ_2" 141 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 142 { 143 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 144 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 145 PetscScalar *x,*y,*v,sum1, sum2; 146 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 147 int n,i,jrow,j; 148 149 PetscFunctionBegin; 150 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 151 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 152 x = x + shift; /* shift for Fortran start by 1 indexing */ 153 idx = a->j; 154 v = a->a; 155 ii = a->i; 156 157 v += shift; /* shift for Fortran start by 1 indexing */ 158 idx += shift; 159 for (i=0; i<m; i++) { 160 jrow = ii[i]; 161 n = ii[i+1] - jrow; 162 sum1 = 0.0; 163 sum2 = 0.0; 164 for (j=0; j<n; j++) { 165 sum1 += v[jrow]*x[2*idx[jrow]]; 166 sum2 += v[jrow]*x[2*idx[jrow]+1]; 167 jrow++; 168 } 169 y[2*i] = sum1; 170 y[2*i+1] = sum2; 171 } 172 173 PetscLogFlops(4*a->nz - 2*m); 174 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 181 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 182 { 183 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 185 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 186 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 187 188 PetscFunctionBegin; 189 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 190 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 191 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 192 y = y + shift; /* shift for Fortran start by 1 indexing */ 193 for (i=0; i<m; i++) { 194 idx = a->j + a->i[i] + shift; 195 v = a->a + a->i[i] + shift; 196 n = a->i[i+1] - a->i[i]; 197 alpha1 = x[2*i]; 198 alpha2 = x[2*i+1]; 199 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 200 } 201 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 202 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 203 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 209 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 210 { 211 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 212 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 213 PetscScalar *x,*y,*v,sum1, sum2; 214 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 215 int n,i,jrow,j; 216 217 PetscFunctionBegin; 218 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 219 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 220 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 221 x = x + shift; /* shift for Fortran start by 1 indexing */ 222 idx = a->j; 223 v = a->a; 224 ii = a->i; 225 226 v += shift; /* shift for Fortran start by 1 indexing */ 227 idx += shift; 228 for (i=0; i<m; i++) { 229 jrow = ii[i]; 230 n = ii[i+1] - jrow; 231 sum1 = 0.0; 232 sum2 = 0.0; 233 for (j=0; j<n; j++) { 234 sum1 += v[jrow]*x[2*idx[jrow]]; 235 sum2 += v[jrow]*x[2*idx[jrow]+1]; 236 jrow++; 237 } 238 y[2*i] += sum1; 239 y[2*i+1] += sum2; 240 } 241 242 PetscLogFlops(4*a->nz - 2*m); 243 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 249 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 250 { 251 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 252 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 253 PetscScalar *x,*y,*v,alpha1,alpha2; 254 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 255 256 PetscFunctionBegin; 257 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 258 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 259 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 260 y = y + shift; /* shift for Fortran start by 1 indexing */ 261 for (i=0; i<m; i++) { 262 idx = a->j + a->i[i] + shift; 263 v = a->a + a->i[i] + shift; 264 n = a->i[i+1] - a->i[i]; 265 alpha1 = x[2*i]; 266 alpha2 = x[2*i+1]; 267 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 268 } 269 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 270 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 271 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 /* --------------------------------------------------------------------------------------*/ 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatMult_SeqMAIJ_3" 277 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 278 { 279 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 281 PetscScalar *x,*y,*v,sum1, sum2, sum3; 282 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 283 int n,i,jrow,j; 284 285 PetscFunctionBegin; 286 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 287 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 288 x = x + shift; /* shift for Fortran start by 1 indexing */ 289 idx = a->j; 290 v = a->a; 291 ii = a->i; 292 293 v += shift; /* shift for Fortran start by 1 indexing */ 294 idx += shift; 295 for (i=0; i<m; i++) { 296 jrow = ii[i]; 297 n = ii[i+1] - jrow; 298 sum1 = 0.0; 299 sum2 = 0.0; 300 sum3 = 0.0; 301 for (j=0; j<n; j++) { 302 sum1 += v[jrow]*x[3*idx[jrow]]; 303 sum2 += v[jrow]*x[3*idx[jrow]+1]; 304 sum3 += v[jrow]*x[3*idx[jrow]+2]; 305 jrow++; 306 } 307 y[3*i] = sum1; 308 y[3*i+1] = sum2; 309 y[3*i+2] = sum3; 310 } 311 312 PetscLogFlops(6*a->nz - 3*m); 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 320 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 321 { 322 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 323 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 324 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 325 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 326 327 PetscFunctionBegin; 328 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 329 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 330 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 331 y = y + shift; /* shift for Fortran start by 1 indexing */ 332 for (i=0; i<m; i++) { 333 idx = a->j + a->i[i] + shift; 334 v = a->a + a->i[i] + shift; 335 n = a->i[i+1] - a->i[i]; 336 alpha1 = x[3*i]; 337 alpha2 = x[3*i+1]; 338 alpha3 = x[3*i+2]; 339 while (n-->0) { 340 y[3*(*idx)] += alpha1*(*v); 341 y[3*(*idx)+1] += alpha2*(*v); 342 y[3*(*idx)+2] += alpha3*(*v); 343 idx++; v++; 344 } 345 } 346 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 347 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 348 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 354 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 355 { 356 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 357 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 358 PetscScalar *x,*y,*v,sum1, sum2, sum3; 359 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 360 int n,i,jrow,j; 361 362 PetscFunctionBegin; 363 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 364 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 365 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 366 x = x + shift; /* shift for Fortran start by 1 indexing */ 367 idx = a->j; 368 v = a->a; 369 ii = a->i; 370 371 v += shift; /* shift for Fortran start by 1 indexing */ 372 idx += shift; 373 for (i=0; i<m; i++) { 374 jrow = ii[i]; 375 n = ii[i+1] - jrow; 376 sum1 = 0.0; 377 sum2 = 0.0; 378 sum3 = 0.0; 379 for (j=0; j<n; j++) { 380 sum1 += v[jrow]*x[3*idx[jrow]]; 381 sum2 += v[jrow]*x[3*idx[jrow]+1]; 382 sum3 += v[jrow]*x[3*idx[jrow]+2]; 383 jrow++; 384 } 385 y[3*i] += sum1; 386 y[3*i+1] += sum2; 387 y[3*i+2] += sum3; 388 } 389 390 PetscLogFlops(6*a->nz); 391 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 392 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 #undef __FUNCT__ 396 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 397 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 398 { 399 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 400 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 401 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 402 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 403 404 PetscFunctionBegin; 405 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 406 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 407 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 408 y = y + shift; /* shift for Fortran start by 1 indexing */ 409 for (i=0; i<m; i++) { 410 idx = a->j + a->i[i] + shift; 411 v = a->a + a->i[i] + shift; 412 n = a->i[i+1] - a->i[i]; 413 alpha1 = x[3*i]; 414 alpha2 = x[3*i+1]; 415 alpha3 = x[3*i+2]; 416 while (n-->0) { 417 y[3*(*idx)] += alpha1*(*v); 418 y[3*(*idx)+1] += alpha2*(*v); 419 y[3*(*idx)+2] += alpha3*(*v); 420 idx++; v++; 421 } 422 } 423 PetscLogFlops(6*a->nz); 424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 425 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 /* ------------------------------------------------------------------------------*/ 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatMult_SeqMAIJ_4" 432 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 433 { 434 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 435 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 436 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 437 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 438 int n,i,jrow,j; 439 440 PetscFunctionBegin; 441 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 442 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 443 x = x + shift; /* shift for Fortran start by 1 indexing */ 444 idx = a->j; 445 v = a->a; 446 ii = a->i; 447 448 v += shift; /* shift for Fortran start by 1 indexing */ 449 idx += shift; 450 for (i=0; i<m; i++) { 451 jrow = ii[i]; 452 n = ii[i+1] - jrow; 453 sum1 = 0.0; 454 sum2 = 0.0; 455 sum3 = 0.0; 456 sum4 = 0.0; 457 for (j=0; j<n; j++) { 458 sum1 += v[jrow]*x[4*idx[jrow]]; 459 sum2 += v[jrow]*x[4*idx[jrow]+1]; 460 sum3 += v[jrow]*x[4*idx[jrow]+2]; 461 sum4 += v[jrow]*x[4*idx[jrow]+3]; 462 jrow++; 463 } 464 y[4*i] = sum1; 465 y[4*i+1] = sum2; 466 y[4*i+2] = sum3; 467 y[4*i+3] = sum4; 468 } 469 470 PetscLogFlops(8*a->nz - 4*m); 471 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 472 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 478 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 479 { 480 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 481 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 482 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 483 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 484 485 PetscFunctionBegin; 486 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 487 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 488 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 489 y = y + shift; /* shift for Fortran start by 1 indexing */ 490 for (i=0; i<m; i++) { 491 idx = a->j + a->i[i] + shift; 492 v = a->a + a->i[i] + shift; 493 n = a->i[i+1] - a->i[i]; 494 alpha1 = x[4*i]; 495 alpha2 = x[4*i+1]; 496 alpha3 = x[4*i+2]; 497 alpha4 = x[4*i+3]; 498 while (n-->0) { 499 y[4*(*idx)] += alpha1*(*v); 500 y[4*(*idx)+1] += alpha2*(*v); 501 y[4*(*idx)+2] += alpha3*(*v); 502 y[4*(*idx)+3] += alpha4*(*v); 503 idx++; v++; 504 } 505 } 506 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 507 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 508 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 514 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 515 { 516 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 517 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 518 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 519 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 520 int n,i,jrow,j; 521 522 PetscFunctionBegin; 523 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 524 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 525 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 526 x = x + shift; /* shift for Fortran start by 1 indexing */ 527 idx = a->j; 528 v = a->a; 529 ii = a->i; 530 531 v += shift; /* shift for Fortran start by 1 indexing */ 532 idx += shift; 533 for (i=0; i<m; i++) { 534 jrow = ii[i]; 535 n = ii[i+1] - jrow; 536 sum1 = 0.0; 537 sum2 = 0.0; 538 sum3 = 0.0; 539 sum4 = 0.0; 540 for (j=0; j<n; j++) { 541 sum1 += v[jrow]*x[4*idx[jrow]]; 542 sum2 += v[jrow]*x[4*idx[jrow]+1]; 543 sum3 += v[jrow]*x[4*idx[jrow]+2]; 544 sum4 += v[jrow]*x[4*idx[jrow]+3]; 545 jrow++; 546 } 547 y[4*i] += sum1; 548 y[4*i+1] += sum2; 549 y[4*i+2] += sum3; 550 y[4*i+3] += sum4; 551 } 552 553 PetscLogFlops(8*a->nz - 4*m); 554 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 555 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 #undef __FUNCT__ 559 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 560 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 561 { 562 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 564 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 565 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 566 567 PetscFunctionBegin; 568 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 569 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 570 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 571 y = y + shift; /* shift for Fortran start by 1 indexing */ 572 for (i=0; i<m; i++) { 573 idx = a->j + a->i[i] + shift; 574 v = a->a + a->i[i] + shift; 575 n = a->i[i+1] - a->i[i]; 576 alpha1 = x[4*i]; 577 alpha2 = x[4*i+1]; 578 alpha3 = x[4*i+2]; 579 alpha4 = x[4*i+3]; 580 while (n-->0) { 581 y[4*(*idx)] += alpha1*(*v); 582 y[4*(*idx)+1] += alpha2*(*v); 583 y[4*(*idx)+2] += alpha3*(*v); 584 y[4*(*idx)+3] += alpha4*(*v); 585 idx++; v++; 586 } 587 } 588 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 589 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 590 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 /* ------------------------------------------------------------------------------*/ 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatMult_SeqMAIJ_5" 597 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 598 { 599 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 600 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 601 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 602 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 603 int n,i,jrow,j; 604 605 PetscFunctionBegin; 606 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 607 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 608 x = x + shift; /* shift for Fortran start by 1 indexing */ 609 idx = a->j; 610 v = a->a; 611 ii = a->i; 612 613 v += shift; /* shift for Fortran start by 1 indexing */ 614 idx += shift; 615 for (i=0; i<m; i++) { 616 jrow = ii[i]; 617 n = ii[i+1] - jrow; 618 sum1 = 0.0; 619 sum2 = 0.0; 620 sum3 = 0.0; 621 sum4 = 0.0; 622 sum5 = 0.0; 623 for (j=0; j<n; j++) { 624 sum1 += v[jrow]*x[5*idx[jrow]]; 625 sum2 += v[jrow]*x[5*idx[jrow]+1]; 626 sum3 += v[jrow]*x[5*idx[jrow]+2]; 627 sum4 += v[jrow]*x[5*idx[jrow]+3]; 628 sum5 += v[jrow]*x[5*idx[jrow]+4]; 629 jrow++; 630 } 631 y[5*i] = sum1; 632 y[5*i+1] = sum2; 633 y[5*i+2] = sum3; 634 y[5*i+3] = sum4; 635 y[5*i+4] = sum5; 636 } 637 638 PetscLogFlops(10*a->nz - 5*m); 639 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 640 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } 643 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 646 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 647 { 648 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 649 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 650 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 651 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 652 653 PetscFunctionBegin; 654 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 655 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 656 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 657 y = y + shift; /* shift for Fortran start by 1 indexing */ 658 for (i=0; i<m; i++) { 659 idx = a->j + a->i[i] + shift; 660 v = a->a + a->i[i] + shift; 661 n = a->i[i+1] - a->i[i]; 662 alpha1 = x[5*i]; 663 alpha2 = x[5*i+1]; 664 alpha3 = x[5*i+2]; 665 alpha4 = x[5*i+3]; 666 alpha5 = x[5*i+4]; 667 while (n-->0) { 668 y[5*(*idx)] += alpha1*(*v); 669 y[5*(*idx)+1] += alpha2*(*v); 670 y[5*(*idx)+2] += alpha3*(*v); 671 y[5*(*idx)+3] += alpha4*(*v); 672 y[5*(*idx)+4] += alpha5*(*v); 673 idx++; v++; 674 } 675 } 676 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 677 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 678 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 684 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 685 { 686 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 688 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 689 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 690 int n,i,jrow,j; 691 692 PetscFunctionBegin; 693 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 694 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 695 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 696 x = x + shift; /* shift for Fortran start by 1 indexing */ 697 idx = a->j; 698 v = a->a; 699 ii = a->i; 700 701 v += shift; /* shift for Fortran start by 1 indexing */ 702 idx += shift; 703 for (i=0; i<m; i++) { 704 jrow = ii[i]; 705 n = ii[i+1] - jrow; 706 sum1 = 0.0; 707 sum2 = 0.0; 708 sum3 = 0.0; 709 sum4 = 0.0; 710 sum5 = 0.0; 711 for (j=0; j<n; j++) { 712 sum1 += v[jrow]*x[5*idx[jrow]]; 713 sum2 += v[jrow]*x[5*idx[jrow]+1]; 714 sum3 += v[jrow]*x[5*idx[jrow]+2]; 715 sum4 += v[jrow]*x[5*idx[jrow]+3]; 716 sum5 += v[jrow]*x[5*idx[jrow]+4]; 717 jrow++; 718 } 719 y[5*i] += sum1; 720 y[5*i+1] += sum2; 721 y[5*i+2] += sum3; 722 y[5*i+3] += sum4; 723 y[5*i+4] += sum5; 724 } 725 726 PetscLogFlops(10*a->nz); 727 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 728 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 734 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 735 { 736 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 737 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 738 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 739 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 740 741 PetscFunctionBegin; 742 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 743 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 744 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 745 y = y + shift; /* shift for Fortran start by 1 indexing */ 746 for (i=0; i<m; i++) { 747 idx = a->j + a->i[i] + shift; 748 v = a->a + a->i[i] + shift; 749 n = a->i[i+1] - a->i[i]; 750 alpha1 = x[5*i]; 751 alpha2 = x[5*i+1]; 752 alpha3 = x[5*i+2]; 753 alpha4 = x[5*i+3]; 754 alpha5 = x[5*i+4]; 755 while (n-->0) { 756 y[5*(*idx)] += alpha1*(*v); 757 y[5*(*idx)+1] += alpha2*(*v); 758 y[5*(*idx)+2] += alpha3*(*v); 759 y[5*(*idx)+3] += alpha4*(*v); 760 y[5*(*idx)+4] += alpha5*(*v); 761 idx++; v++; 762 } 763 } 764 PetscLogFlops(10*a->nz); 765 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 766 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 767 PetscFunctionReturn(0); 768 } 769 770 /* ------------------------------------------------------------------------------*/ 771 #undef __FUNCT__ 772 #define __FUNCT__ "MatMult_SeqMAIJ_6" 773 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 774 { 775 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 776 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 777 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 778 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 779 int n,i,jrow,j; 780 781 PetscFunctionBegin; 782 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 783 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 784 x = x + shift; /* shift for Fortran start by 1 indexing */ 785 idx = a->j; 786 v = a->a; 787 ii = a->i; 788 789 v += shift; /* shift for Fortran start by 1 indexing */ 790 idx += shift; 791 for (i=0; i<m; i++) { 792 jrow = ii[i]; 793 n = ii[i+1] - jrow; 794 sum1 = 0.0; 795 sum2 = 0.0; 796 sum3 = 0.0; 797 sum4 = 0.0; 798 sum5 = 0.0; 799 sum6 = 0.0; 800 for (j=0; j<n; j++) { 801 sum1 += v[jrow]*x[6*idx[jrow]]; 802 sum2 += v[jrow]*x[6*idx[jrow]+1]; 803 sum3 += v[jrow]*x[6*idx[jrow]+2]; 804 sum4 += v[jrow]*x[6*idx[jrow]+3]; 805 sum5 += v[jrow]*x[6*idx[jrow]+4]; 806 sum6 += v[jrow]*x[6*idx[jrow]+5]; 807 jrow++; 808 } 809 y[6*i] = sum1; 810 y[6*i+1] = sum2; 811 y[6*i+2] = sum3; 812 y[6*i+3] = sum4; 813 y[6*i+4] = sum5; 814 y[6*i+5] = sum6; 815 } 816 817 PetscLogFlops(12*a->nz - 6*m); 818 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 819 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 825 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 826 { 827 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 828 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 829 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 830 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 831 832 PetscFunctionBegin; 833 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 834 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 835 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 836 y = y + shift; /* shift for Fortran start by 1 indexing */ 837 for (i=0; i<m; i++) { 838 idx = a->j + a->i[i] + shift; 839 v = a->a + a->i[i] + shift; 840 n = a->i[i+1] - a->i[i]; 841 alpha1 = x[6*i]; 842 alpha2 = x[6*i+1]; 843 alpha3 = x[6*i+2]; 844 alpha4 = x[6*i+3]; 845 alpha5 = x[6*i+4]; 846 alpha6 = x[6*i+5]; 847 while (n-->0) { 848 y[6*(*idx)] += alpha1*(*v); 849 y[6*(*idx)+1] += alpha2*(*v); 850 y[6*(*idx)+2] += alpha3*(*v); 851 y[6*(*idx)+3] += alpha4*(*v); 852 y[6*(*idx)+4] += alpha5*(*v); 853 y[6*(*idx)+5] += alpha6*(*v); 854 idx++; v++; 855 } 856 } 857 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 858 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 859 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 #undef __FUNCT__ 864 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 865 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 866 { 867 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 868 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 869 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 870 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 871 int n,i,jrow,j; 872 873 PetscFunctionBegin; 874 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 875 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 876 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 877 x = x + shift; /* shift for Fortran start by 1 indexing */ 878 idx = a->j; 879 v = a->a; 880 ii = a->i; 881 882 v += shift; /* shift for Fortran start by 1 indexing */ 883 idx += shift; 884 for (i=0; i<m; i++) { 885 jrow = ii[i]; 886 n = ii[i+1] - jrow; 887 sum1 = 0.0; 888 sum2 = 0.0; 889 sum3 = 0.0; 890 sum4 = 0.0; 891 sum5 = 0.0; 892 sum6 = 0.0; 893 for (j=0; j<n; j++) { 894 sum1 += v[jrow]*x[6*idx[jrow]]; 895 sum2 += v[jrow]*x[6*idx[jrow]+1]; 896 sum3 += v[jrow]*x[6*idx[jrow]+2]; 897 sum4 += v[jrow]*x[6*idx[jrow]+3]; 898 sum5 += v[jrow]*x[6*idx[jrow]+4]; 899 sum6 += v[jrow]*x[6*idx[jrow]+5]; 900 jrow++; 901 } 902 y[6*i] += sum1; 903 y[6*i+1] += sum2; 904 y[6*i+2] += sum3; 905 y[6*i+3] += sum4; 906 y[6*i+4] += sum5; 907 y[6*i+5] += sum6; 908 } 909 910 PetscLogFlops(12*a->nz); 911 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 912 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 913 PetscFunctionReturn(0); 914 } 915 916 #undef __FUNCT__ 917 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 918 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 919 { 920 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 921 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 922 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 923 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 924 925 PetscFunctionBegin; 926 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 927 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 928 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 929 y = y + shift; /* shift for Fortran start by 1 indexing */ 930 for (i=0; i<m; i++) { 931 idx = a->j + a->i[i] + shift; 932 v = a->a + a->i[i] + shift; 933 n = a->i[i+1] - a->i[i]; 934 alpha1 = x[6*i]; 935 alpha2 = x[6*i+1]; 936 alpha3 = x[6*i+2]; 937 alpha4 = x[6*i+3]; 938 alpha5 = x[6*i+4]; 939 alpha6 = x[6*i+5]; 940 while (n-->0) { 941 y[6*(*idx)] += alpha1*(*v); 942 y[6*(*idx)+1] += alpha2*(*v); 943 y[6*(*idx)+2] += alpha3*(*v); 944 y[6*(*idx)+3] += alpha4*(*v); 945 y[6*(*idx)+4] += alpha5*(*v); 946 y[6*(*idx)+5] += alpha6*(*v); 947 idx++; v++; 948 } 949 } 950 PetscLogFlops(12*a->nz); 951 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 952 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 /* ------------------------------------------------------------------------------*/ 957 #undef __FUNCT__ 958 #define __FUNCT__ "MatMult_SeqMAIJ_8" 959 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 960 { 961 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 962 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 963 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 964 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 965 int n,i,jrow,j; 966 967 PetscFunctionBegin; 968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 969 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 970 x = x + shift; /* shift for Fortran start by 1 indexing */ 971 idx = a->j; 972 v = a->a; 973 ii = a->i; 974 975 v += shift; /* shift for Fortran start by 1 indexing */ 976 idx += shift; 977 for (i=0; i<m; i++) { 978 jrow = ii[i]; 979 n = ii[i+1] - jrow; 980 sum1 = 0.0; 981 sum2 = 0.0; 982 sum3 = 0.0; 983 sum4 = 0.0; 984 sum5 = 0.0; 985 sum6 = 0.0; 986 sum7 = 0.0; 987 sum8 = 0.0; 988 for (j=0; j<n; j++) { 989 sum1 += v[jrow]*x[8*idx[jrow]]; 990 sum2 += v[jrow]*x[8*idx[jrow]+1]; 991 sum3 += v[jrow]*x[8*idx[jrow]+2]; 992 sum4 += v[jrow]*x[8*idx[jrow]+3]; 993 sum5 += v[jrow]*x[8*idx[jrow]+4]; 994 sum6 += v[jrow]*x[8*idx[jrow]+5]; 995 sum7 += v[jrow]*x[8*idx[jrow]+6]; 996 sum8 += v[jrow]*x[8*idx[jrow]+7]; 997 jrow++; 998 } 999 y[8*i] = sum1; 1000 y[8*i+1] = sum2; 1001 y[8*i+2] = sum3; 1002 y[8*i+3] = sum4; 1003 y[8*i+4] = sum5; 1004 y[8*i+5] = sum6; 1005 y[8*i+6] = sum7; 1006 y[8*i+7] = sum8; 1007 } 1008 1009 PetscLogFlops(16*a->nz - 8*m); 1010 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1011 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1017 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1018 { 1019 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1020 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1021 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1022 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1023 1024 PetscFunctionBegin; 1025 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1026 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1027 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1028 y = y + shift; /* shift for Fortran start by 1 indexing */ 1029 for (i=0; i<m; i++) { 1030 idx = a->j + a->i[i] + shift; 1031 v = a->a + a->i[i] + shift; 1032 n = a->i[i+1] - a->i[i]; 1033 alpha1 = x[8*i]; 1034 alpha2 = x[8*i+1]; 1035 alpha3 = x[8*i+2]; 1036 alpha4 = x[8*i+3]; 1037 alpha5 = x[8*i+4]; 1038 alpha6 = x[8*i+5]; 1039 alpha7 = x[8*i+6]; 1040 alpha8 = x[8*i+7]; 1041 while (n-->0) { 1042 y[8*(*idx)] += alpha1*(*v); 1043 y[8*(*idx)+1] += alpha2*(*v); 1044 y[8*(*idx)+2] += alpha3*(*v); 1045 y[8*(*idx)+3] += alpha4*(*v); 1046 y[8*(*idx)+4] += alpha5*(*v); 1047 y[8*(*idx)+5] += alpha6*(*v); 1048 y[8*(*idx)+6] += alpha7*(*v); 1049 y[8*(*idx)+7] += alpha8*(*v); 1050 idx++; v++; 1051 } 1052 } 1053 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1054 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1055 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1061 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1062 { 1063 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1064 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1065 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1066 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 1067 int n,i,jrow,j; 1068 1069 PetscFunctionBegin; 1070 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1071 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1072 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1073 x = x + shift; /* shift for Fortran start by 1 indexing */ 1074 idx = a->j; 1075 v = a->a; 1076 ii = a->i; 1077 1078 v += shift; /* shift for Fortran start by 1 indexing */ 1079 idx += shift; 1080 for (i=0; i<m; i++) { 1081 jrow = ii[i]; 1082 n = ii[i+1] - jrow; 1083 sum1 = 0.0; 1084 sum2 = 0.0; 1085 sum3 = 0.0; 1086 sum4 = 0.0; 1087 sum5 = 0.0; 1088 sum6 = 0.0; 1089 sum7 = 0.0; 1090 sum8 = 0.0; 1091 for (j=0; j<n; j++) { 1092 sum1 += v[jrow]*x[8*idx[jrow]]; 1093 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1094 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1095 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1096 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1097 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1098 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1099 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1100 jrow++; 1101 } 1102 y[8*i] += sum1; 1103 y[8*i+1] += sum2; 1104 y[8*i+2] += sum3; 1105 y[8*i+3] += sum4; 1106 y[8*i+4] += sum5; 1107 y[8*i+5] += sum6; 1108 y[8*i+6] += sum7; 1109 y[8*i+7] += sum8; 1110 } 1111 1112 PetscLogFlops(16*a->nz); 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1120 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1121 { 1122 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1123 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1124 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1125 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1126 1127 PetscFunctionBegin; 1128 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1129 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1130 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1131 y = y + shift; /* shift for Fortran start by 1 indexing */ 1132 for (i=0; i<m; i++) { 1133 idx = a->j + a->i[i] + shift; 1134 v = a->a + a->i[i] + shift; 1135 n = a->i[i+1] - a->i[i]; 1136 alpha1 = x[8*i]; 1137 alpha2 = x[8*i+1]; 1138 alpha3 = x[8*i+2]; 1139 alpha4 = x[8*i+3]; 1140 alpha5 = x[8*i+4]; 1141 alpha6 = x[8*i+5]; 1142 alpha7 = x[8*i+6]; 1143 alpha8 = x[8*i+7]; 1144 while (n-->0) { 1145 y[8*(*idx)] += alpha1*(*v); 1146 y[8*(*idx)+1] += alpha2*(*v); 1147 y[8*(*idx)+2] += alpha3*(*v); 1148 y[8*(*idx)+3] += alpha4*(*v); 1149 y[8*(*idx)+4] += alpha5*(*v); 1150 y[8*(*idx)+5] += alpha6*(*v); 1151 y[8*(*idx)+6] += alpha7*(*v); 1152 y[8*(*idx)+7] += alpha8*(*v); 1153 idx++; v++; 1154 } 1155 } 1156 PetscLogFlops(16*a->nz); 1157 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1158 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 /*===================================================================================*/ 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1165 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1166 { 1167 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1168 int ierr; 1169 PetscFunctionBegin; 1170 1171 /* start the scatter */ 1172 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1173 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1174 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1175 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1181 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1182 { 1183 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1184 int ierr; 1185 PetscFunctionBegin; 1186 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1187 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1188 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1189 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1195 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1196 { 1197 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1198 int ierr; 1199 PetscFunctionBegin; 1200 1201 /* start the scatter */ 1202 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1203 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1204 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1205 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1211 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1212 { 1213 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1214 int ierr; 1215 PetscFunctionBegin; 1216 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1217 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1218 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1219 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* ---------------------------------------------------------------------------------- */ 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatCreateMAIJ" 1226 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 1227 { 1228 int ierr,size,n; 1229 Mat_MPIMAIJ *b; 1230 Mat B; 1231 1232 PetscFunctionBegin; 1233 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1234 1235 if (dof == 1) { 1236 *maij = A; 1237 } else { 1238 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1239 B->assembled = PETSC_TRUE; 1240 1241 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1242 if (size == 1) { 1243 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1244 B->ops->destroy = MatDestroy_SeqMAIJ; 1245 b = (Mat_MPIMAIJ*)B->data; 1246 b->dof = dof; 1247 b->AIJ = A; 1248 if (dof == 2) { 1249 B->ops->mult = MatMult_SeqMAIJ_2; 1250 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1251 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1252 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1253 } else if (dof == 3) { 1254 B->ops->mult = MatMult_SeqMAIJ_3; 1255 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1256 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1257 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1258 } else if (dof == 4) { 1259 B->ops->mult = MatMult_SeqMAIJ_4; 1260 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1261 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1262 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1263 } else if (dof == 5) { 1264 B->ops->mult = MatMult_SeqMAIJ_5; 1265 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1266 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1267 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1268 } else if (dof == 6) { 1269 B->ops->mult = MatMult_SeqMAIJ_6; 1270 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1271 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1272 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1273 } else if (dof == 8) { 1274 B->ops->mult = MatMult_SeqMAIJ_8; 1275 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1276 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1277 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1278 } else { 1279 SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1280 } 1281 } else { 1282 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1283 IS from,to; 1284 Vec gvec; 1285 int *garray,i; 1286 1287 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1288 B->ops->destroy = MatDestroy_MPIMAIJ; 1289 b = (Mat_MPIMAIJ*)B->data; 1290 b->dof = dof; 1291 b->A = A; 1292 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1293 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1294 1295 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1296 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1297 1298 /* create two temporary Index sets for build scatter gather */ 1299 ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1300 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1301 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1302 ierr = PetscFree(garray);CHKERRQ(ierr); 1303 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1304 1305 /* create temporary global vector to generate scatter context */ 1306 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1307 1308 /* generate the scatter context */ 1309 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1310 1311 ierr = ISDestroy(from);CHKERRQ(ierr); 1312 ierr = ISDestroy(to);CHKERRQ(ierr); 1313 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1314 1315 B->ops->mult = MatMult_MPIMAIJ_dof; 1316 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1317 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1318 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1319 } 1320 *maij = B; 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336