1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/maij/maij.h" 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatMAIJGetAIJ" 23 int MatMAIJGetAIJ(Mat A,Mat *B) 24 { 25 int ierr; 26 PetscTruth ismpimaij,isseqmaij; 27 28 PetscFunctionBegin; 29 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 30 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 31 if (ismpimaij) { 32 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 33 34 *B = b->A; 35 } else if (isseqmaij) { 36 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 37 38 *B = b->AIJ; 39 } else { 40 *B = A; 41 } 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatMAIJRedimension" 47 int MatMAIJRedimension(Mat A,int dof,Mat *B) 48 { 49 int ierr; 50 Mat Aij; 51 52 PetscFunctionBegin; 53 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 54 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDestroy_SeqMAIJ" 60 int MatDestroy_SeqMAIJ(Mat A) 61 { 62 int ierr; 63 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 64 65 PetscFunctionBegin; 66 if (b->AIJ) { 67 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 68 } 69 ierr = PetscFree(b);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatDestroy_MPIMAIJ" 75 int MatDestroy_MPIMAIJ(Mat A) 76 { 77 int ierr; 78 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 79 80 PetscFunctionBegin; 81 if (b->AIJ) { 82 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 83 } 84 if (b->OAIJ) { 85 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 86 } 87 if (b->A) { 88 ierr = MatDestroy(b->A);CHKERRQ(ierr); 89 } 90 if (b->ctx) { 91 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 92 } 93 if (b->w) { 94 ierr = VecDestroy(b->w);CHKERRQ(ierr); 95 } 96 ierr = PetscFree(b);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 EXTERN_C_BEGIN 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatCreate_MAIJ" 103 int MatCreate_MAIJ(Mat A) 104 { 105 int ierr; 106 Mat_MPIMAIJ *b; 107 108 PetscFunctionBegin; 109 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 110 A->data = (void*)b; 111 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 112 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 113 A->factor = 0; 114 A->mapping = 0; 115 116 b->AIJ = 0; 117 b->dof = 0; 118 b->OAIJ = 0; 119 b->ctx = 0; 120 b->w = 0; 121 PetscFunctionReturn(0); 122 } 123 EXTERN_C_END 124 125 /* --------------------------------------------------------------------------------------*/ 126 #undef __FUNCT__ 127 #define __FUNCT__ "MatMult_SeqMAIJ_2" 128 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 129 { 130 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 131 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 132 PetscScalar *x,*y,*v,sum1, sum2; 133 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 134 int n,i,jrow,j; 135 136 PetscFunctionBegin; 137 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 138 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 139 x = x + shift; /* shift for Fortran start by 1 indexing */ 140 idx = a->j; 141 v = a->a; 142 ii = a->i; 143 144 v += shift; /* shift for Fortran start by 1 indexing */ 145 idx += shift; 146 for (i=0; i<m; i++) { 147 jrow = ii[i]; 148 n = ii[i+1] - jrow; 149 sum1 = 0.0; 150 sum2 = 0.0; 151 for (j=0; j<n; j++) { 152 sum1 += v[jrow]*x[2*idx[jrow]]; 153 sum2 += v[jrow]*x[2*idx[jrow]+1]; 154 jrow++; 155 } 156 y[2*i] = sum1; 157 y[2*i+1] = sum2; 158 } 159 160 PetscLogFlops(4*a->nz - 2*m); 161 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 162 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 #undef __FUNCT__ 167 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 168 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 169 { 170 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 171 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 172 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 173 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 174 175 PetscFunctionBegin; 176 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 177 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 178 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 179 y = y + shift; /* shift for Fortran start by 1 indexing */ 180 for (i=0; i<m; i++) { 181 idx = a->j + a->i[i] + shift; 182 v = a->a + a->i[i] + shift; 183 n = a->i[i+1] - a->i[i]; 184 alpha1 = x[2*i]; 185 alpha2 = x[2*i+1]; 186 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 187 } 188 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 189 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 190 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 196 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 197 { 198 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 200 PetscScalar *x,*y,*v,sum1, sum2; 201 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 202 int n,i,jrow,j; 203 204 PetscFunctionBegin; 205 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 206 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 207 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 208 x = x + shift; /* shift for Fortran start by 1 indexing */ 209 idx = a->j; 210 v = a->a; 211 ii = a->i; 212 213 v += shift; /* shift for Fortran start by 1 indexing */ 214 idx += shift; 215 for (i=0; i<m; i++) { 216 jrow = ii[i]; 217 n = ii[i+1] - jrow; 218 sum1 = 0.0; 219 sum2 = 0.0; 220 for (j=0; j<n; j++) { 221 sum1 += v[jrow]*x[2*idx[jrow]]; 222 sum2 += v[jrow]*x[2*idx[jrow]+1]; 223 jrow++; 224 } 225 y[2*i] += sum1; 226 y[2*i+1] += sum2; 227 } 228 229 PetscLogFlops(4*a->nz - 2*m); 230 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 231 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 236 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 237 { 238 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 239 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 240 PetscScalar *x,*y,*v,alpha1,alpha2; 241 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 242 243 PetscFunctionBegin; 244 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 245 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 246 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 247 y = y + shift; /* shift for Fortran start by 1 indexing */ 248 for (i=0; i<m; i++) { 249 idx = a->j + a->i[i] + shift; 250 v = a->a + a->i[i] + shift; 251 n = a->i[i+1] - a->i[i]; 252 alpha1 = x[2*i]; 253 alpha2 = x[2*i+1]; 254 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 255 } 256 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 257 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 258 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 259 PetscFunctionReturn(0); 260 } 261 /* --------------------------------------------------------------------------------------*/ 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatMult_SeqMAIJ_3" 264 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 265 { 266 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 268 PetscScalar *x,*y,*v,sum1, sum2, sum3; 269 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 270 int n,i,jrow,j; 271 272 PetscFunctionBegin; 273 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 274 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 275 x = x + shift; /* shift for Fortran start by 1 indexing */ 276 idx = a->j; 277 v = a->a; 278 ii = a->i; 279 280 v += shift; /* shift for Fortran start by 1 indexing */ 281 idx += shift; 282 for (i=0; i<m; i++) { 283 jrow = ii[i]; 284 n = ii[i+1] - jrow; 285 sum1 = 0.0; 286 sum2 = 0.0; 287 sum3 = 0.0; 288 for (j=0; j<n; j++) { 289 sum1 += v[jrow]*x[3*idx[jrow]]; 290 sum2 += v[jrow]*x[3*idx[jrow]+1]; 291 sum3 += v[jrow]*x[3*idx[jrow]+2]; 292 jrow++; 293 } 294 y[3*i] = sum1; 295 y[3*i+1] = sum2; 296 y[3*i+2] = sum3; 297 } 298 299 PetscLogFlops(6*a->nz - 3*m); 300 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 301 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 307 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 308 { 309 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 311 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 312 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 313 314 PetscFunctionBegin; 315 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 318 y = y + shift; /* shift for Fortran start by 1 indexing */ 319 for (i=0; i<m; i++) { 320 idx = a->j + a->i[i] + shift; 321 v = a->a + a->i[i] + shift; 322 n = a->i[i+1] - a->i[i]; 323 alpha1 = x[3*i]; 324 alpha2 = x[3*i+1]; 325 alpha3 = x[3*i+2]; 326 while (n-->0) { 327 y[3*(*idx)] += alpha1*(*v); 328 y[3*(*idx)+1] += alpha2*(*v); 329 y[3*(*idx)+2] += alpha3*(*v); 330 idx++; v++; 331 } 332 } 333 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 334 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 335 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 341 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 342 { 343 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 345 PetscScalar *x,*y,*v,sum1, sum2, sum3; 346 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 347 int n,i,jrow,j; 348 349 PetscFunctionBegin; 350 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 353 x = x + shift; /* shift for Fortran start by 1 indexing */ 354 idx = a->j; 355 v = a->a; 356 ii = a->i; 357 358 v += shift; /* shift for Fortran start by 1 indexing */ 359 idx += shift; 360 for (i=0; i<m; i++) { 361 jrow = ii[i]; 362 n = ii[i+1] - jrow; 363 sum1 = 0.0; 364 sum2 = 0.0; 365 sum3 = 0.0; 366 for (j=0; j<n; j++) { 367 sum1 += v[jrow]*x[3*idx[jrow]]; 368 sum2 += v[jrow]*x[3*idx[jrow]+1]; 369 sum3 += v[jrow]*x[3*idx[jrow]+2]; 370 jrow++; 371 } 372 y[3*i] += sum1; 373 y[3*i+1] += sum2; 374 y[3*i+2] += sum3; 375 } 376 377 PetscLogFlops(6*a->nz); 378 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 379 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 380 PetscFunctionReturn(0); 381 } 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 384 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 385 { 386 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 387 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 388 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 389 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 390 391 PetscFunctionBegin; 392 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 393 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 394 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 395 y = y + shift; /* shift for Fortran start by 1 indexing */ 396 for (i=0; i<m; i++) { 397 idx = a->j + a->i[i] + shift; 398 v = a->a + a->i[i] + shift; 399 n = a->i[i+1] - a->i[i]; 400 alpha1 = x[3*i]; 401 alpha2 = x[3*i+1]; 402 alpha3 = x[3*i+2]; 403 while (n-->0) { 404 y[3*(*idx)] += alpha1*(*v); 405 y[3*(*idx)+1] += alpha2*(*v); 406 y[3*(*idx)+2] += alpha3*(*v); 407 idx++; v++; 408 } 409 } 410 PetscLogFlops(6*a->nz); 411 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 412 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 416 /* ------------------------------------------------------------------------------*/ 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatMult_SeqMAIJ_4" 419 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 420 { 421 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 422 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 423 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 424 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 425 int n,i,jrow,j; 426 427 PetscFunctionBegin; 428 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 430 x = x + shift; /* shift for Fortran start by 1 indexing */ 431 idx = a->j; 432 v = a->a; 433 ii = a->i; 434 435 v += shift; /* shift for Fortran start by 1 indexing */ 436 idx += shift; 437 for (i=0; i<m; i++) { 438 jrow = ii[i]; 439 n = ii[i+1] - jrow; 440 sum1 = 0.0; 441 sum2 = 0.0; 442 sum3 = 0.0; 443 sum4 = 0.0; 444 for (j=0; j<n; j++) { 445 sum1 += v[jrow]*x[4*idx[jrow]]; 446 sum2 += v[jrow]*x[4*idx[jrow]+1]; 447 sum3 += v[jrow]*x[4*idx[jrow]+2]; 448 sum4 += v[jrow]*x[4*idx[jrow]+3]; 449 jrow++; 450 } 451 y[4*i] = sum1; 452 y[4*i+1] = sum2; 453 y[4*i+2] = sum3; 454 y[4*i+3] = sum4; 455 } 456 457 PetscLogFlops(8*a->nz - 4*m); 458 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 465 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 466 { 467 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 469 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 470 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 471 472 PetscFunctionBegin; 473 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 474 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 475 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 476 y = y + shift; /* shift for Fortran start by 1 indexing */ 477 for (i=0; i<m; i++) { 478 idx = a->j + a->i[i] + shift; 479 v = a->a + a->i[i] + shift; 480 n = a->i[i+1] - a->i[i]; 481 alpha1 = x[4*i]; 482 alpha2 = x[4*i+1]; 483 alpha3 = x[4*i+2]; 484 alpha4 = x[4*i+3]; 485 while (n-->0) { 486 y[4*(*idx)] += alpha1*(*v); 487 y[4*(*idx)+1] += alpha2*(*v); 488 y[4*(*idx)+2] += alpha3*(*v); 489 y[4*(*idx)+3] += alpha4*(*v); 490 idx++; v++; 491 } 492 } 493 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 494 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 495 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNCT__ 500 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 501 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 502 { 503 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 505 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 506 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 507 int n,i,jrow,j; 508 509 PetscFunctionBegin; 510 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 511 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 512 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 513 x = x + shift; /* shift for Fortran start by 1 indexing */ 514 idx = a->j; 515 v = a->a; 516 ii = a->i; 517 518 v += shift; /* shift for Fortran start by 1 indexing */ 519 idx += shift; 520 for (i=0; i<m; i++) { 521 jrow = ii[i]; 522 n = ii[i+1] - jrow; 523 sum1 = 0.0; 524 sum2 = 0.0; 525 sum3 = 0.0; 526 sum4 = 0.0; 527 for (j=0; j<n; j++) { 528 sum1 += v[jrow]*x[4*idx[jrow]]; 529 sum2 += v[jrow]*x[4*idx[jrow]+1]; 530 sum3 += v[jrow]*x[4*idx[jrow]+2]; 531 sum4 += v[jrow]*x[4*idx[jrow]+3]; 532 jrow++; 533 } 534 y[4*i] += sum1; 535 y[4*i+1] += sum2; 536 y[4*i+2] += sum3; 537 y[4*i+3] += sum4; 538 } 539 540 PetscLogFlops(8*a->nz - 4*m); 541 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 542 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 547 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 548 { 549 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 551 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 552 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 553 554 PetscFunctionBegin; 555 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 556 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 557 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 558 y = y + shift; /* shift for Fortran start by 1 indexing */ 559 for (i=0; i<m; i++) { 560 idx = a->j + a->i[i] + shift; 561 v = a->a + a->i[i] + shift; 562 n = a->i[i+1] - a->i[i]; 563 alpha1 = x[4*i]; 564 alpha2 = x[4*i+1]; 565 alpha3 = x[4*i+2]; 566 alpha4 = x[4*i+3]; 567 while (n-->0) { 568 y[4*(*idx)] += alpha1*(*v); 569 y[4*(*idx)+1] += alpha2*(*v); 570 y[4*(*idx)+2] += alpha3*(*v); 571 y[4*(*idx)+3] += alpha4*(*v); 572 idx++; v++; 573 } 574 } 575 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 576 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 /* ------------------------------------------------------------------------------*/ 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatMult_SeqMAIJ_5" 584 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 585 { 586 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 587 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 588 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 589 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 590 int n,i,jrow,j; 591 592 PetscFunctionBegin; 593 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 594 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 595 x = x + shift; /* shift for Fortran start by 1 indexing */ 596 idx = a->j; 597 v = a->a; 598 ii = a->i; 599 600 v += shift; /* shift for Fortran start by 1 indexing */ 601 idx += shift; 602 for (i=0; i<m; i++) { 603 jrow = ii[i]; 604 n = ii[i+1] - jrow; 605 sum1 = 0.0; 606 sum2 = 0.0; 607 sum3 = 0.0; 608 sum4 = 0.0; 609 sum5 = 0.0; 610 for (j=0; j<n; j++) { 611 sum1 += v[jrow]*x[5*idx[jrow]]; 612 sum2 += v[jrow]*x[5*idx[jrow]+1]; 613 sum3 += v[jrow]*x[5*idx[jrow]+2]; 614 sum4 += v[jrow]*x[5*idx[jrow]+3]; 615 sum5 += v[jrow]*x[5*idx[jrow]+4]; 616 jrow++; 617 } 618 y[5*i] = sum1; 619 y[5*i+1] = sum2; 620 y[5*i+2] = sum3; 621 y[5*i+3] = sum4; 622 y[5*i+4] = sum5; 623 } 624 625 PetscLogFlops(10*a->nz - 5*m); 626 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 627 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 633 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 634 { 635 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 637 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 638 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 639 640 PetscFunctionBegin; 641 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 642 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 643 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 644 y = y + shift; /* shift for Fortran start by 1 indexing */ 645 for (i=0; i<m; i++) { 646 idx = a->j + a->i[i] + shift; 647 v = a->a + a->i[i] + shift; 648 n = a->i[i+1] - a->i[i]; 649 alpha1 = x[5*i]; 650 alpha2 = x[5*i+1]; 651 alpha3 = x[5*i+2]; 652 alpha4 = x[5*i+3]; 653 alpha5 = x[5*i+4]; 654 while (n-->0) { 655 y[5*(*idx)] += alpha1*(*v); 656 y[5*(*idx)+1] += alpha2*(*v); 657 y[5*(*idx)+2] += alpha3*(*v); 658 y[5*(*idx)+3] += alpha4*(*v); 659 y[5*(*idx)+4] += alpha5*(*v); 660 idx++; v++; 661 } 662 } 663 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 664 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 665 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 666 PetscFunctionReturn(0); 667 } 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 671 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 672 { 673 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 674 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 675 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 676 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 677 int n,i,jrow,j; 678 679 PetscFunctionBegin; 680 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 681 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 682 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 683 x = x + shift; /* shift for Fortran start by 1 indexing */ 684 idx = a->j; 685 v = a->a; 686 ii = a->i; 687 688 v += shift; /* shift for Fortran start by 1 indexing */ 689 idx += shift; 690 for (i=0; i<m; i++) { 691 jrow = ii[i]; 692 n = ii[i+1] - jrow; 693 sum1 = 0.0; 694 sum2 = 0.0; 695 sum3 = 0.0; 696 sum4 = 0.0; 697 sum5 = 0.0; 698 for (j=0; j<n; j++) { 699 sum1 += v[jrow]*x[5*idx[jrow]]; 700 sum2 += v[jrow]*x[5*idx[jrow]+1]; 701 sum3 += v[jrow]*x[5*idx[jrow]+2]; 702 sum4 += v[jrow]*x[5*idx[jrow]+3]; 703 sum5 += v[jrow]*x[5*idx[jrow]+4]; 704 jrow++; 705 } 706 y[5*i] += sum1; 707 y[5*i+1] += sum2; 708 y[5*i+2] += sum3; 709 y[5*i+3] += sum4; 710 y[5*i+4] += sum5; 711 } 712 713 PetscLogFlops(10*a->nz); 714 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 715 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 #undef __FUNCT__ 720 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 721 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 722 { 723 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 725 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 726 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 727 728 PetscFunctionBegin; 729 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 730 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 731 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 732 y = y + shift; /* shift for Fortran start by 1 indexing */ 733 for (i=0; i<m; i++) { 734 idx = a->j + a->i[i] + shift; 735 v = a->a + a->i[i] + shift; 736 n = a->i[i+1] - a->i[i]; 737 alpha1 = x[5*i]; 738 alpha2 = x[5*i+1]; 739 alpha3 = x[5*i+2]; 740 alpha4 = x[5*i+3]; 741 alpha5 = x[5*i+4]; 742 while (n-->0) { 743 y[5*(*idx)] += alpha1*(*v); 744 y[5*(*idx)+1] += alpha2*(*v); 745 y[5*(*idx)+2] += alpha3*(*v); 746 y[5*(*idx)+3] += alpha4*(*v); 747 y[5*(*idx)+4] += alpha5*(*v); 748 idx++; v++; 749 } 750 } 751 PetscLogFlops(10*a->nz); 752 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 753 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 /* ------------------------------------------------------------------------------*/ 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatMult_SeqMAIJ_6" 760 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 761 { 762 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 764 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 765 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 766 int n,i,jrow,j; 767 768 PetscFunctionBegin; 769 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 770 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 771 x = x + shift; /* shift for Fortran start by 1 indexing */ 772 idx = a->j; 773 v = a->a; 774 ii = a->i; 775 776 v += shift; /* shift for Fortran start by 1 indexing */ 777 idx += shift; 778 for (i=0; i<m; i++) { 779 jrow = ii[i]; 780 n = ii[i+1] - jrow; 781 sum1 = 0.0; 782 sum2 = 0.0; 783 sum3 = 0.0; 784 sum4 = 0.0; 785 sum5 = 0.0; 786 sum6 = 0.0; 787 for (j=0; j<n; j++) { 788 sum1 += v[jrow]*x[6*idx[jrow]]; 789 sum2 += v[jrow]*x[6*idx[jrow]+1]; 790 sum3 += v[jrow]*x[6*idx[jrow]+2]; 791 sum4 += v[jrow]*x[6*idx[jrow]+3]; 792 sum5 += v[jrow]*x[6*idx[jrow]+4]; 793 sum6 += v[jrow]*x[6*idx[jrow]+5]; 794 jrow++; 795 } 796 y[6*i] = sum1; 797 y[6*i+1] = sum2; 798 y[6*i+2] = sum3; 799 y[6*i+3] = sum4; 800 y[6*i+4] = sum5; 801 y[6*i+5] = sum6; 802 } 803 804 PetscLogFlops(12*a->nz - 6*m); 805 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 806 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 812 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 813 { 814 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 816 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 817 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 818 819 PetscFunctionBegin; 820 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 823 y = y + shift; /* shift for Fortran start by 1 indexing */ 824 for (i=0; i<m; i++) { 825 idx = a->j + a->i[i] + shift; 826 v = a->a + a->i[i] + shift; 827 n = a->i[i+1] - a->i[i]; 828 alpha1 = x[6*i]; 829 alpha2 = x[6*i+1]; 830 alpha3 = x[6*i+2]; 831 alpha4 = x[6*i+3]; 832 alpha5 = x[6*i+4]; 833 alpha6 = x[6*i+5]; 834 while (n-->0) { 835 y[6*(*idx)] += alpha1*(*v); 836 y[6*(*idx)+1] += alpha2*(*v); 837 y[6*(*idx)+2] += alpha3*(*v); 838 y[6*(*idx)+3] += alpha4*(*v); 839 y[6*(*idx)+4] += alpha5*(*v); 840 y[6*(*idx)+5] += alpha6*(*v); 841 idx++; v++; 842 } 843 } 844 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 845 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 846 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 852 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 853 { 854 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 855 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 856 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 857 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 858 int n,i,jrow,j; 859 860 PetscFunctionBegin; 861 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 862 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 863 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 864 x = x + shift; /* shift for Fortran start by 1 indexing */ 865 idx = a->j; 866 v = a->a; 867 ii = a->i; 868 869 v += shift; /* shift for Fortran start by 1 indexing */ 870 idx += shift; 871 for (i=0; i<m; i++) { 872 jrow = ii[i]; 873 n = ii[i+1] - jrow; 874 sum1 = 0.0; 875 sum2 = 0.0; 876 sum3 = 0.0; 877 sum4 = 0.0; 878 sum5 = 0.0; 879 sum6 = 0.0; 880 for (j=0; j<n; j++) { 881 sum1 += v[jrow]*x[6*idx[jrow]]; 882 sum2 += v[jrow]*x[6*idx[jrow]+1]; 883 sum3 += v[jrow]*x[6*idx[jrow]+2]; 884 sum4 += v[jrow]*x[6*idx[jrow]+3]; 885 sum5 += v[jrow]*x[6*idx[jrow]+4]; 886 sum6 += v[jrow]*x[6*idx[jrow]+5]; 887 jrow++; 888 } 889 y[6*i] += sum1; 890 y[6*i+1] += sum2; 891 y[6*i+2] += sum3; 892 y[6*i+3] += sum4; 893 y[6*i+4] += sum5; 894 y[6*i+5] += sum6; 895 } 896 897 PetscLogFlops(12*a->nz); 898 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 899 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 905 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 906 { 907 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 908 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 909 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 910 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 911 912 PetscFunctionBegin; 913 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 914 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 915 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 916 y = y + shift; /* shift for Fortran start by 1 indexing */ 917 for (i=0; i<m; i++) { 918 idx = a->j + a->i[i] + shift; 919 v = a->a + a->i[i] + shift; 920 n = a->i[i+1] - a->i[i]; 921 alpha1 = x[6*i]; 922 alpha2 = x[6*i+1]; 923 alpha3 = x[6*i+2]; 924 alpha4 = x[6*i+3]; 925 alpha5 = x[6*i+4]; 926 alpha6 = x[6*i+5]; 927 while (n-->0) { 928 y[6*(*idx)] += alpha1*(*v); 929 y[6*(*idx)+1] += alpha2*(*v); 930 y[6*(*idx)+2] += alpha3*(*v); 931 y[6*(*idx)+3] += alpha4*(*v); 932 y[6*(*idx)+4] += alpha5*(*v); 933 y[6*(*idx)+5] += alpha6*(*v); 934 idx++; v++; 935 } 936 } 937 PetscLogFlops(12*a->nz); 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 /* ------------------------------------------------------------------------------*/ 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatMult_SeqMAIJ_8" 946 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 947 { 948 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 949 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 950 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 951 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 952 int n,i,jrow,j; 953 954 PetscFunctionBegin; 955 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 956 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 957 x = x + shift; /* shift for Fortran start by 1 indexing */ 958 idx = a->j; 959 v = a->a; 960 ii = a->i; 961 962 v += shift; /* shift for Fortran start by 1 indexing */ 963 idx += shift; 964 for (i=0; i<m; i++) { 965 jrow = ii[i]; 966 n = ii[i+1] - jrow; 967 sum1 = 0.0; 968 sum2 = 0.0; 969 sum3 = 0.0; 970 sum4 = 0.0; 971 sum5 = 0.0; 972 sum6 = 0.0; 973 sum7 = 0.0; 974 sum8 = 0.0; 975 for (j=0; j<n; j++) { 976 sum1 += v[jrow]*x[8*idx[jrow]]; 977 sum2 += v[jrow]*x[8*idx[jrow]+1]; 978 sum3 += v[jrow]*x[8*idx[jrow]+2]; 979 sum4 += v[jrow]*x[8*idx[jrow]+3]; 980 sum5 += v[jrow]*x[8*idx[jrow]+4]; 981 sum6 += v[jrow]*x[8*idx[jrow]+5]; 982 sum7 += v[jrow]*x[8*idx[jrow]+6]; 983 sum8 += v[jrow]*x[8*idx[jrow]+7]; 984 jrow++; 985 } 986 y[8*i] = sum1; 987 y[8*i+1] = sum2; 988 y[8*i+2] = sum3; 989 y[8*i+3] = sum4; 990 y[8*i+4] = sum5; 991 y[8*i+5] = sum6; 992 y[8*i+6] = sum7; 993 y[8*i+7] = sum8; 994 } 995 996 PetscLogFlops(16*a->nz - 8*m); 997 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 998 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1004 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1005 { 1006 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1007 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1008 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1009 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1010 1011 PetscFunctionBegin; 1012 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1013 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1014 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1015 y = y + shift; /* shift for Fortran start by 1 indexing */ 1016 for (i=0; i<m; i++) { 1017 idx = a->j + a->i[i] + shift; 1018 v = a->a + a->i[i] + shift; 1019 n = a->i[i+1] - a->i[i]; 1020 alpha1 = x[8*i]; 1021 alpha2 = x[8*i+1]; 1022 alpha3 = x[8*i+2]; 1023 alpha4 = x[8*i+3]; 1024 alpha5 = x[8*i+4]; 1025 alpha6 = x[8*i+5]; 1026 alpha7 = x[8*i+6]; 1027 alpha8 = x[8*i+7]; 1028 while (n-->0) { 1029 y[8*(*idx)] += alpha1*(*v); 1030 y[8*(*idx)+1] += alpha2*(*v); 1031 y[8*(*idx)+2] += alpha3*(*v); 1032 y[8*(*idx)+3] += alpha4*(*v); 1033 y[8*(*idx)+4] += alpha5*(*v); 1034 y[8*(*idx)+5] += alpha6*(*v); 1035 y[8*(*idx)+6] += alpha7*(*v); 1036 y[8*(*idx)+7] += alpha8*(*v); 1037 idx++; v++; 1038 } 1039 } 1040 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1041 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1042 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1048 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1049 { 1050 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1051 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1052 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1053 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 1054 int n,i,jrow,j; 1055 1056 PetscFunctionBegin; 1057 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1058 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1059 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1060 x = x + shift; /* shift for Fortran start by 1 indexing */ 1061 idx = a->j; 1062 v = a->a; 1063 ii = a->i; 1064 1065 v += shift; /* shift for Fortran start by 1 indexing */ 1066 idx += shift; 1067 for (i=0; i<m; i++) { 1068 jrow = ii[i]; 1069 n = ii[i+1] - jrow; 1070 sum1 = 0.0; 1071 sum2 = 0.0; 1072 sum3 = 0.0; 1073 sum4 = 0.0; 1074 sum5 = 0.0; 1075 sum6 = 0.0; 1076 sum7 = 0.0; 1077 sum8 = 0.0; 1078 for (j=0; j<n; j++) { 1079 sum1 += v[jrow]*x[8*idx[jrow]]; 1080 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1081 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1082 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1083 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1084 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1085 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1086 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1087 jrow++; 1088 } 1089 y[8*i] += sum1; 1090 y[8*i+1] += sum2; 1091 y[8*i+2] += sum3; 1092 y[8*i+3] += sum4; 1093 y[8*i+4] += sum5; 1094 y[8*i+5] += sum6; 1095 y[8*i+6] += sum7; 1096 y[8*i+7] += sum8; 1097 } 1098 1099 PetscLogFlops(16*a->nz); 1100 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1101 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1107 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1108 { 1109 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1111 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1112 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 1113 1114 PetscFunctionBegin; 1115 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1116 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1117 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1118 y = y + shift; /* shift for Fortran start by 1 indexing */ 1119 for (i=0; i<m; i++) { 1120 idx = a->j + a->i[i] + shift; 1121 v = a->a + a->i[i] + shift; 1122 n = a->i[i+1] - a->i[i]; 1123 alpha1 = x[8*i]; 1124 alpha2 = x[8*i+1]; 1125 alpha3 = x[8*i+2]; 1126 alpha4 = x[8*i+3]; 1127 alpha5 = x[8*i+4]; 1128 alpha6 = x[8*i+5]; 1129 alpha7 = x[8*i+6]; 1130 alpha8 = x[8*i+7]; 1131 while (n-->0) { 1132 y[8*(*idx)] += alpha1*(*v); 1133 y[8*(*idx)+1] += alpha2*(*v); 1134 y[8*(*idx)+2] += alpha3*(*v); 1135 y[8*(*idx)+3] += alpha4*(*v); 1136 y[8*(*idx)+4] += alpha5*(*v); 1137 y[8*(*idx)+5] += alpha6*(*v); 1138 y[8*(*idx)+6] += alpha7*(*v); 1139 y[8*(*idx)+7] += alpha8*(*v); 1140 idx++; v++; 1141 } 1142 } 1143 PetscLogFlops(16*a->nz); 1144 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1145 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 /*===================================================================================*/ 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1152 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1153 { 1154 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1155 int ierr; 1156 PetscFunctionBegin; 1157 1158 /* start the scatter */ 1159 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1160 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1161 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1162 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 #undef __FUNCT__ 1167 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1168 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1169 { 1170 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1171 int ierr; 1172 PetscFunctionBegin; 1173 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1174 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1175 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1176 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1182 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1183 { 1184 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1185 int ierr; 1186 PetscFunctionBegin; 1187 1188 /* start the scatter */ 1189 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1190 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1191 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1192 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1198 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1199 { 1200 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1201 int ierr; 1202 PetscFunctionBegin; 1203 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1204 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1205 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 /* ---------------------------------------------------------------------------------- */ 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatCreateMAIJ" 1213 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 1214 { 1215 int ierr,size,n; 1216 Mat_MPIMAIJ *b; 1217 Mat B; 1218 1219 PetscFunctionBegin; 1220 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1221 1222 if (dof == 1) { 1223 *maij = A; 1224 } else { 1225 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1226 B->assembled = PETSC_TRUE; 1227 1228 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1229 if (size == 1) { 1230 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1231 B->ops->destroy = MatDestroy_SeqMAIJ; 1232 b = (Mat_MPIMAIJ*)B->data; 1233 b->dof = dof; 1234 b->AIJ = A; 1235 if (dof == 2) { 1236 B->ops->mult = MatMult_SeqMAIJ_2; 1237 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1238 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1239 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1240 } else if (dof == 3) { 1241 B->ops->mult = MatMult_SeqMAIJ_3; 1242 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1243 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1244 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1245 } else if (dof == 4) { 1246 B->ops->mult = MatMult_SeqMAIJ_4; 1247 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1248 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1249 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1250 } else if (dof == 5) { 1251 B->ops->mult = MatMult_SeqMAIJ_5; 1252 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1253 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1254 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1255 } else if (dof == 6) { 1256 B->ops->mult = MatMult_SeqMAIJ_6; 1257 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1258 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1259 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1260 } else if (dof == 8) { 1261 B->ops->mult = MatMult_SeqMAIJ_8; 1262 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1263 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1264 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1265 } else { 1266 SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1267 } 1268 } else { 1269 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1270 IS from,to; 1271 Vec gvec; 1272 int *garray,i; 1273 1274 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1275 B->ops->destroy = MatDestroy_MPIMAIJ; 1276 b = (Mat_MPIMAIJ*)B->data; 1277 b->dof = dof; 1278 b->A = A; 1279 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1280 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1281 1282 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1283 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1284 1285 /* create two temporary Index sets for build scatter gather */ 1286 ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr); 1287 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1288 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1289 ierr = PetscFree(garray);CHKERRQ(ierr); 1290 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1291 1292 /* create temporary global vector to generate scatter context */ 1293 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1294 1295 /* generate the scatter context */ 1296 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1297 1298 ierr = ISDestroy(from);CHKERRQ(ierr); 1299 ierr = ISDestroy(to);CHKERRQ(ierr); 1300 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1301 1302 B->ops->mult = MatMult_MPIMAIJ_dof; 1303 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1304 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1305 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1306 } 1307 *maij = B; 1308 } 1309 PetscFunctionReturn(0); 1310 } 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323