1 /*$Id: maij.c,v 1.6 2000/06/27 17:46:45 bsmith Exp bsmith $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/aij/mpi/mpiaij.h" 20 21 typedef struct { 22 int dof; /* number of components */ 23 Mat AIJ; /* representation of interpolation for one component */ 24 } Mat_SeqMAIJ; 25 26 typedef struct { 27 int dof; /* number of components */ 28 Mat AIJ,OAIJ; /* representation of interpolation for one component */ 29 Mat A; 30 VecScatter ctx; /* update ghost points for parallel case */ 31 Vec w; /* work space for ghost values for parallel case */ 32 } Mat_MPIMAIJ; 33 34 #undef __FUNC__ 35 #define __FUNC__ /*<a name="MatDestroy_SeqMAIJ"></a>*/"MatDestroy_SeqMAIJ" 36 int MatDestroy_SeqMAIJ(Mat A) 37 { 38 int ierr; 39 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40 41 PetscFunctionBegin; 42 if (b->AIJ) { 43 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 44 } 45 ierr = PetscFree(b);CHKERRQ(ierr); 46 PetscHeaderDestroy(A); 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNC__ 51 #define __FUNC__ /*<a name="MatDestroy_MPIMAIJ"></a>*/"MatDestroy_MPIMAIJ" 52 int MatDestroy_MPIMAIJ(Mat A) 53 { 54 int ierr; 55 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 56 57 PetscFunctionBegin; 58 if (b->A) { 59 ierr = MatDestroy(b->A);CHKERRQ(ierr); 60 } 61 if (b->AIJ) { 62 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 63 } 64 if (b->OAIJ) { 65 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 66 } 67 if (b->ctx) { 68 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 69 } 70 if (b->w) { 71 ierr = VecDestroy(b->w);CHKERRQ(ierr); 72 } 73 ierr = PetscFree(b);CHKERRQ(ierr); 74 PetscHeaderDestroy(A); 75 PetscFunctionReturn(0); 76 } 77 78 EXTERN_C_BEGIN 79 #undef __FUNC__ 80 #define __FUNC__ /*<a name="MatCreate_MAIJ"></a>*/"MatCreate_MAIJ" 81 int MatCreate_MAIJ(Mat A) 82 { 83 int ierr; 84 Mat_MPIMAIJ *b; 85 86 PetscFunctionBegin; 87 A->data = (void*)(b = PetscNew(Mat_MPIMAIJ));CHKPTRQ(b); 88 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 89 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 90 A->factor = 0; 91 A->mapping = 0; 92 93 b->AIJ = 0; 94 b->dof = 0; 95 b->OAIJ = 0; 96 b->ctx = 0; 97 b->w = 0; 98 PetscFunctionReturn(0); 99 } 100 EXTERN_C_END 101 102 /* --------------------------------------------------------------------------------------*/ 103 #undef __FUNC__ 104 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2" 105 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 106 { 107 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 108 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 109 Scalar *x,*y,*v,sum1, sum2; 110 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 111 int n,i,jrow,j; 112 113 PetscFunctionBegin; 114 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 115 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 116 x = x + shift; /* shift for Fortran start by 1 indexing */ 117 idx = a->j; 118 v = a->a; 119 ii = a->i; 120 121 v += shift; /* shift for Fortran start by 1 indexing */ 122 idx += shift; 123 for (i=0; i<m; i++) { 124 jrow = ii[i]; 125 n = ii[i+1] - jrow; 126 sum1 = 0.0; 127 sum2 = 0.0; 128 for (j=0; j<n; j++) { 129 sum1 += v[jrow]*x[2*idx[jrow]]; 130 sum2 += v[jrow]*x[2*idx[jrow]+1]; 131 jrow++; 132 } 133 y[2*i] = sum1; 134 y[2*i+1] = sum2; 135 } 136 137 PLogFlops(4*a->nz - 2*m); 138 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 139 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNC__ 144 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2" 145 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 146 { 147 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 148 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 149 Scalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 150 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 151 152 PetscFunctionBegin; 153 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 154 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 155 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 156 y = y + shift; /* shift for Fortran start by 1 indexing */ 157 for (i=0; i<m; i++) { 158 idx = a->j + a->i[i] + shift; 159 v = a->a + a->i[i] + shift; 160 n = a->i[i+1] - a->i[i]; 161 alpha1 = x[2*i]; 162 alpha2 = x[2*i+1]; 163 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 164 } 165 PLogFlops(4*a->nz - 2*a->n); 166 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 167 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNC__ 172 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2" 173 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 174 { 175 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 176 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 177 Scalar *x,*y,*v,sum1, sum2; 178 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 179 int n,i,jrow,j; 180 181 PetscFunctionBegin; 182 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 183 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 184 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 185 x = x + shift; /* shift for Fortran start by 1 indexing */ 186 idx = a->j; 187 v = a->a; 188 ii = a->i; 189 190 v += shift; /* shift for Fortran start by 1 indexing */ 191 idx += shift; 192 for (i=0; i<m; i++) { 193 jrow = ii[i]; 194 n = ii[i+1] - jrow; 195 sum1 = 0.0; 196 sum2 = 0.0; 197 for (j=0; j<n; j++) { 198 sum1 += v[jrow]*x[2*idx[jrow]]; 199 sum2 += v[jrow]*x[2*idx[jrow]+1]; 200 jrow++; 201 } 202 y[2*i] += sum1; 203 y[2*i+1] += sum2; 204 } 205 206 PLogFlops(4*a->nz - 2*m); 207 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 #undef __FUNC__ 212 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2" 213 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 214 { 215 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 216 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 217 Scalar *x,*y,*v,alpha1,alpha2; 218 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 219 220 PetscFunctionBegin; 221 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 222 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 223 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 224 y = y + shift; /* shift for Fortran start by 1 indexing */ 225 for (i=0; i<m; i++) { 226 idx = a->j + a->i[i] + shift; 227 v = a->a + a->i[i] + shift; 228 n = a->i[i+1] - a->i[i]; 229 alpha1 = x[2*i]; 230 alpha2 = x[2*i+1]; 231 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 232 } 233 PLogFlops(4*a->nz - 2*a->n); 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 /* --------------------------------------------------------------------------------------*/ 239 #undef __FUNC__ 240 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3" 241 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 242 { 243 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 244 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 245 Scalar *x,*y,*v,sum1, sum2, sum3; 246 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 247 int n,i,jrow,j; 248 249 PetscFunctionBegin; 250 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 251 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 252 x = x + shift; /* shift for Fortran start by 1 indexing */ 253 idx = a->j; 254 v = a->a; 255 ii = a->i; 256 257 v += shift; /* shift for Fortran start by 1 indexing */ 258 idx += shift; 259 for (i=0; i<m; i++) { 260 jrow = ii[i]; 261 n = ii[i+1] - jrow; 262 sum1 = 0.0; 263 sum2 = 0.0; 264 sum3 = 0.0; 265 for (j=0; j<n; j++) { 266 sum1 += v[jrow]*x[3*idx[jrow]]; 267 sum2 += v[jrow]*x[3*idx[jrow]+1]; 268 sum3 += v[jrow]*x[3*idx[jrow]+2]; 269 jrow++; 270 } 271 y[3*i] = sum1; 272 y[3*i+1] = sum2; 273 y[3*i+2] = sum3; 274 } 275 276 PLogFlops(6*a->nz - 3*m); 277 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 278 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNC__ 283 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3" 284 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 285 { 286 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 287 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 288 Scalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 289 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 290 291 PetscFunctionBegin; 292 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 293 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 294 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 295 y = y + shift; /* shift for Fortran start by 1 indexing */ 296 for (i=0; i<m; i++) { 297 idx = a->j + a->i[i] + shift; 298 v = a->a + a->i[i] + shift; 299 n = a->i[i+1] - a->i[i]; 300 alpha1 = x[3*i]; 301 alpha2 = x[3*i+1]; 302 alpha3 = x[3*i+2]; 303 while (n-->0) { 304 y[3*(*idx)] += alpha1*(*v); 305 y[3*(*idx)+1] += alpha2*(*v); 306 y[3*(*idx)+2] += alpha3*(*v); 307 idx++; v++; 308 } 309 } 310 PLogFlops(6*a->nz - 3*a->n); 311 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 312 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNC__ 317 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3" 318 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 319 { 320 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 321 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 322 Scalar *x,*y,*v,sum1, sum2, sum3; 323 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 324 int n,i,jrow,j; 325 326 PetscFunctionBegin; 327 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 328 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 329 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 330 x = x + shift; /* shift for Fortran start by 1 indexing */ 331 idx = a->j; 332 v = a->a; 333 ii = a->i; 334 335 v += shift; /* shift for Fortran start by 1 indexing */ 336 idx += shift; 337 for (i=0; i<m; i++) { 338 jrow = ii[i]; 339 n = ii[i+1] - jrow; 340 sum1 = 0.0; 341 sum2 = 0.0; 342 sum3 = 0.0; 343 for (j=0; j<n; j++) { 344 sum1 += v[jrow]*x[3*idx[jrow]]; 345 sum2 += v[jrow]*x[3*idx[jrow]+1]; 346 sum3 += v[jrow]*x[3*idx[jrow]+2]; 347 jrow++; 348 } 349 y[3*i] += sum1; 350 y[3*i+1] += sum2; 351 y[3*i+2] += sum3; 352 } 353 354 PLogFlops(6*a->nz); 355 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 356 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 357 PetscFunctionReturn(0); 358 } 359 #undef __FUNC__ 360 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3" 361 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 362 { 363 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 364 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 365 Scalar *x,*y,*v,alpha1,alpha2,alpha3; 366 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 367 368 PetscFunctionBegin; 369 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 370 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 371 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 372 y = y + shift; /* shift for Fortran start by 1 indexing */ 373 for (i=0; i<m; i++) { 374 idx = a->j + a->i[i] + shift; 375 v = a->a + a->i[i] + shift; 376 n = a->i[i+1] - a->i[i]; 377 alpha1 = x[3*i]; 378 alpha2 = x[3*i+1]; 379 alpha3 = x[3*i+2]; 380 while (n-->0) { 381 y[3*(*idx)] += alpha1*(*v); 382 y[3*(*idx)+1] += alpha2*(*v); 383 y[3*(*idx)+2] += alpha3*(*v); 384 idx++; v++; 385 } 386 } 387 PLogFlops(6*a->nz); 388 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 389 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 /* ------------------------------------------------------------------------------*/ 394 #undef __FUNC__ 395 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4" 396 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 397 { 398 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 400 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 401 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 402 int n,i,jrow,j; 403 404 PetscFunctionBegin; 405 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 406 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 407 x = x + shift; /* shift for Fortran start by 1 indexing */ 408 idx = a->j; 409 v = a->a; 410 ii = a->i; 411 412 v += shift; /* shift for Fortran start by 1 indexing */ 413 idx += shift; 414 for (i=0; i<m; i++) { 415 jrow = ii[i]; 416 n = ii[i+1] - jrow; 417 sum1 = 0.0; 418 sum2 = 0.0; 419 sum3 = 0.0; 420 sum4 = 0.0; 421 for (j=0; j<n; j++) { 422 sum1 += v[jrow]*x[4*idx[jrow]]; 423 sum2 += v[jrow]*x[4*idx[jrow]+1]; 424 sum3 += v[jrow]*x[4*idx[jrow]+2]; 425 sum4 += v[jrow]*x[4*idx[jrow]+3]; 426 jrow++; 427 } 428 y[4*i] = sum1; 429 y[4*i+1] = sum2; 430 y[4*i+2] = sum3; 431 y[4*i+3] = sum4; 432 } 433 434 PLogFlops(8*a->nz - 4*m); 435 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 436 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNC__ 441 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4" 442 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 443 { 444 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 445 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 446 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 447 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 448 449 PetscFunctionBegin; 450 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 451 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 452 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 453 y = y + shift; /* shift for Fortran start by 1 indexing */ 454 for (i=0; i<m; i++) { 455 idx = a->j + a->i[i] + shift; 456 v = a->a + a->i[i] + shift; 457 n = a->i[i+1] - a->i[i]; 458 alpha1 = x[4*i]; 459 alpha2 = x[4*i+1]; 460 alpha3 = x[4*i+2]; 461 alpha4 = x[4*i+3]; 462 while (n-->0) { 463 y[4*(*idx)] += alpha1*(*v); 464 y[4*(*idx)+1] += alpha2*(*v); 465 y[4*(*idx)+2] += alpha3*(*v); 466 y[4*(*idx)+3] += alpha4*(*v); 467 idx++; v++; 468 } 469 } 470 PLogFlops(8*a->nz - 4*a->n); 471 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 472 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNC__ 477 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4" 478 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 479 { 480 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 481 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 482 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 483 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 484 int n,i,jrow,j; 485 486 PetscFunctionBegin; 487 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 488 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 489 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 490 x = x + shift; /* shift for Fortran start by 1 indexing */ 491 idx = a->j; 492 v = a->a; 493 ii = a->i; 494 495 v += shift; /* shift for Fortran start by 1 indexing */ 496 idx += shift; 497 for (i=0; i<m; i++) { 498 jrow = ii[i]; 499 n = ii[i+1] - jrow; 500 sum1 = 0.0; 501 sum2 = 0.0; 502 sum3 = 0.0; 503 sum4 = 0.0; 504 for (j=0; j<n; j++) { 505 sum1 += v[jrow]*x[4*idx[jrow]]; 506 sum2 += v[jrow]*x[4*idx[jrow]+1]; 507 sum3 += v[jrow]*x[4*idx[jrow]+2]; 508 sum4 += v[jrow]*x[4*idx[jrow]+3]; 509 jrow++; 510 } 511 y[4*i] += sum1; 512 y[4*i+1] += sum2; 513 y[4*i+2] += sum3; 514 y[4*i+3] += sum4; 515 } 516 517 PLogFlops(8*a->nz - 4*m); 518 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 519 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 #undef __FUNC__ 523 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4" 524 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 525 { 526 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 527 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 528 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 529 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 530 531 PetscFunctionBegin; 532 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 533 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 534 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 535 y = y + shift; /* shift for Fortran start by 1 indexing */ 536 for (i=0; i<m; i++) { 537 idx = a->j + a->i[i] + shift; 538 v = a->a + a->i[i] + shift; 539 n = a->i[i+1] - a->i[i]; 540 alpha1 = x[4*i]; 541 alpha2 = x[4*i+1]; 542 alpha3 = x[4*i+2]; 543 alpha4 = x[4*i+3]; 544 while (n-->0) { 545 y[4*(*idx)] += alpha1*(*v); 546 y[4*(*idx)+1] += alpha2*(*v); 547 y[4*(*idx)+2] += alpha3*(*v); 548 y[4*(*idx)+3] += alpha4*(*v); 549 idx++; v++; 550 } 551 } 552 PLogFlops(8*a->nz - 4*a->n); 553 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 554 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 557 } 558 559 /* ------------------------------------------------------------------------------*/ 560 #undef __FUNC__ 561 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5" 562 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 563 { 564 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 565 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 566 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 567 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 568 int n,i,jrow,j; 569 570 PetscFunctionBegin; 571 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 572 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 573 x = x + shift; /* shift for Fortran start by 1 indexing */ 574 idx = a->j; 575 v = a->a; 576 ii = a->i; 577 578 v += shift; /* shift for Fortran start by 1 indexing */ 579 idx += shift; 580 for (i=0; i<m; i++) { 581 jrow = ii[i]; 582 n = ii[i+1] - jrow; 583 sum1 = 0.0; 584 sum2 = 0.0; 585 sum3 = 0.0; 586 sum4 = 0.0; 587 sum5 = 0.0; 588 for (j=0; j<n; j++) { 589 sum1 += v[jrow]*x[5*idx[jrow]]; 590 sum2 += v[jrow]*x[5*idx[jrow]+1]; 591 sum3 += v[jrow]*x[5*idx[jrow]+2]; 592 sum4 += v[jrow]*x[5*idx[jrow]+3]; 593 sum5 += v[jrow]*x[5*idx[jrow]+4]; 594 jrow++; 595 } 596 y[5*i] = sum1; 597 y[5*i+1] = sum2; 598 y[5*i+2] = sum3; 599 y[5*i+3] = sum4; 600 y[5*i+4] = sum5; 601 } 602 603 PLogFlops(10*a->nz - 5*m); 604 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 605 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 606 PetscFunctionReturn(0); 607 } 608 609 #undef __FUNC__ 610 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5" 611 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 612 { 613 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 614 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 615 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 616 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 617 618 PetscFunctionBegin; 619 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 620 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 621 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 622 y = y + shift; /* shift for Fortran start by 1 indexing */ 623 for (i=0; i<m; i++) { 624 idx = a->j + a->i[i] + shift; 625 v = a->a + a->i[i] + shift; 626 n = a->i[i+1] - a->i[i]; 627 alpha1 = x[5*i]; 628 alpha2 = x[5*i+1]; 629 alpha3 = x[5*i+2]; 630 alpha4 = x[5*i+3]; 631 alpha5 = x[5*i+4]; 632 while (n-->0) { 633 y[5*(*idx)] += alpha1*(*v); 634 y[5*(*idx)+1] += alpha2*(*v); 635 y[5*(*idx)+2] += alpha3*(*v); 636 y[5*(*idx)+3] += alpha4*(*v); 637 y[5*(*idx)+4] += alpha5*(*v); 638 idx++; v++; 639 } 640 } 641 PLogFlops(10*a->nz - 5*a->n); 642 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 643 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNC__ 648 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5" 649 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 650 { 651 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 652 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 653 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 654 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 655 int n,i,jrow,j; 656 657 PetscFunctionBegin; 658 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 659 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 660 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 661 x = x + shift; /* shift for Fortran start by 1 indexing */ 662 idx = a->j; 663 v = a->a; 664 ii = a->i; 665 666 v += shift; /* shift for Fortran start by 1 indexing */ 667 idx += shift; 668 for (i=0; i<m; i++) { 669 jrow = ii[i]; 670 n = ii[i+1] - jrow; 671 sum1 = 0.0; 672 sum2 = 0.0; 673 sum3 = 0.0; 674 sum4 = 0.0; 675 sum5 = 0.0; 676 for (j=0; j<n; j++) { 677 sum1 += v[jrow]*x[5*idx[jrow]]; 678 sum2 += v[jrow]*x[5*idx[jrow]+1]; 679 sum3 += v[jrow]*x[5*idx[jrow]+2]; 680 sum4 += v[jrow]*x[5*idx[jrow]+3]; 681 sum5 += v[jrow]*x[5*idx[jrow]+4]; 682 jrow++; 683 } 684 y[5*i] += sum1; 685 y[5*i+1] += sum2; 686 y[5*i+2] += sum3; 687 y[5*i+3] += sum4; 688 y[5*i+4] += sum5; 689 } 690 691 PLogFlops(10*a->nz); 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNC__ 698 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5" 699 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 700 { 701 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 702 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 703 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 704 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 705 706 PetscFunctionBegin; 707 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 708 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 709 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 710 y = y + shift; /* shift for Fortran start by 1 indexing */ 711 for (i=0; i<m; i++) { 712 idx = a->j + a->i[i] + shift; 713 v = a->a + a->i[i] + shift; 714 n = a->i[i+1] - a->i[i]; 715 alpha1 = x[5*i]; 716 alpha2 = x[5*i+1]; 717 alpha3 = x[5*i+2]; 718 alpha4 = x[5*i+3]; 719 alpha5 = x[5*i+4]; 720 while (n-->0) { 721 y[5*(*idx)] += alpha1*(*v); 722 y[5*(*idx)+1] += alpha2*(*v); 723 y[5*(*idx)+2] += alpha3*(*v); 724 y[5*(*idx)+3] += alpha4*(*v); 725 y[5*(*idx)+4] += alpha5*(*v); 726 idx++; v++; 727 } 728 } 729 PLogFlops(10*a->nz); 730 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 731 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 732 PetscFunctionReturn(0); 733 } 734 735 /*===================================================================================*/ 736 #undef __FUNC__ 737 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_dof"></a>*/"MatMult_MPIMAIJ_dof" 738 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 739 { 740 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 741 int ierr; 742 PetscFunctionBegin; 743 744 /* start the scatter */ 745 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 746 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 747 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 748 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNC__ 753 #define __FUNC__ /*<a name="MatMultTranspose_MPIMAIJ_dof"></a>*/"MatMultTranspose_MPIMAIJ_dof" 754 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 755 { 756 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 757 int ierr; 758 PetscFunctionBegin; 759 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 760 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 761 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 762 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNC__ 767 #define __FUNC__ /*<a name="MatMultAdd_MPIMAIJ_dof"></a>*/"MatMultAdd_MPIMAIJ_dof" 768 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 769 { 770 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 771 int ierr; 772 PetscFunctionBegin; 773 774 /* start the scatter */ 775 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 776 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 777 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 778 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNC__ 783 #define __FUNC__ /*<a name="MatMultTransposeAdd_MPIMAIJ_dof"></a>*/"MatMultTransposeAdd_MPIMAIJ_dof" 784 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 785 { 786 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 787 int ierr; 788 PetscFunctionBegin; 789 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 790 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 791 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 792 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 /* ---------------------------------------------------------------------------------- */ 797 #undef __FUNC__ 798 #define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ" 799 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 800 { 801 int ierr,size,n; 802 Mat_MPIMAIJ *b; 803 Mat B; 804 805 PetscFunctionBegin; 806 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 807 808 if (dof == 1) { 809 *maij = A; 810 } else { 811 ierr = MATCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 812 ierr = MatSetType(B,MATMAIJ);CHKERRQ(ierr); 813 B->assembled = PETSC_TRUE; 814 b = (Mat_MPIMAIJ*)B->data; 815 b->dof = dof; 816 817 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 818 if (size == 1) { 819 B->ops->destroy = MatDestroy_SeqMAIJ; 820 b->AIJ = A; 821 if (dof == 2) { 822 B->ops->mult = MatMult_SeqMAIJ_2; 823 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 824 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 825 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 826 } else if (dof == 3) { 827 B->ops->mult = MatMult_SeqMAIJ_3; 828 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 829 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 830 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 831 } else if (dof == 4) { 832 B->ops->mult = MatMult_SeqMAIJ_4; 833 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 834 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 835 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 836 } else if (dof == 5) { 837 B->ops->mult = MatMult_SeqMAIJ_5; 838 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 839 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 840 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 841 } else { 842 SETERRQ1(1,1,"Cannot handle a dof of %d\n",dof); 843 } 844 } else { 845 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 846 IS from,to; 847 Vec gvec; 848 int *garray,i; 849 850 b->A = A; 851 B->ops->destroy = MatDestroy_MPIMAIJ; 852 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 853 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 854 855 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 856 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 857 858 /* create two temporary Index sets for build scatter gather */ 859 garray = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(garray); 860 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 861 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 862 ierr = PetscFree(garray);CHKERRQ(ierr); 863 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 864 865 /* create temporary global vector to generate scatter context */ 866 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 867 868 /* generate the scatter context */ 869 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 870 871 ierr = ISDestroy(from);CHKERRQ(ierr); 872 ierr = ISDestroy(to);CHKERRQ(ierr); 873 ierr = VecDestroy(gvec);CHKERRQ(ierr); 874 875 B->ops->mult = MatMult_MPIMAIJ_dof; 876 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 877 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 878 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 879 } 880 *maij = B; 881 } 882 PetscFunctionReturn(0); 883 } 884 885 886 887 888 889 890 891 892 893 894 895 896