1 /*$Id: maij.c,v 1.9 2000/09/28 21:11:37 bsmith Exp bsmith $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/aij/mpi/mpiaij.h" 20 21 typedef struct { 22 int dof; /* number of components */ 23 Mat AIJ; /* representation of interpolation for one component */ 24 } Mat_SeqMAIJ; 25 26 typedef struct { 27 int dof; /* number of components */ 28 Mat AIJ,OAIJ; /* representation of interpolation for one component */ 29 Mat A; 30 VecScatter ctx; /* update ghost points for parallel case */ 31 Vec w; /* work space for ghost values for parallel case */ 32 } Mat_MPIMAIJ; 33 34 #undef __FUNC__ 35 #define __FUNC__ /*<a name="MatMAIJGetAIJ"></a>*/"MatMAIJGetAIJ" 36 int MatMAIJGetAIJ(Mat A,Mat *B) 37 { 38 int ierr; 39 PetscTruth ismpimaij,isseqmaij; 40 41 PetscFunctionBegin; 42 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 43 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 44 if (ismpimaij) { 45 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 46 47 *B = b->A; 48 } else if (isseqmaij) { 49 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 50 51 *B = b->AIJ; 52 } else { 53 *B = A; 54 } 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNC__ 59 #define __FUNC__ /*<a name="MatMAIJRedimension"></a>*/"MatMAIJRedimension" 60 int MatMAIJRedimension(Mat A,int dof,Mat *B) 61 { 62 int ierr; 63 Mat Aij; 64 65 PetscFunctionBegin; 66 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 67 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 68 PetscFunctionReturn(0); 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ /*<a name="MatDestroy_SeqMAIJ"></a>*/"MatDestroy_SeqMAIJ" 73 int MatDestroy_SeqMAIJ(Mat A) 74 { 75 int ierr; 76 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 77 78 PetscFunctionBegin; 79 if (b->AIJ) { 80 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 81 } 82 ierr = PetscFree(b);CHKERRQ(ierr); 83 PetscHeaderDestroy(A); 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNC__ 88 #define __FUNC__ /*<a name="MatDestroy_MPIMAIJ"></a>*/"MatDestroy_MPIMAIJ" 89 int MatDestroy_MPIMAIJ(Mat A) 90 { 91 int ierr; 92 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 93 94 PetscFunctionBegin; 95 if (b->AIJ) { 96 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 97 } 98 if (b->OAIJ) { 99 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 100 } 101 if (b->A) { 102 ierr = MatDestroy(b->A);CHKERRQ(ierr); 103 } 104 if (b->ctx) { 105 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 106 } 107 if (b->w) { 108 ierr = VecDestroy(b->w);CHKERRQ(ierr); 109 } 110 ierr = PetscFree(b);CHKERRQ(ierr); 111 PetscHeaderDestroy(A); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNC__ 116 #define __FUNC__ /*<a name="MatGetSize_MAIJ"></a>*/"MatGetSize_MAIJ" 117 static int MatGetSize_MAIJ(Mat A,int *m,int *n) 118 { 119 PetscFunctionBegin; 120 if (m) *m = A->M; 121 if (n) *n = A->N; 122 PetscFunctionReturn(0); 123 } 124 125 EXTERN_C_BEGIN 126 #undef __FUNC__ 127 #define __FUNC__ /*<a name="MatCreate_MAIJ"></a>*/"MatCreate_MAIJ" 128 int MatCreate_MAIJ(Mat A) 129 { 130 int ierr; 131 Mat_MPIMAIJ *b; 132 133 PetscFunctionBegin; 134 A->data = (void*)(b = PetscNew(Mat_MPIMAIJ));CHKPTRQ(b); 135 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 136 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 137 A->ops->getsize = MatGetSize_MAIJ; 138 A->factor = 0; 139 A->mapping = 0; 140 141 b->AIJ = 0; 142 b->dof = 0; 143 b->OAIJ = 0; 144 b->ctx = 0; 145 b->w = 0; 146 PetscFunctionReturn(0); 147 } 148 EXTERN_C_END 149 150 /* --------------------------------------------------------------------------------------*/ 151 #undef __FUNC__ 152 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2" 153 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 154 { 155 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 156 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 157 Scalar *x,*y,*v,sum1, sum2; 158 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 159 int n,i,jrow,j; 160 161 PetscFunctionBegin; 162 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 163 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 164 x = x + shift; /* shift for Fortran start by 1 indexing */ 165 idx = a->j; 166 v = a->a; 167 ii = a->i; 168 169 v += shift; /* shift for Fortran start by 1 indexing */ 170 idx += shift; 171 for (i=0; i<m; i++) { 172 jrow = ii[i]; 173 n = ii[i+1] - jrow; 174 sum1 = 0.0; 175 sum2 = 0.0; 176 for (j=0; j<n; j++) { 177 sum1 += v[jrow]*x[2*idx[jrow]]; 178 sum2 += v[jrow]*x[2*idx[jrow]+1]; 179 jrow++; 180 } 181 y[2*i] = sum1; 182 y[2*i+1] = sum2; 183 } 184 185 PLogFlops(4*a->nz - 2*m); 186 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 187 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNC__ 192 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2" 193 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 194 { 195 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 197 Scalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 198 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 199 200 PetscFunctionBegin; 201 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 202 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 203 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 204 y = y + shift; /* shift for Fortran start by 1 indexing */ 205 for (i=0; i<m; i++) { 206 idx = a->j + a->i[i] + shift; 207 v = a->a + a->i[i] + shift; 208 n = a->i[i+1] - a->i[i]; 209 alpha1 = x[2*i]; 210 alpha2 = x[2*i+1]; 211 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 212 } 213 PLogFlops(4*a->nz - 2*a->n); 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNC__ 220 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2" 221 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 222 { 223 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 224 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 225 Scalar *x,*y,*v,sum1, sum2; 226 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 227 int n,i,jrow,j; 228 229 PetscFunctionBegin; 230 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 231 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 232 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 233 x = x + shift; /* shift for Fortran start by 1 indexing */ 234 idx = a->j; 235 v = a->a; 236 ii = a->i; 237 238 v += shift; /* shift for Fortran start by 1 indexing */ 239 idx += shift; 240 for (i=0; i<m; i++) { 241 jrow = ii[i]; 242 n = ii[i+1] - jrow; 243 sum1 = 0.0; 244 sum2 = 0.0; 245 for (j=0; j<n; j++) { 246 sum1 += v[jrow]*x[2*idx[jrow]]; 247 sum2 += v[jrow]*x[2*idx[jrow]+1]; 248 jrow++; 249 } 250 y[2*i] += sum1; 251 y[2*i+1] += sum2; 252 } 253 254 PLogFlops(4*a->nz - 2*m); 255 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 256 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 257 PetscFunctionReturn(0); 258 } 259 #undef __FUNC__ 260 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2" 261 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 262 { 263 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 264 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 265 Scalar *x,*y,*v,alpha1,alpha2; 266 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 267 268 PetscFunctionBegin; 269 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 270 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 271 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 272 y = y + shift; /* shift for Fortran start by 1 indexing */ 273 for (i=0; i<m; i++) { 274 idx = a->j + a->i[i] + shift; 275 v = a->a + a->i[i] + shift; 276 n = a->i[i+1] - a->i[i]; 277 alpha1 = x[2*i]; 278 alpha2 = x[2*i+1]; 279 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 280 } 281 PLogFlops(4*a->nz - 2*a->n); 282 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 283 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 /* --------------------------------------------------------------------------------------*/ 287 #undef __FUNC__ 288 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3" 289 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 290 { 291 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 292 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 293 Scalar *x,*y,*v,sum1, sum2, sum3; 294 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 295 int n,i,jrow,j; 296 297 PetscFunctionBegin; 298 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 299 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 300 x = x + shift; /* shift for Fortran start by 1 indexing */ 301 idx = a->j; 302 v = a->a; 303 ii = a->i; 304 305 v += shift; /* shift for Fortran start by 1 indexing */ 306 idx += shift; 307 for (i=0; i<m; i++) { 308 jrow = ii[i]; 309 n = ii[i+1] - jrow; 310 sum1 = 0.0; 311 sum2 = 0.0; 312 sum3 = 0.0; 313 for (j=0; j<n; j++) { 314 sum1 += v[jrow]*x[3*idx[jrow]]; 315 sum2 += v[jrow]*x[3*idx[jrow]+1]; 316 sum3 += v[jrow]*x[3*idx[jrow]+2]; 317 jrow++; 318 } 319 y[3*i] = sum1; 320 y[3*i+1] = sum2; 321 y[3*i+2] = sum3; 322 } 323 324 PLogFlops(6*a->nz - 3*m); 325 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 326 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNC__ 331 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3" 332 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 333 { 334 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 335 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 336 Scalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 337 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 338 339 PetscFunctionBegin; 340 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 341 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 342 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 343 y = y + shift; /* shift for Fortran start by 1 indexing */ 344 for (i=0; i<m; i++) { 345 idx = a->j + a->i[i] + shift; 346 v = a->a + a->i[i] + shift; 347 n = a->i[i+1] - a->i[i]; 348 alpha1 = x[3*i]; 349 alpha2 = x[3*i+1]; 350 alpha3 = x[3*i+2]; 351 while (n-->0) { 352 y[3*(*idx)] += alpha1*(*v); 353 y[3*(*idx)+1] += alpha2*(*v); 354 y[3*(*idx)+2] += alpha3*(*v); 355 idx++; v++; 356 } 357 } 358 PLogFlops(6*a->nz - 3*a->n); 359 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 360 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNC__ 365 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3" 366 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 367 { 368 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 369 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 370 Scalar *x,*y,*v,sum1, sum2, sum3; 371 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 372 int n,i,jrow,j; 373 374 PetscFunctionBegin; 375 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 376 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 377 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 378 x = x + shift; /* shift for Fortran start by 1 indexing */ 379 idx = a->j; 380 v = a->a; 381 ii = a->i; 382 383 v += shift; /* shift for Fortran start by 1 indexing */ 384 idx += shift; 385 for (i=0; i<m; i++) { 386 jrow = ii[i]; 387 n = ii[i+1] - jrow; 388 sum1 = 0.0; 389 sum2 = 0.0; 390 sum3 = 0.0; 391 for (j=0; j<n; j++) { 392 sum1 += v[jrow]*x[3*idx[jrow]]; 393 sum2 += v[jrow]*x[3*idx[jrow]+1]; 394 sum3 += v[jrow]*x[3*idx[jrow]+2]; 395 jrow++; 396 } 397 y[3*i] += sum1; 398 y[3*i+1] += sum2; 399 y[3*i+2] += sum3; 400 } 401 402 PLogFlops(6*a->nz); 403 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 404 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 #undef __FUNC__ 408 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3" 409 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 410 { 411 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 412 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 413 Scalar *x,*y,*v,alpha1,alpha2,alpha3; 414 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 415 416 PetscFunctionBegin; 417 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 418 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 419 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 420 y = y + shift; /* shift for Fortran start by 1 indexing */ 421 for (i=0; i<m; i++) { 422 idx = a->j + a->i[i] + shift; 423 v = a->a + a->i[i] + shift; 424 n = a->i[i+1] - a->i[i]; 425 alpha1 = x[3*i]; 426 alpha2 = x[3*i+1]; 427 alpha3 = x[3*i+2]; 428 while (n-->0) { 429 y[3*(*idx)] += alpha1*(*v); 430 y[3*(*idx)+1] += alpha2*(*v); 431 y[3*(*idx)+2] += alpha3*(*v); 432 idx++; v++; 433 } 434 } 435 PLogFlops(6*a->nz); 436 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 437 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 /* ------------------------------------------------------------------------------*/ 442 #undef __FUNC__ 443 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4" 444 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 445 { 446 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 447 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 448 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 449 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 450 int n,i,jrow,j; 451 452 PetscFunctionBegin; 453 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 454 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 455 x = x + shift; /* shift for Fortran start by 1 indexing */ 456 idx = a->j; 457 v = a->a; 458 ii = a->i; 459 460 v += shift; /* shift for Fortran start by 1 indexing */ 461 idx += shift; 462 for (i=0; i<m; i++) { 463 jrow = ii[i]; 464 n = ii[i+1] - jrow; 465 sum1 = 0.0; 466 sum2 = 0.0; 467 sum3 = 0.0; 468 sum4 = 0.0; 469 for (j=0; j<n; j++) { 470 sum1 += v[jrow]*x[4*idx[jrow]]; 471 sum2 += v[jrow]*x[4*idx[jrow]+1]; 472 sum3 += v[jrow]*x[4*idx[jrow]+2]; 473 sum4 += v[jrow]*x[4*idx[jrow]+3]; 474 jrow++; 475 } 476 y[4*i] = sum1; 477 y[4*i+1] = sum2; 478 y[4*i+2] = sum3; 479 y[4*i+3] = sum4; 480 } 481 482 PLogFlops(8*a->nz - 4*m); 483 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 484 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNC__ 489 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4" 490 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 491 { 492 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 493 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 494 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 495 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 496 497 PetscFunctionBegin; 498 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 499 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 500 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 501 y = y + shift; /* shift for Fortran start by 1 indexing */ 502 for (i=0; i<m; i++) { 503 idx = a->j + a->i[i] + shift; 504 v = a->a + a->i[i] + shift; 505 n = a->i[i+1] - a->i[i]; 506 alpha1 = x[4*i]; 507 alpha2 = x[4*i+1]; 508 alpha3 = x[4*i+2]; 509 alpha4 = x[4*i+3]; 510 while (n-->0) { 511 y[4*(*idx)] += alpha1*(*v); 512 y[4*(*idx)+1] += alpha2*(*v); 513 y[4*(*idx)+2] += alpha3*(*v); 514 y[4*(*idx)+3] += alpha4*(*v); 515 idx++; v++; 516 } 517 } 518 PLogFlops(8*a->nz - 4*a->n); 519 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 520 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNC__ 525 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4" 526 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 527 { 528 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 529 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 530 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 531 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 532 int n,i,jrow,j; 533 534 PetscFunctionBegin; 535 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 536 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 537 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 538 x = x + shift; /* shift for Fortran start by 1 indexing */ 539 idx = a->j; 540 v = a->a; 541 ii = a->i; 542 543 v += shift; /* shift for Fortran start by 1 indexing */ 544 idx += shift; 545 for (i=0; i<m; i++) { 546 jrow = ii[i]; 547 n = ii[i+1] - jrow; 548 sum1 = 0.0; 549 sum2 = 0.0; 550 sum3 = 0.0; 551 sum4 = 0.0; 552 for (j=0; j<n; j++) { 553 sum1 += v[jrow]*x[4*idx[jrow]]; 554 sum2 += v[jrow]*x[4*idx[jrow]+1]; 555 sum3 += v[jrow]*x[4*idx[jrow]+2]; 556 sum4 += v[jrow]*x[4*idx[jrow]+3]; 557 jrow++; 558 } 559 y[4*i] += sum1; 560 y[4*i+1] += sum2; 561 y[4*i+2] += sum3; 562 y[4*i+3] += sum4; 563 } 564 565 PLogFlops(8*a->nz - 4*m); 566 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 567 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 568 PetscFunctionReturn(0); 569 } 570 #undef __FUNC__ 571 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4" 572 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 573 { 574 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 575 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 576 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 577 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 578 579 PetscFunctionBegin; 580 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 581 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 582 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 583 y = y + shift; /* shift for Fortran start by 1 indexing */ 584 for (i=0; i<m; i++) { 585 idx = a->j + a->i[i] + shift; 586 v = a->a + a->i[i] + shift; 587 n = a->i[i+1] - a->i[i]; 588 alpha1 = x[4*i]; 589 alpha2 = x[4*i+1]; 590 alpha3 = x[4*i+2]; 591 alpha4 = x[4*i+3]; 592 while (n-->0) { 593 y[4*(*idx)] += alpha1*(*v); 594 y[4*(*idx)+1] += alpha2*(*v); 595 y[4*(*idx)+2] += alpha3*(*v); 596 y[4*(*idx)+3] += alpha4*(*v); 597 idx++; v++; 598 } 599 } 600 PLogFlops(8*a->nz - 4*a->n); 601 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 602 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 605 } 606 607 /* ------------------------------------------------------------------------------*/ 608 #undef __FUNC__ 609 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5" 610 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 611 { 612 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 613 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 614 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 615 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 616 int n,i,jrow,j; 617 618 PetscFunctionBegin; 619 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 620 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 621 x = x + shift; /* shift for Fortran start by 1 indexing */ 622 idx = a->j; 623 v = a->a; 624 ii = a->i; 625 626 v += shift; /* shift for Fortran start by 1 indexing */ 627 idx += shift; 628 for (i=0; i<m; i++) { 629 jrow = ii[i]; 630 n = ii[i+1] - jrow; 631 sum1 = 0.0; 632 sum2 = 0.0; 633 sum3 = 0.0; 634 sum4 = 0.0; 635 sum5 = 0.0; 636 for (j=0; j<n; j++) { 637 sum1 += v[jrow]*x[5*idx[jrow]]; 638 sum2 += v[jrow]*x[5*idx[jrow]+1]; 639 sum3 += v[jrow]*x[5*idx[jrow]+2]; 640 sum4 += v[jrow]*x[5*idx[jrow]+3]; 641 sum5 += v[jrow]*x[5*idx[jrow]+4]; 642 jrow++; 643 } 644 y[5*i] = sum1; 645 y[5*i+1] = sum2; 646 y[5*i+2] = sum3; 647 y[5*i+3] = sum4; 648 y[5*i+4] = sum5; 649 } 650 651 PLogFlops(10*a->nz - 5*m); 652 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 653 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNC__ 658 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5" 659 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 660 { 661 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 663 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 664 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 665 666 PetscFunctionBegin; 667 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 668 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 669 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 670 y = y + shift; /* shift for Fortran start by 1 indexing */ 671 for (i=0; i<m; i++) { 672 idx = a->j + a->i[i] + shift; 673 v = a->a + a->i[i] + shift; 674 n = a->i[i+1] - a->i[i]; 675 alpha1 = x[5*i]; 676 alpha2 = x[5*i+1]; 677 alpha3 = x[5*i+2]; 678 alpha4 = x[5*i+3]; 679 alpha5 = x[5*i+4]; 680 while (n-->0) { 681 y[5*(*idx)] += alpha1*(*v); 682 y[5*(*idx)+1] += alpha2*(*v); 683 y[5*(*idx)+2] += alpha3*(*v); 684 y[5*(*idx)+3] += alpha4*(*v); 685 y[5*(*idx)+4] += alpha5*(*v); 686 idx++; v++; 687 } 688 } 689 PLogFlops(10*a->nz - 5*a->n); 690 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 691 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNC__ 696 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5" 697 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 698 { 699 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 701 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 702 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 703 int n,i,jrow,j; 704 705 PetscFunctionBegin; 706 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 707 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 708 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 709 x = x + shift; /* shift for Fortran start by 1 indexing */ 710 idx = a->j; 711 v = a->a; 712 ii = a->i; 713 714 v += shift; /* shift for Fortran start by 1 indexing */ 715 idx += shift; 716 for (i=0; i<m; i++) { 717 jrow = ii[i]; 718 n = ii[i+1] - jrow; 719 sum1 = 0.0; 720 sum2 = 0.0; 721 sum3 = 0.0; 722 sum4 = 0.0; 723 sum5 = 0.0; 724 for (j=0; j<n; j++) { 725 sum1 += v[jrow]*x[5*idx[jrow]]; 726 sum2 += v[jrow]*x[5*idx[jrow]+1]; 727 sum3 += v[jrow]*x[5*idx[jrow]+2]; 728 sum4 += v[jrow]*x[5*idx[jrow]+3]; 729 sum5 += v[jrow]*x[5*idx[jrow]+4]; 730 jrow++; 731 } 732 y[5*i] += sum1; 733 y[5*i+1] += sum2; 734 y[5*i+2] += sum3; 735 y[5*i+3] += sum4; 736 y[5*i+4] += sum5; 737 } 738 739 PLogFlops(10*a->nz); 740 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 741 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNC__ 746 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5" 747 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 748 { 749 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 751 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 752 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 753 754 PetscFunctionBegin; 755 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 756 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 757 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 758 y = y + shift; /* shift for Fortran start by 1 indexing */ 759 for (i=0; i<m; i++) { 760 idx = a->j + a->i[i] + shift; 761 v = a->a + a->i[i] + shift; 762 n = a->i[i+1] - a->i[i]; 763 alpha1 = x[5*i]; 764 alpha2 = x[5*i+1]; 765 alpha3 = x[5*i+2]; 766 alpha4 = x[5*i+3]; 767 alpha5 = x[5*i+4]; 768 while (n-->0) { 769 y[5*(*idx)] += alpha1*(*v); 770 y[5*(*idx)+1] += alpha2*(*v); 771 y[5*(*idx)+2] += alpha3*(*v); 772 y[5*(*idx)+3] += alpha4*(*v); 773 y[5*(*idx)+4] += alpha5*(*v); 774 idx++; v++; 775 } 776 } 777 PLogFlops(10*a->nz); 778 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 779 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 780 PetscFunctionReturn(0); 781 } 782 783 /*===================================================================================*/ 784 #undef __FUNC__ 785 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_dof"></a>*/"MatMult_MPIMAIJ_dof" 786 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 787 { 788 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 789 int ierr; 790 PetscFunctionBegin; 791 792 /* start the scatter */ 793 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 794 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 795 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 796 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNC__ 801 #define __FUNC__ /*<a name="MatMultTranspose_MPIMAIJ_dof"></a>*/"MatMultTranspose_MPIMAIJ_dof" 802 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 803 { 804 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 805 int ierr; 806 PetscFunctionBegin; 807 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 808 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 809 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 810 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 811 PetscFunctionReturn(0); 812 } 813 814 #undef __FUNC__ 815 #define __FUNC__ /*<a name="MatMultAdd_MPIMAIJ_dof"></a>*/"MatMultAdd_MPIMAIJ_dof" 816 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 817 { 818 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 819 int ierr; 820 PetscFunctionBegin; 821 822 /* start the scatter */ 823 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 824 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 825 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 826 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNC__ 831 #define __FUNC__ /*<a name="MatMultTransposeAdd_MPIMAIJ_dof"></a>*/"MatMultTransposeAdd_MPIMAIJ_dof" 832 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 833 { 834 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 835 int ierr; 836 PetscFunctionBegin; 837 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 838 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 839 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 840 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 841 PetscFunctionReturn(0); 842 } 843 844 /* ---------------------------------------------------------------------------------- */ 845 #undef __FUNC__ 846 #define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ" 847 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 848 { 849 int ierr,size,n; 850 Mat_MPIMAIJ *b; 851 Mat B; 852 853 PetscFunctionBegin; 854 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 855 856 if (dof == 1) { 857 *maij = A; 858 } else { 859 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 860 B->assembled = PETSC_TRUE; 861 862 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 863 if (size == 1) { 864 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 865 B->ops->destroy = MatDestroy_SeqMAIJ; 866 b = (Mat_MPIMAIJ*)B->data; 867 b->dof = dof; 868 b->AIJ = A; 869 if (dof == 2) { 870 B->ops->mult = MatMult_SeqMAIJ_2; 871 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 872 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 873 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 874 } else if (dof == 3) { 875 B->ops->mult = MatMult_SeqMAIJ_3; 876 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 877 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 878 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 879 } else if (dof == 4) { 880 B->ops->mult = MatMult_SeqMAIJ_4; 881 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 882 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 883 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 884 } else if (dof == 5) { 885 B->ops->mult = MatMult_SeqMAIJ_5; 886 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 887 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 888 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 889 } else { 890 SETERRQ1(1,"Cannot handle a dof of %d\n",dof); 891 } 892 } else { 893 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 894 IS from,to; 895 Vec gvec; 896 int *garray,i; 897 898 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 899 B->ops->destroy = MatDestroy_MPIMAIJ; 900 b = (Mat_MPIMAIJ*)B->data; 901 b->dof = dof; 902 b->A = A; 903 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 904 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 905 906 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 907 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 908 909 /* create two temporary Index sets for build scatter gather */ 910 garray = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(garray); 911 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 912 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 913 ierr = PetscFree(garray);CHKERRQ(ierr); 914 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 915 916 /* create temporary global vector to generate scatter context */ 917 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 918 919 /* generate the scatter context */ 920 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 921 922 ierr = ISDestroy(from);CHKERRQ(ierr); 923 ierr = ISDestroy(to);CHKERRQ(ierr); 924 ierr = VecDestroy(gvec);CHKERRQ(ierr); 925 926 B->ops->mult = MatMult_MPIMAIJ_dof; 927 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 928 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 929 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 930 } 931 *maij = B; 932 } 933 PetscFunctionReturn(0); 934 } 935 936 937 938 939 940 941 942 943 944 945 946 947