1 /*$Id: maij.c,v 1.10 2000/10/10 19:24:12 bsmith Exp bsmith $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/aij/mpi/mpiaij.h" 20 21 typedef struct { 22 int dof; /* number of components */ 23 Mat AIJ; /* representation of interpolation for one component */ 24 } Mat_SeqMAIJ; 25 26 typedef struct { 27 int dof; /* number of components */ 28 Mat AIJ,OAIJ; /* representation of interpolation for one component */ 29 Mat A; 30 VecScatter ctx; /* update ghost points for parallel case */ 31 Vec w; /* work space for ghost values for parallel case */ 32 } Mat_MPIMAIJ; 33 34 #undef __FUNC__ 35 #define __FUNC__ /*<a name="MatMAIJGetAIJ"></a>*/"MatMAIJGetAIJ" 36 int MatMAIJGetAIJ(Mat A,Mat *B) 37 { 38 int ierr; 39 PetscTruth ismpimaij,isseqmaij; 40 41 PetscFunctionBegin; 42 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 43 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 44 if (ismpimaij) { 45 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 46 47 *B = b->A; 48 } else if (isseqmaij) { 49 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 50 51 *B = b->AIJ; 52 } else { 53 *B = A; 54 } 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNC__ 59 #define __FUNC__ /*<a name="MatMAIJRedimension"></a>*/"MatMAIJRedimension" 60 int MatMAIJRedimension(Mat A,int dof,Mat *B) 61 { 62 int ierr; 63 Mat Aij; 64 65 PetscFunctionBegin; 66 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 67 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 68 PetscFunctionReturn(0); 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ /*<a name="MatDestroy_SeqMAIJ"></a>*/"MatDestroy_SeqMAIJ" 73 int MatDestroy_SeqMAIJ(Mat A) 74 { 75 int ierr; 76 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 77 78 PetscFunctionBegin; 79 if (b->AIJ) { 80 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 81 } 82 ierr = PetscFree(b);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 #undef __FUNC__ 87 #define __FUNC__ /*<a name="MatDestroy_MPIMAIJ"></a>*/"MatDestroy_MPIMAIJ" 88 int MatDestroy_MPIMAIJ(Mat A) 89 { 90 int ierr; 91 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 92 93 PetscFunctionBegin; 94 if (b->AIJ) { 95 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 96 } 97 if (b->OAIJ) { 98 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 99 } 100 if (b->A) { 101 ierr = MatDestroy(b->A);CHKERRQ(ierr); 102 } 103 if (b->ctx) { 104 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 105 } 106 if (b->w) { 107 ierr = VecDestroy(b->w);CHKERRQ(ierr); 108 } 109 ierr = PetscFree(b);CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 EXTERN_C_BEGIN 114 #undef __FUNC__ 115 #define __FUNC__ /*<a name="MatCreate_MAIJ"></a>*/"MatCreate_MAIJ" 116 int MatCreate_MAIJ(Mat A) 117 { 118 int ierr; 119 Mat_MPIMAIJ *b; 120 121 PetscFunctionBegin; 122 A->data = (void*)(b = PetscNew(Mat_MPIMAIJ));CHKPTRQ(b); 123 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 124 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 125 A->factor = 0; 126 A->mapping = 0; 127 128 b->AIJ = 0; 129 b->dof = 0; 130 b->OAIJ = 0; 131 b->ctx = 0; 132 b->w = 0; 133 PetscFunctionReturn(0); 134 } 135 EXTERN_C_END 136 137 /* --------------------------------------------------------------------------------------*/ 138 #undef __FUNC__ 139 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2" 140 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 141 { 142 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 143 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 144 Scalar *x,*y,*v,sum1, sum2; 145 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 146 int n,i,jrow,j; 147 148 PetscFunctionBegin; 149 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 150 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 151 x = x + shift; /* shift for Fortran start by 1 indexing */ 152 idx = a->j; 153 v = a->a; 154 ii = a->i; 155 156 v += shift; /* shift for Fortran start by 1 indexing */ 157 idx += shift; 158 for (i=0; i<m; i++) { 159 jrow = ii[i]; 160 n = ii[i+1] - jrow; 161 sum1 = 0.0; 162 sum2 = 0.0; 163 for (j=0; j<n; j++) { 164 sum1 += v[jrow]*x[2*idx[jrow]]; 165 sum2 += v[jrow]*x[2*idx[jrow]+1]; 166 jrow++; 167 } 168 y[2*i] = sum1; 169 y[2*i+1] = sum2; 170 } 171 172 PLogFlops(4*a->nz - 2*m); 173 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 175 PetscFunctionReturn(0); 176 } 177 178 #undef __FUNC__ 179 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2" 180 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 181 { 182 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 183 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 184 Scalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 185 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 186 187 PetscFunctionBegin; 188 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 189 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 190 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 191 y = y + shift; /* shift for Fortran start by 1 indexing */ 192 for (i=0; i<m; i++) { 193 idx = a->j + a->i[i] + shift; 194 v = a->a + a->i[i] + shift; 195 n = a->i[i+1] - a->i[i]; 196 alpha1 = x[2*i]; 197 alpha2 = x[2*i+1]; 198 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 199 } 200 PLogFlops(4*a->nz - 2*b->AIJ->n); 201 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 202 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 203 PetscFunctionReturn(0); 204 } 205 206 #undef __FUNC__ 207 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2" 208 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 209 { 210 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 211 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 212 Scalar *x,*y,*v,sum1, sum2; 213 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 214 int n,i,jrow,j; 215 216 PetscFunctionBegin; 217 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 218 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 219 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 220 x = x + shift; /* shift for Fortran start by 1 indexing */ 221 idx = a->j; 222 v = a->a; 223 ii = a->i; 224 225 v += shift; /* shift for Fortran start by 1 indexing */ 226 idx += shift; 227 for (i=0; i<m; i++) { 228 jrow = ii[i]; 229 n = ii[i+1] - jrow; 230 sum1 = 0.0; 231 sum2 = 0.0; 232 for (j=0; j<n; j++) { 233 sum1 += v[jrow]*x[2*idx[jrow]]; 234 sum2 += v[jrow]*x[2*idx[jrow]+1]; 235 jrow++; 236 } 237 y[2*i] += sum1; 238 y[2*i+1] += sum2; 239 } 240 241 PLogFlops(4*a->nz - 2*m); 242 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 #undef __FUNC__ 247 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2" 248 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 249 { 250 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 251 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 252 Scalar *x,*y,*v,alpha1,alpha2; 253 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 254 255 PetscFunctionBegin; 256 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 257 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 258 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 259 y = y + shift; /* shift for Fortran start by 1 indexing */ 260 for (i=0; i<m; i++) { 261 idx = a->j + a->i[i] + shift; 262 v = a->a + a->i[i] + shift; 263 n = a->i[i+1] - a->i[i]; 264 alpha1 = x[2*i]; 265 alpha2 = x[2*i+1]; 266 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 267 } 268 PLogFlops(4*a->nz - 2*b->AIJ->n); 269 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 270 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 /* --------------------------------------------------------------------------------------*/ 274 #undef __FUNC__ 275 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3" 276 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 277 { 278 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 280 Scalar *x,*y,*v,sum1, sum2, sum3; 281 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 282 int n,i,jrow,j; 283 284 PetscFunctionBegin; 285 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 286 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 287 x = x + shift; /* shift for Fortran start by 1 indexing */ 288 idx = a->j; 289 v = a->a; 290 ii = a->i; 291 292 v += shift; /* shift for Fortran start by 1 indexing */ 293 idx += shift; 294 for (i=0; i<m; i++) { 295 jrow = ii[i]; 296 n = ii[i+1] - jrow; 297 sum1 = 0.0; 298 sum2 = 0.0; 299 sum3 = 0.0; 300 for (j=0; j<n; j++) { 301 sum1 += v[jrow]*x[3*idx[jrow]]; 302 sum2 += v[jrow]*x[3*idx[jrow]+1]; 303 sum3 += v[jrow]*x[3*idx[jrow]+2]; 304 jrow++; 305 } 306 y[3*i] = sum1; 307 y[3*i+1] = sum2; 308 y[3*i+2] = sum3; 309 } 310 311 PLogFlops(6*a->nz - 3*m); 312 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 313 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNC__ 318 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3" 319 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 320 { 321 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 322 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 323 Scalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 324 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 325 326 PetscFunctionBegin; 327 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 328 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 329 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 330 y = y + shift; /* shift for Fortran start by 1 indexing */ 331 for (i=0; i<m; i++) { 332 idx = a->j + a->i[i] + shift; 333 v = a->a + a->i[i] + shift; 334 n = a->i[i+1] - a->i[i]; 335 alpha1 = x[3*i]; 336 alpha2 = x[3*i+1]; 337 alpha3 = x[3*i+2]; 338 while (n-->0) { 339 y[3*(*idx)] += alpha1*(*v); 340 y[3*(*idx)+1] += alpha2*(*v); 341 y[3*(*idx)+2] += alpha3*(*v); 342 idx++; v++; 343 } 344 } 345 PLogFlops(6*a->nz - 3*b->AIJ->n); 346 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 347 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNC__ 352 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3" 353 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 354 { 355 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 356 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 357 Scalar *x,*y,*v,sum1, sum2, sum3; 358 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 359 int n,i,jrow,j; 360 361 PetscFunctionBegin; 362 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 363 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 364 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 365 x = x + shift; /* shift for Fortran start by 1 indexing */ 366 idx = a->j; 367 v = a->a; 368 ii = a->i; 369 370 v += shift; /* shift for Fortran start by 1 indexing */ 371 idx += shift; 372 for (i=0; i<m; i++) { 373 jrow = ii[i]; 374 n = ii[i+1] - jrow; 375 sum1 = 0.0; 376 sum2 = 0.0; 377 sum3 = 0.0; 378 for (j=0; j<n; j++) { 379 sum1 += v[jrow]*x[3*idx[jrow]]; 380 sum2 += v[jrow]*x[3*idx[jrow]+1]; 381 sum3 += v[jrow]*x[3*idx[jrow]+2]; 382 jrow++; 383 } 384 y[3*i] += sum1; 385 y[3*i+1] += sum2; 386 y[3*i+2] += sum3; 387 } 388 389 PLogFlops(6*a->nz); 390 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 391 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 392 PetscFunctionReturn(0); 393 } 394 #undef __FUNC__ 395 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3" 396 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 397 { 398 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 400 Scalar *x,*y,*v,alpha1,alpha2,alpha3; 401 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 402 403 PetscFunctionBegin; 404 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 405 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 406 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 407 y = y + shift; /* shift for Fortran start by 1 indexing */ 408 for (i=0; i<m; i++) { 409 idx = a->j + a->i[i] + shift; 410 v = a->a + a->i[i] + shift; 411 n = a->i[i+1] - a->i[i]; 412 alpha1 = x[3*i]; 413 alpha2 = x[3*i+1]; 414 alpha3 = x[3*i+2]; 415 while (n-->0) { 416 y[3*(*idx)] += alpha1*(*v); 417 y[3*(*idx)+1] += alpha2*(*v); 418 y[3*(*idx)+2] += alpha3*(*v); 419 idx++; v++; 420 } 421 } 422 PLogFlops(6*a->nz); 423 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 424 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 /* ------------------------------------------------------------------------------*/ 429 #undef __FUNC__ 430 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4" 431 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 432 { 433 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 434 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 435 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 436 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 437 int n,i,jrow,j; 438 439 PetscFunctionBegin; 440 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 441 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 442 x = x + shift; /* shift for Fortran start by 1 indexing */ 443 idx = a->j; 444 v = a->a; 445 ii = a->i; 446 447 v += shift; /* shift for Fortran start by 1 indexing */ 448 idx += shift; 449 for (i=0; i<m; i++) { 450 jrow = ii[i]; 451 n = ii[i+1] - jrow; 452 sum1 = 0.0; 453 sum2 = 0.0; 454 sum3 = 0.0; 455 sum4 = 0.0; 456 for (j=0; j<n; j++) { 457 sum1 += v[jrow]*x[4*idx[jrow]]; 458 sum2 += v[jrow]*x[4*idx[jrow]+1]; 459 sum3 += v[jrow]*x[4*idx[jrow]+2]; 460 sum4 += v[jrow]*x[4*idx[jrow]+3]; 461 jrow++; 462 } 463 y[4*i] = sum1; 464 y[4*i+1] = sum2; 465 y[4*i+2] = sum3; 466 y[4*i+3] = sum4; 467 } 468 469 PLogFlops(8*a->nz - 4*m); 470 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 471 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNC__ 476 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4" 477 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 478 { 479 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 480 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 481 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 482 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 483 484 PetscFunctionBegin; 485 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 486 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 487 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 488 y = y + shift; /* shift for Fortran start by 1 indexing */ 489 for (i=0; i<m; i++) { 490 idx = a->j + a->i[i] + shift; 491 v = a->a + a->i[i] + shift; 492 n = a->i[i+1] - a->i[i]; 493 alpha1 = x[4*i]; 494 alpha2 = x[4*i+1]; 495 alpha3 = x[4*i+2]; 496 alpha4 = x[4*i+3]; 497 while (n-->0) { 498 y[4*(*idx)] += alpha1*(*v); 499 y[4*(*idx)+1] += alpha2*(*v); 500 y[4*(*idx)+2] += alpha3*(*v); 501 y[4*(*idx)+3] += alpha4*(*v); 502 idx++; v++; 503 } 504 } 505 PLogFlops(8*a->nz - 4*b->AIJ->n); 506 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 507 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNC__ 512 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4" 513 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 514 { 515 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 516 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 517 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 518 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 519 int n,i,jrow,j; 520 521 PetscFunctionBegin; 522 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 523 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 524 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 525 x = x + shift; /* shift for Fortran start by 1 indexing */ 526 idx = a->j; 527 v = a->a; 528 ii = a->i; 529 530 v += shift; /* shift for Fortran start by 1 indexing */ 531 idx += shift; 532 for (i=0; i<m; i++) { 533 jrow = ii[i]; 534 n = ii[i+1] - jrow; 535 sum1 = 0.0; 536 sum2 = 0.0; 537 sum3 = 0.0; 538 sum4 = 0.0; 539 for (j=0; j<n; j++) { 540 sum1 += v[jrow]*x[4*idx[jrow]]; 541 sum2 += v[jrow]*x[4*idx[jrow]+1]; 542 sum3 += v[jrow]*x[4*idx[jrow]+2]; 543 sum4 += v[jrow]*x[4*idx[jrow]+3]; 544 jrow++; 545 } 546 y[4*i] += sum1; 547 y[4*i+1] += sum2; 548 y[4*i+2] += sum3; 549 y[4*i+3] += sum4; 550 } 551 552 PLogFlops(8*a->nz - 4*m); 553 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 554 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 555 PetscFunctionReturn(0); 556 } 557 #undef __FUNC__ 558 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4" 559 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 560 { 561 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 562 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 563 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 564 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 565 566 PetscFunctionBegin; 567 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 568 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 569 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 570 y = y + shift; /* shift for Fortran start by 1 indexing */ 571 for (i=0; i<m; i++) { 572 idx = a->j + a->i[i] + shift; 573 v = a->a + a->i[i] + shift; 574 n = a->i[i+1] - a->i[i]; 575 alpha1 = x[4*i]; 576 alpha2 = x[4*i+1]; 577 alpha3 = x[4*i+2]; 578 alpha4 = x[4*i+3]; 579 while (n-->0) { 580 y[4*(*idx)] += alpha1*(*v); 581 y[4*(*idx)+1] += alpha2*(*v); 582 y[4*(*idx)+2] += alpha3*(*v); 583 y[4*(*idx)+3] += alpha4*(*v); 584 idx++; v++; 585 } 586 } 587 PLogFlops(8*a->nz - 4*b->AIJ->n); 588 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 589 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 592 } 593 594 /* ------------------------------------------------------------------------------*/ 595 #undef __FUNC__ 596 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5" 597 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 598 { 599 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 600 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 601 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 602 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 603 int n,i,jrow,j; 604 605 PetscFunctionBegin; 606 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 607 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 608 x = x + shift; /* shift for Fortran start by 1 indexing */ 609 idx = a->j; 610 v = a->a; 611 ii = a->i; 612 613 v += shift; /* shift for Fortran start by 1 indexing */ 614 idx += shift; 615 for (i=0; i<m; i++) { 616 jrow = ii[i]; 617 n = ii[i+1] - jrow; 618 sum1 = 0.0; 619 sum2 = 0.0; 620 sum3 = 0.0; 621 sum4 = 0.0; 622 sum5 = 0.0; 623 for (j=0; j<n; j++) { 624 sum1 += v[jrow]*x[5*idx[jrow]]; 625 sum2 += v[jrow]*x[5*idx[jrow]+1]; 626 sum3 += v[jrow]*x[5*idx[jrow]+2]; 627 sum4 += v[jrow]*x[5*idx[jrow]+3]; 628 sum5 += v[jrow]*x[5*idx[jrow]+4]; 629 jrow++; 630 } 631 y[5*i] = sum1; 632 y[5*i+1] = sum2; 633 y[5*i+2] = sum3; 634 y[5*i+3] = sum4; 635 y[5*i+4] = sum5; 636 } 637 638 PLogFlops(10*a->nz - 5*m); 639 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 640 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } 643 644 #undef __FUNC__ 645 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5" 646 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 647 { 648 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 649 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 650 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 651 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 652 653 PetscFunctionBegin; 654 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 655 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 656 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 657 y = y + shift; /* shift for Fortran start by 1 indexing */ 658 for (i=0; i<m; i++) { 659 idx = a->j + a->i[i] + shift; 660 v = a->a + a->i[i] + shift; 661 n = a->i[i+1] - a->i[i]; 662 alpha1 = x[5*i]; 663 alpha2 = x[5*i+1]; 664 alpha3 = x[5*i+2]; 665 alpha4 = x[5*i+3]; 666 alpha5 = x[5*i+4]; 667 while (n-->0) { 668 y[5*(*idx)] += alpha1*(*v); 669 y[5*(*idx)+1] += alpha2*(*v); 670 y[5*(*idx)+2] += alpha3*(*v); 671 y[5*(*idx)+3] += alpha4*(*v); 672 y[5*(*idx)+4] += alpha5*(*v); 673 idx++; v++; 674 } 675 } 676 PLogFlops(10*a->nz - 5*b->AIJ->n); 677 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 678 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNC__ 683 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5" 684 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 685 { 686 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 688 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 689 int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii; 690 int n,i,jrow,j; 691 692 PetscFunctionBegin; 693 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 694 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 695 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 696 x = x + shift; /* shift for Fortran start by 1 indexing */ 697 idx = a->j; 698 v = a->a; 699 ii = a->i; 700 701 v += shift; /* shift for Fortran start by 1 indexing */ 702 idx += shift; 703 for (i=0; i<m; i++) { 704 jrow = ii[i]; 705 n = ii[i+1] - jrow; 706 sum1 = 0.0; 707 sum2 = 0.0; 708 sum3 = 0.0; 709 sum4 = 0.0; 710 sum5 = 0.0; 711 for (j=0; j<n; j++) { 712 sum1 += v[jrow]*x[5*idx[jrow]]; 713 sum2 += v[jrow]*x[5*idx[jrow]+1]; 714 sum3 += v[jrow]*x[5*idx[jrow]+2]; 715 sum4 += v[jrow]*x[5*idx[jrow]+3]; 716 sum5 += v[jrow]*x[5*idx[jrow]+4]; 717 jrow++; 718 } 719 y[5*i] += sum1; 720 y[5*i+1] += sum2; 721 y[5*i+2] += sum3; 722 y[5*i+3] += sum4; 723 y[5*i+4] += sum5; 724 } 725 726 PLogFlops(10*a->nz); 727 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 728 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNC__ 733 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5" 734 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 735 { 736 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 737 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 738 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 739 int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift; 740 741 PetscFunctionBegin; 742 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 743 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 744 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 745 y = y + shift; /* shift for Fortran start by 1 indexing */ 746 for (i=0; i<m; i++) { 747 idx = a->j + a->i[i] + shift; 748 v = a->a + a->i[i] + shift; 749 n = a->i[i+1] - a->i[i]; 750 alpha1 = x[5*i]; 751 alpha2 = x[5*i+1]; 752 alpha3 = x[5*i+2]; 753 alpha4 = x[5*i+3]; 754 alpha5 = x[5*i+4]; 755 while (n-->0) { 756 y[5*(*idx)] += alpha1*(*v); 757 y[5*(*idx)+1] += alpha2*(*v); 758 y[5*(*idx)+2] += alpha3*(*v); 759 y[5*(*idx)+3] += alpha4*(*v); 760 y[5*(*idx)+4] += alpha5*(*v); 761 idx++; v++; 762 } 763 } 764 PLogFlops(10*a->nz); 765 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 766 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 767 PetscFunctionReturn(0); 768 } 769 770 /*===================================================================================*/ 771 #undef __FUNC__ 772 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_dof"></a>*/"MatMult_MPIMAIJ_dof" 773 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 774 { 775 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 776 int ierr; 777 PetscFunctionBegin; 778 779 /* start the scatter */ 780 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 781 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 782 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 783 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNC__ 788 #define __FUNC__ /*<a name="MatMultTranspose_MPIMAIJ_dof"></a>*/"MatMultTranspose_MPIMAIJ_dof" 789 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 790 { 791 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 792 int ierr; 793 PetscFunctionBegin; 794 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 795 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 796 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 797 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } 800 801 #undef __FUNC__ 802 #define __FUNC__ /*<a name="MatMultAdd_MPIMAIJ_dof"></a>*/"MatMultAdd_MPIMAIJ_dof" 803 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 804 { 805 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 806 int ierr; 807 PetscFunctionBegin; 808 809 /* start the scatter */ 810 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 811 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 812 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 813 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNC__ 818 #define __FUNC__ /*<a name="MatMultTransposeAdd_MPIMAIJ_dof"></a>*/"MatMultTransposeAdd_MPIMAIJ_dof" 819 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 820 { 821 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 822 int ierr; 823 PetscFunctionBegin; 824 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 825 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 826 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 827 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 /* ---------------------------------------------------------------------------------- */ 832 #undef __FUNC__ 833 #define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ" 834 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 835 { 836 int ierr,size,n; 837 Mat_MPIMAIJ *b; 838 Mat B; 839 840 PetscFunctionBegin; 841 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 842 843 if (dof == 1) { 844 *maij = A; 845 } else { 846 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 847 B->assembled = PETSC_TRUE; 848 849 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 850 if (size == 1) { 851 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 852 B->ops->destroy = MatDestroy_SeqMAIJ; 853 b = (Mat_MPIMAIJ*)B->data; 854 b->dof = dof; 855 b->AIJ = A; 856 if (dof == 2) { 857 B->ops->mult = MatMult_SeqMAIJ_2; 858 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 859 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 860 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 861 } else if (dof == 3) { 862 B->ops->mult = MatMult_SeqMAIJ_3; 863 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 864 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 865 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 866 } else if (dof == 4) { 867 B->ops->mult = MatMult_SeqMAIJ_4; 868 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 869 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 870 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 871 } else if (dof == 5) { 872 B->ops->mult = MatMult_SeqMAIJ_5; 873 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 874 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 875 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 876 } else { 877 SETERRQ1(1,"Cannot handle a dof of %d\n",dof); 878 } 879 } else { 880 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 881 IS from,to; 882 Vec gvec; 883 int *garray,i; 884 885 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 886 B->ops->destroy = MatDestroy_MPIMAIJ; 887 b = (Mat_MPIMAIJ*)B->data; 888 b->dof = dof; 889 b->A = A; 890 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 891 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 892 893 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 894 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 895 896 /* create two temporary Index sets for build scatter gather */ 897 garray = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(garray); 898 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 899 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 900 ierr = PetscFree(garray);CHKERRQ(ierr); 901 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 902 903 /* create temporary global vector to generate scatter context */ 904 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 905 906 /* generate the scatter context */ 907 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 908 909 ierr = ISDestroy(from);CHKERRQ(ierr); 910 ierr = ISDestroy(to);CHKERRQ(ierr); 911 ierr = VecDestroy(gvec);CHKERRQ(ierr); 912 913 B->ops->mult = MatMult_MPIMAIJ_dof; 914 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 915 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 916 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 917 } 918 *maij = B; 919 } 920 PetscFunctionReturn(0); 921 } 922 923 924 925 926 927 928 929 930 931 932 933 934