1 /*$Id: maij.c,v 1.4 2000/06/01 20:10:27 bsmith Exp bsmith $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include "src/mat/impls/aij/mpi/mpiaij.h" 20 21 typedef struct { 22 int dof; /* number of components */ 23 Mat AIJ,OAIJ; /* representation of interpolation for one component */ 24 VecScatter ctx; /* update ghost points for parallel case */ 25 Vec w; /* work space for ghost values for parallel case */ 26 } Mat_MAIJ; 27 28 #undef __FUNC__ 29 #define __FUNC__ /*<a name="MatDestroy_MAIJ"></a>*/"MatDestroy_MAIJ" 30 int MatDestroy_MAIJ(Mat A) 31 { 32 int ierr; 33 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 34 35 PetscFunctionBegin; 36 if (b->AIJ) { 37 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 38 } 39 if (b->OAIJ) { 40 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 41 } 42 if (b->ctx) { 43 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 44 } 45 if (b->w) { 46 ierr = VecDestroy(b->w);CHKERRQ(ierr); 47 } 48 ierr = PetscFree(b);CHKERRQ(ierr); 49 PetscHeaderDestroy(A); 50 PetscFunctionReturn(0); 51 } 52 53 EXTERN_C_BEGIN 54 #undef __FUNC__ 55 #define __FUNC__ /*<a name="MatCreate_MAIJ"></a>*/"MatCreate_MAIJ" 56 int MatCreate_MAIJ(Mat A) 57 { 58 int ierr; 59 Mat_MAIJ *b; 60 61 PetscFunctionBegin; 62 A->data = (void*)(b = PetscNew(Mat_MAIJ));CHKPTRQ(b); 63 ierr = PetscMemzero(b,sizeof(Mat_MAIJ));CHKERRQ(ierr); 64 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 65 A->factor = 0; 66 A->mapping = 0; 67 68 b->AIJ = 0; 69 b->dof = 0; 70 b->OAIJ = 0; 71 b->ctx = 0; 72 b->w = 0; 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* ----------------------------------------------------------------------------------*/ 78 EXTERN int MatMult_SeqAIJ(Mat,Vec,Vec); 79 EXTERN int MatMultAdd_SeqAIJ(Mat,Vec,Vec,Vec); 80 EXTERN int MatMultTranspose_SeqAIJ(Mat,Vec,Vec); 81 EXTERN int MatMultTransposeAdd_SeqAIJ(Mat,Vec,Vec,Vec); 82 83 #undef __FUNC__ 84 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_1"></a>*/"MatMult_SeqMAIJ_1" 85 int MatMult_SeqMAIJ_1(Mat A,Vec xx,Vec yy) 86 { 87 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 88 int ierr; 89 PetscFunctionBegin; 90 ierr = MatMult_SeqAIJ(b->AIJ,xx,yy); 91 PetscFunctionReturn(0); 92 } 93 #undef __FUNC__ 94 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_1"></a>*/"MatMultTranspose_SeqMAIJ_1" 95 int MatMultTranspose_SeqMAIJ_1(Mat A,Vec xx,Vec yy) 96 { 97 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 98 int ierr; 99 PetscFunctionBegin; 100 ierr = MatMultTranspose_SeqAIJ(b->AIJ,xx,yy); 101 PetscFunctionReturn(0); 102 } 103 #undef __FUNC__ 104 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_1"></a>*/"MatMultAdd_SeqMAIJ_1" 105 int MatMultAdd_SeqMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 106 { 107 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 108 int ierr; 109 PetscFunctionBegin; 110 ierr = MatMultAdd_SeqAIJ(b->AIJ,xx,yy,zz); 111 PetscFunctionReturn(0); 112 } 113 #undef __FUNC__ 114 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_1"></a>*/"MatMultTransposeAdd_SeqMAIJ_1" 115 int MatMultTransposeAdd_SeqMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 116 { 117 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 118 int ierr; 119 PetscFunctionBegin; 120 ierr = MatMultTransposeAdd_SeqAIJ(b->AIJ,xx,yy,zz); 121 PetscFunctionReturn(0); 122 } 123 124 /* --------------------------------------------------------------------------------------*/ 125 #undef __FUNC__ 126 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2" 127 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 128 { 129 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 130 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 131 Scalar *x,*y,*v,sum1, sum2; 132 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 133 int n,i,jrow,j; 134 135 PetscFunctionBegin; 136 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 137 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 138 x = x + shift; /* shift for Fortran start by 1 indexing */ 139 idx = a->j; 140 v = a->a; 141 ii = a->i; 142 143 v += shift; /* shift for Fortran start by 1 indexing */ 144 idx += shift; 145 for (i=0; i<m; i++) { 146 jrow = ii[i]; 147 n = ii[i+1] - jrow; 148 sum1 = 0.0; 149 sum2 = 0.0; 150 for (j=0; j<n; j++) { 151 sum1 += v[jrow]*x[2*idx[jrow]]; 152 sum2 += v[jrow]*x[2*idx[jrow]+1]; 153 jrow++; 154 } 155 y[2*i] = sum1; 156 y[2*i+1] = sum2; 157 } 158 159 PLogFlops(4*a->nz - 2*m); 160 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 161 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 #undef __FUNC__ 166 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2" 167 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 168 { 169 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 170 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 171 Scalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 172 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 173 174 PetscFunctionBegin; 175 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 176 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 177 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 178 y = y + shift; /* shift for Fortran start by 1 indexing */ 179 for (i=0; i<m; i++) { 180 idx = a->j + a->i[i] + shift; 181 v = a->a + a->i[i] + shift; 182 n = a->i[i+1] - a->i[i]; 183 alpha1 = x[2*i]; 184 alpha2 = x[2*i+1]; 185 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 186 } 187 PLogFlops(4*a->nz - 2*a->n); 188 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 189 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNC__ 194 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2" 195 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 196 { 197 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 198 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 199 Scalar *x,*y,*v,sum1, sum2; 200 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 201 int n,i,jrow,j; 202 203 PetscFunctionBegin; 204 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 205 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 206 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 207 x = x + shift; /* shift for Fortran start by 1 indexing */ 208 idx = a->j; 209 v = a->a; 210 ii = a->i; 211 212 v += shift; /* shift for Fortran start by 1 indexing */ 213 idx += shift; 214 for (i=0; i<m; i++) { 215 jrow = ii[i]; 216 n = ii[i+1] - jrow; 217 sum1 = 0.0; 218 sum2 = 0.0; 219 for (j=0; j<n; j++) { 220 sum1 += v[jrow]*x[2*idx[jrow]]; 221 sum2 += v[jrow]*x[2*idx[jrow]+1]; 222 jrow++; 223 } 224 y[2*i] += sum1; 225 y[2*i+1] += sum2; 226 } 227 228 PLogFlops(4*a->nz - 2*m); 229 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 230 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 #undef __FUNC__ 234 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2" 235 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 236 { 237 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 238 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 239 Scalar *x,*y,*v,alpha1,alpha2; 240 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 241 242 PetscFunctionBegin; 243 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 244 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 245 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 246 y = y + shift; /* shift for Fortran start by 1 indexing */ 247 for (i=0; i<m; i++) { 248 idx = a->j + a->i[i] + shift; 249 v = a->a + a->i[i] + shift; 250 n = a->i[i+1] - a->i[i]; 251 alpha1 = x[2*i]; 252 alpha2 = x[2*i+1]; 253 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 254 } 255 PLogFlops(4*a->nz - 2*a->n); 256 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 257 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 258 PetscFunctionReturn(0); 259 } 260 /* --------------------------------------------------------------------------------------*/ 261 #undef __FUNC__ 262 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3" 263 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 264 { 265 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 267 Scalar *x,*y,*v,sum1, sum2, sum3; 268 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 269 int n,i,jrow,j; 270 271 PetscFunctionBegin; 272 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 273 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 274 x = x + shift; /* shift for Fortran start by 1 indexing */ 275 idx = a->j; 276 v = a->a; 277 ii = a->i; 278 279 v += shift; /* shift for Fortran start by 1 indexing */ 280 idx += shift; 281 for (i=0; i<m; i++) { 282 jrow = ii[i]; 283 n = ii[i+1] - jrow; 284 sum1 = 0.0; 285 sum2 = 0.0; 286 sum3 = 0.0; 287 for (j=0; j<n; j++) { 288 sum1 += v[jrow]*x[3*idx[jrow]]; 289 sum2 += v[jrow]*x[3*idx[jrow]+1]; 290 sum3 += v[jrow]*x[3*idx[jrow]+2]; 291 jrow++; 292 } 293 y[3*i] = sum1; 294 y[3*i+1] = sum2; 295 y[3*i+2] = sum3; 296 } 297 298 PLogFlops(6*a->nz - 3*m); 299 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 300 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 304 #undef __FUNC__ 305 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3" 306 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 307 { 308 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 309 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 310 Scalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 311 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 312 313 PetscFunctionBegin; 314 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 315 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 316 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 317 y = y + shift; /* shift for Fortran start by 1 indexing */ 318 for (i=0; i<m; i++) { 319 idx = a->j + a->i[i] + shift; 320 v = a->a + a->i[i] + shift; 321 n = a->i[i+1] - a->i[i]; 322 alpha1 = x[3*i]; 323 alpha2 = x[3*i+1]; 324 alpha3 = x[3*i+2]; 325 while (n-->0) { 326 y[3*(*idx)] += alpha1*(*v); 327 y[3*(*idx)+1] += alpha2*(*v); 328 y[3*(*idx)+2] += alpha3*(*v); 329 idx++; v++; 330 } 331 } 332 PLogFlops(6*a->nz - 3*a->n); 333 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 334 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 335 PetscFunctionReturn(0); 336 } 337 338 #undef __FUNC__ 339 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3" 340 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 341 { 342 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 343 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 344 Scalar *x,*y,*v,sum1, sum2, sum3; 345 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 346 int n,i,jrow,j; 347 348 PetscFunctionBegin; 349 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 352 x = x + shift; /* shift for Fortran start by 1 indexing */ 353 idx = a->j; 354 v = a->a; 355 ii = a->i; 356 357 v += shift; /* shift for Fortran start by 1 indexing */ 358 idx += shift; 359 for (i=0; i<m; i++) { 360 jrow = ii[i]; 361 n = ii[i+1] - jrow; 362 sum1 = 0.0; 363 sum2 = 0.0; 364 sum3 = 0.0; 365 for (j=0; j<n; j++) { 366 sum1 += v[jrow]*x[3*idx[jrow]]; 367 sum2 += v[jrow]*x[3*idx[jrow]+1]; 368 sum3 += v[jrow]*x[3*idx[jrow]+2]; 369 jrow++; 370 } 371 y[3*i] += sum1; 372 y[3*i+1] += sum2; 373 y[3*i+2] += sum3; 374 } 375 376 PLogFlops(6*a->nz); 377 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 378 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 #undef __FUNC__ 382 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3" 383 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 384 { 385 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 386 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 387 Scalar *x,*y,*v,alpha1,alpha2,alpha3; 388 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 389 390 PetscFunctionBegin; 391 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 392 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 393 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 394 y = y + shift; /* shift for Fortran start by 1 indexing */ 395 for (i=0; i<m; i++) { 396 idx = a->j + a->i[i] + shift; 397 v = a->a + a->i[i] + shift; 398 n = a->i[i+1] - a->i[i]; 399 alpha1 = x[3*i]; 400 alpha2 = x[3*i+1]; 401 alpha3 = x[3*i+2]; 402 while (n-->0) { 403 y[3*(*idx)] += alpha1*(*v); 404 y[3*(*idx)+1] += alpha2*(*v); 405 y[3*(*idx)+2] += alpha3*(*v); 406 idx++; v++; 407 } 408 } 409 PLogFlops(6*a->nz); 410 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 411 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 415 /* ------------------------------------------------------------------------------*/ 416 #undef __FUNC__ 417 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4" 418 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 419 { 420 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 422 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 423 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 424 int n,i,jrow,j; 425 426 PetscFunctionBegin; 427 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 428 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 429 x = x + shift; /* shift for Fortran start by 1 indexing */ 430 idx = a->j; 431 v = a->a; 432 ii = a->i; 433 434 v += shift; /* shift for Fortran start by 1 indexing */ 435 idx += shift; 436 for (i=0; i<m; i++) { 437 jrow = ii[i]; 438 n = ii[i+1] - jrow; 439 sum1 = 0.0; 440 sum2 = 0.0; 441 sum3 = 0.0; 442 sum4 = 0.0; 443 for (j=0; j<n; j++) { 444 sum1 += v[jrow]*x[4*idx[jrow]]; 445 sum2 += v[jrow]*x[4*idx[jrow]+1]; 446 sum3 += v[jrow]*x[4*idx[jrow]+2]; 447 sum4 += v[jrow]*x[4*idx[jrow]+3]; 448 jrow++; 449 } 450 y[4*i] = sum1; 451 y[4*i+1] = sum2; 452 y[4*i+2] = sum3; 453 y[4*i+3] = sum4; 454 } 455 456 PLogFlops(8*a->nz - 4*m); 457 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 458 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNC__ 463 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4" 464 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 465 { 466 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 468 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 469 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 470 471 PetscFunctionBegin; 472 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 473 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 474 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 475 y = y + shift; /* shift for Fortran start by 1 indexing */ 476 for (i=0; i<m; i++) { 477 idx = a->j + a->i[i] + shift; 478 v = a->a + a->i[i] + shift; 479 n = a->i[i+1] - a->i[i]; 480 alpha1 = x[4*i]; 481 alpha2 = x[4*i+1]; 482 alpha3 = x[4*i+2]; 483 alpha4 = x[4*i+3]; 484 while (n-->0) { 485 y[4*(*idx)] += alpha1*(*v); 486 y[4*(*idx)+1] += alpha2*(*v); 487 y[4*(*idx)+2] += alpha3*(*v); 488 y[4*(*idx)+3] += alpha4*(*v); 489 idx++; v++; 490 } 491 } 492 PLogFlops(8*a->nz - 4*a->n); 493 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 494 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNC__ 499 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4" 500 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 501 { 502 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 504 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 505 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 506 int n,i,jrow,j; 507 508 PetscFunctionBegin; 509 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 510 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 511 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 512 x = x + shift; /* shift for Fortran start by 1 indexing */ 513 idx = a->j; 514 v = a->a; 515 ii = a->i; 516 517 v += shift; /* shift for Fortran start by 1 indexing */ 518 idx += shift; 519 for (i=0; i<m; i++) { 520 jrow = ii[i]; 521 n = ii[i+1] - jrow; 522 sum1 = 0.0; 523 sum2 = 0.0; 524 sum3 = 0.0; 525 sum4 = 0.0; 526 for (j=0; j<n; j++) { 527 sum1 += v[jrow]*x[4*idx[jrow]]; 528 sum2 += v[jrow]*x[4*idx[jrow]+1]; 529 sum3 += v[jrow]*x[4*idx[jrow]+2]; 530 sum4 += v[jrow]*x[4*idx[jrow]+3]; 531 jrow++; 532 } 533 y[4*i] += sum1; 534 y[4*i+1] += sum2; 535 y[4*i+2] += sum3; 536 y[4*i+3] += sum4; 537 } 538 539 PLogFlops(8*a->nz - 4*m); 540 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 541 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 #undef __FUNC__ 545 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4" 546 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 547 { 548 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 549 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 550 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 551 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 552 553 PetscFunctionBegin; 554 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 555 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 556 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 557 y = y + shift; /* shift for Fortran start by 1 indexing */ 558 for (i=0; i<m; i++) { 559 idx = a->j + a->i[i] + shift; 560 v = a->a + a->i[i] + shift; 561 n = a->i[i+1] - a->i[i]; 562 alpha1 = x[4*i]; 563 alpha2 = x[4*i+1]; 564 alpha3 = x[4*i+2]; 565 alpha4 = x[4*i+3]; 566 while (n-->0) { 567 y[4*(*idx)] += alpha1*(*v); 568 y[4*(*idx)+1] += alpha2*(*v); 569 y[4*(*idx)+2] += alpha3*(*v); 570 y[4*(*idx)+3] += alpha4*(*v); 571 idx++; v++; 572 } 573 } 574 PLogFlops(8*a->nz - 4*a->n); 575 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 576 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 579 } 580 581 /* ------------------------------------------------------------------------------*/ 582 #undef __FUNC__ 583 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5" 584 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 585 { 586 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 587 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 588 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 589 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 590 int n,i,jrow,j; 591 592 PetscFunctionBegin; 593 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 594 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 595 x = x + shift; /* shift for Fortran start by 1 indexing */ 596 idx = a->j; 597 v = a->a; 598 ii = a->i; 599 600 v += shift; /* shift for Fortran start by 1 indexing */ 601 idx += shift; 602 for (i=0; i<m; i++) { 603 jrow = ii[i]; 604 n = ii[i+1] - jrow; 605 sum1 = 0.0; 606 sum2 = 0.0; 607 sum3 = 0.0; 608 sum4 = 0.0; 609 sum5 = 0.0; 610 for (j=0; j<n; j++) { 611 sum1 += v[jrow]*x[5*idx[jrow]]; 612 sum2 += v[jrow]*x[5*idx[jrow]+1]; 613 sum3 += v[jrow]*x[5*idx[jrow]+2]; 614 sum4 += v[jrow]*x[5*idx[jrow]+3]; 615 sum5 += v[jrow]*x[5*idx[jrow]+4]; 616 jrow++; 617 } 618 y[5*i] = sum1; 619 y[5*i+1] = sum2; 620 y[5*i+2] = sum3; 621 y[5*i+3] = sum4; 622 y[5*i+4] = sum5; 623 } 624 625 PLogFlops(10*a->nz - 5*m); 626 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 627 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNC__ 632 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5" 633 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 634 { 635 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 637 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 638 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 639 640 PetscFunctionBegin; 641 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 642 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 643 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 644 y = y + shift; /* shift for Fortran start by 1 indexing */ 645 for (i=0; i<m; i++) { 646 idx = a->j + a->i[i] + shift; 647 v = a->a + a->i[i] + shift; 648 n = a->i[i+1] - a->i[i]; 649 alpha1 = x[5*i]; 650 alpha2 = x[5*i+1]; 651 alpha3 = x[5*i+2]; 652 alpha4 = x[5*i+3]; 653 alpha5 = x[5*i+4]; 654 while (n-->0) { 655 y[5*(*idx)] += alpha1*(*v); 656 y[5*(*idx)+1] += alpha2*(*v); 657 y[5*(*idx)+2] += alpha3*(*v); 658 y[5*(*idx)+3] += alpha4*(*v); 659 y[5*(*idx)+4] += alpha5*(*v); 660 idx++; v++; 661 } 662 } 663 PLogFlops(10*a->nz - 5*a->n); 664 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 665 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 666 PetscFunctionReturn(0); 667 } 668 669 #undef __FUNC__ 670 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5" 671 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 672 { 673 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 674 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 675 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 676 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 677 int n,i,jrow,j; 678 679 PetscFunctionBegin; 680 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 681 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 682 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 683 x = x + shift; /* shift for Fortran start by 1 indexing */ 684 idx = a->j; 685 v = a->a; 686 ii = a->i; 687 688 v += shift; /* shift for Fortran start by 1 indexing */ 689 idx += shift; 690 for (i=0; i<m; i++) { 691 jrow = ii[i]; 692 n = ii[i+1] - jrow; 693 sum1 = 0.0; 694 sum2 = 0.0; 695 sum3 = 0.0; 696 sum4 = 0.0; 697 sum5 = 0.0; 698 for (j=0; j<n; j++) { 699 sum1 += v[jrow]*x[5*idx[jrow]]; 700 sum2 += v[jrow]*x[5*idx[jrow]+1]; 701 sum3 += v[jrow]*x[5*idx[jrow]+2]; 702 sum4 += v[jrow]*x[5*idx[jrow]+3]; 703 sum5 += v[jrow]*x[5*idx[jrow]+4]; 704 jrow++; 705 } 706 y[5*i] += sum1; 707 y[5*i+1] += sum2; 708 y[5*i+2] += sum3; 709 y[5*i+3] += sum4; 710 y[5*i+4] += sum5; 711 } 712 713 PLogFlops(10*a->nz); 714 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 715 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 #undef __FUNC__ 720 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5" 721 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 722 { 723 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 725 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 726 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 727 728 PetscFunctionBegin; 729 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 730 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 731 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 732 y = y + shift; /* shift for Fortran start by 1 indexing */ 733 for (i=0; i<m; i++) { 734 idx = a->j + a->i[i] + shift; 735 v = a->a + a->i[i] + shift; 736 n = a->i[i+1] - a->i[i]; 737 alpha1 = x[5*i]; 738 alpha2 = x[5*i+1]; 739 alpha3 = x[5*i+2]; 740 alpha4 = x[5*i+3]; 741 alpha5 = x[5*i+4]; 742 while (n-->0) { 743 y[5*(*idx)] += alpha1*(*v); 744 y[5*(*idx)+1] += alpha2*(*v); 745 y[5*(*idx)+2] += alpha3*(*v); 746 y[5*(*idx)+3] += alpha4*(*v); 747 y[5*(*idx)+4] += alpha5*(*v); 748 idx++; v++; 749 } 750 } 751 PLogFlops(10*a->nz); 752 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 753 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 /*===================================================================================*/ 758 EXTERN int MatMult_MPIAIJ(Mat,Vec,Vec); 759 EXTERN int MatMultAdd_MPIAIJ(Mat,Vec,Vec,Vec); 760 EXTERN int MatMultTranspose_MPIAIJ(Mat,Vec,Vec); 761 EXTERN int MatMultTransposeAdd_MPIAIJ(Mat,Vec,Vec,Vec); 762 763 #undef __FUNC__ 764 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_1"></a>*/"MatMult_MPIMAIJ_1" 765 int MatMult_MPIMAIJ_1(Mat A,Vec xx,Vec yy) 766 { 767 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 768 int ierr; 769 PetscFunctionBegin; 770 ierr = MatMult_MPIAIJ(b->AIJ,xx,yy); 771 PetscFunctionReturn(0); 772 } 773 #undef __FUNC__ 774 #define __FUNC__ /*<a name="MatMultTranspose_MPIMAIJ_1"></a>*/"MatMultTranspose_MPIMAIJ_1" 775 int MatMultTranspose_MPIMAIJ_1(Mat A,Vec xx,Vec yy) 776 { 777 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 778 int ierr; 779 PetscFunctionBegin; 780 ierr = MatMultTranspose_MPIAIJ(b->AIJ,xx,yy); 781 PetscFunctionReturn(0); 782 } 783 #undef __FUNC__ 784 #define __FUNC__ /*<a name="MatMultAdd_MPIMAIJ_1"></a>*/"MatMultAdd_MPIMAIJ_1" 785 int MatMultAdd_MPIMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 786 { 787 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 788 int ierr; 789 PetscFunctionBegin; 790 ierr = MatMultAdd_MPIAIJ(b->AIJ,xx,yy,zz); 791 PetscFunctionReturn(0); 792 } 793 #undef __FUNC__ 794 #define __FUNC__ /*<a name="MatMultTransposeAdd_MPIMAIJ_1"></a>*/"MatMultTransposeAdd_MPIMAIJ_1" 795 int MatMultTransposeAdd_MPIMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 796 { 797 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 798 int ierr; 799 PetscFunctionBegin; 800 ierr = MatMultTransposeAdd_MPIAIJ(b->AIJ,xx,yy,zz); 801 PetscFunctionReturn(0); 802 } 803 804 /* ---------------------------------------------------------------------------------- */ 805 #undef __FUNC__ 806 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_dof"></a>*/"MatMult_MPIMAIJ_dof" 807 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 808 { 809 Mat_MAIJ *b = (Mat_MAIJ*)A->data; 810 int ierr; 811 PetscFunctionBegin; 812 813 /* start the scatter */ 814 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 815 ierr = MatMult_SeqMAIJ_2(A,xx,yy);CHKERRQ(ierr); 816 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 817 ierr = MatMultAdd_SeqMAIJ_1(A,b->w,yy,yy);CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 /* ---------------------------------------------------------------------------------- */ 822 #undef __FUNC__ 823 #define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ" 824 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 825 { 826 int ierr,size,n; 827 Mat_MAIJ *b; 828 Mat B; 829 830 PetscFunctionBegin; 831 ierr = MATCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 832 ierr = MatSetType(B,MATMAIJ);CHKERRQ(ierr); 833 834 B->assembled = PETSC_TRUE; 835 B->ops->destroy = MatDestroy_MAIJ; 836 b = (Mat_MAIJ*)B->data; 837 838 b->AIJ = A; 839 b->dof = dof; 840 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 841 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 842 if (size == 1) { 843 if (dof == 1) { 844 B->ops->mult = MatMult_SeqMAIJ_1; 845 B->ops->multadd = MatMultAdd_SeqMAIJ_1; 846 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_1; 847 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_1; 848 } else if (dof == 2) { 849 B->ops->mult = MatMult_SeqMAIJ_2; 850 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 851 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 852 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 853 } else if (dof == 3) { 854 B->ops->mult = MatMult_SeqMAIJ_3; 855 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 856 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 857 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 858 } else if (dof == 4) { 859 B->ops->mult = MatMult_SeqMAIJ_4; 860 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 861 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 862 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 863 } else if (dof == 5) { 864 B->ops->mult = MatMult_SeqMAIJ_5; 865 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 866 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 867 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 868 } else { 869 SETERRQ1(1,1,"Cannot handle a dof of %d\n",dof); 870 } 871 } else { 872 if (dof == 1) { 873 B->ops->mult = MatMult_MPIMAIJ_1; 874 B->ops->multadd = MatMultAdd_MPIMAIJ_1; 875 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_1; 876 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_1; 877 } else { 878 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 879 IS from,to; 880 Vec gvec; 881 int *garray,i; 882 883 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 884 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 885 886 /* create two temporary Index sets for build scatter gather */ 887 garray = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(garray); 888 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 889 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 890 ierr = PetscFree(garray);CHKERRQ(ierr); 891 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 892 893 /* create temporary global vector to generate scatter context */ 894 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 895 896 /* generate the scatter context */ 897 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 898 899 ierr = ISDestroy(from);CHKERRQ(ierr); 900 ierr = ISDestroy(to);CHKERRQ(ierr); 901 ierr = VecDestroy(gvec);CHKERRQ(ierr); 902 903 B->ops->mult = MatMult_MPIMAIJ_dof; 904 } 905 } 906 *maij = B; 907 PetscFunctionReturn(0); 908 } 909 910 911 912 913 914 915 916 917 918 919 920 921