1 /*$Id: maij.c,v 1.3 2000/06/01 19:56:50 bsmith Exp bsmith $*/ 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 */ 16 17 #include "src/mat/impls/aij/seq/aij.h" 18 19 typedef struct { 20 int dof; /* number of components */ 21 Mat AIJ; /* representation of interpolation for one component */ 22 } Mat_SeqMAIJ; 23 24 #undef __FUNC__ 25 #define __FUNC__ /*<a name="MatDestroy_SeqMAIJ"></a>*/"MatDestroy_SeqMAIJ" 26 int MatDestroy_SeqMAIJ(Mat A) 27 { 28 int ierr; 29 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 30 31 PetscFunctionBegin; 32 if (b->AIJ) { 33 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 34 } 35 ierr = PetscFree(b);CHKERRQ(ierr); 36 PetscHeaderDestroy(A); 37 PetscFunctionReturn(0); 38 } 39 40 EXTERN_C_BEGIN 41 #undef __FUNC__ 42 #define __FUNC__ /*<a name="MatCreate_SeqMAIJ"></a>*/"MatCreate_SeqMAIJ" 43 int MatCreate_SeqMAIJ(Mat A) 44 { 45 int ierr; 46 Mat_SeqMAIJ *b; 47 48 PetscFunctionBegin; 49 A->data = (void*)(b = PetscNew(Mat_SeqMAIJ));CHKPTRQ(b); 50 ierr = PetscMemzero(b,sizeof(Mat_SeqMAIJ));CHKERRQ(ierr); 51 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 52 A->factor = 0; 53 A->mapping = 0; 54 b->AIJ = 0; 55 b->dof = 0; 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 /* ----------------------------------------------------------------------------------*/ 61 EXTERN int MatMult_SeqAIJ(Mat,Vec,Vec); 62 EXTERN int MatMultTranspose_SeqAIJ(Mat,Vec,Vec); 63 EXTERN int MatMultTransposeAdd_SeqAIJ(Mat,Vec,Vec,Vec); 64 EXTERN int matmulttransposeadd_seqaijMatMultAdd_SeqAIJ(Mat,Vec,Vec,Vec); 65 66 #undef __FUNC__ 67 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_1"></a>*/"MatMult_SeqMAIJ_1" 68 int MatMult_SeqMAIJ_1(Mat A,Vec xx,Vec yy) 69 { 70 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 71 int ierr; 72 PetscFunctionBegin; 73 ierr = MatMult_SeqAIJ(b->AIJ,xx,yy); 74 PetscFunctionReturn(0); 75 } 76 #undef __FUNC__ 77 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_1"></a>*/"MatMultTranspose_SeqMAIJ_1" 78 int MatMultTranspose_SeqMAIJ_1(Mat A,Vec xx,Vec yy) 79 { 80 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 81 int ierr; 82 PetscFunctionBegin; 83 ierr = MatMultTranspose_SeqAIJ(b->AIJ,xx,yy); 84 PetscFunctionReturn(0); 85 } 86 #undef __FUNC__ 87 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_1"></a>*/"MatMultAdd_SeqMAIJ_1" 88 int MatMultAdd_SeqMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 89 { 90 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 91 int ierr; 92 PetscFunctionBegin; 93 ierr = MatMultAdd_SeqAIJ(b->AIJ,xx,yy,zz); 94 PetscFunctionReturn(0); 95 } 96 #undef __FUNC__ 97 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_1"></a>*/"MatMultTransposeAdd_SeqMAIJ_1" 98 int MatMultTransposeAdd_SeqMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 99 { 100 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 101 int ierr; 102 PetscFunctionBegin; 103 ierr = MatMultTransposeAdd_SeqAIJ(b->AIJ,xx,yy,zz); 104 PetscFunctionReturn(0); 105 } 106 107 /* --------------------------------------------------------------------------------------*/ 108 #undef __FUNC__ 109 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2" 110 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 111 { 112 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 113 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 114 Scalar *x,*y,*v,sum1, sum2; 115 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 116 int n,i,jrow,j; 117 118 PetscFunctionBegin; 119 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 120 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 121 x = x + shift; /* shift for Fortran start by 1 indexing */ 122 idx = a->j; 123 v = a->a; 124 ii = a->i; 125 126 v += shift; /* shift for Fortran start by 1 indexing */ 127 idx += shift; 128 for (i=0; i<m; i++) { 129 jrow = ii[i]; 130 n = ii[i+1] - jrow; 131 sum1 = 0.0; 132 sum2 = 0.0; 133 for (j=0; j<n; j++) { 134 sum1 += v[jrow]*x[2*idx[jrow]]; 135 sum2 += v[jrow]*x[2*idx[jrow]+1]; 136 jrow++; 137 } 138 y[2*i] = sum1; 139 y[2*i+1] = sum2; 140 } 141 142 PLogFlops(4*a->nz - 2*m); 143 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 144 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNC__ 149 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2" 150 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 151 { 152 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 153 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 154 Scalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 155 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 156 157 PetscFunctionBegin; 158 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 159 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 160 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 161 y = y + shift; /* shift for Fortran start by 1 indexing */ 162 for (i=0; i<m; i++) { 163 idx = a->j + a->i[i] + shift; 164 v = a->a + a->i[i] + shift; 165 n = a->i[i+1] - a->i[i]; 166 alpha1 = x[2*i]; 167 alpha2 = x[2*i+1]; 168 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 169 } 170 PLogFlops(4*a->nz - 2*a->n); 171 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 172 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNC__ 177 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2" 178 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 179 { 180 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 181 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 182 Scalar *x,*y,*v,sum1, sum2; 183 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 184 int n,i,jrow,j; 185 186 PetscFunctionBegin; 187 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 189 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 190 x = x + shift; /* shift for Fortran start by 1 indexing */ 191 idx = a->j; 192 v = a->a; 193 ii = a->i; 194 195 v += shift; /* shift for Fortran start by 1 indexing */ 196 idx += shift; 197 for (i=0; i<m; i++) { 198 jrow = ii[i]; 199 n = ii[i+1] - jrow; 200 sum1 = 0.0; 201 sum2 = 0.0; 202 for (j=0; j<n; j++) { 203 sum1 += v[jrow]*x[2*idx[jrow]]; 204 sum2 += v[jrow]*x[2*idx[jrow]+1]; 205 jrow++; 206 } 207 y[2*i] += sum1; 208 y[2*i+1] += sum2; 209 } 210 211 PLogFlops(4*a->nz - 2*m); 212 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 213 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 214 PetscFunctionReturn(0); 215 PetscFunctionReturn(0); 216 } 217 #undef __FUNC__ 218 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2" 219 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 220 { 221 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 222 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 223 Scalar *x,*y,*v,alpha1,alpha2; 224 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 225 226 PetscFunctionBegin; 227 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 228 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 229 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 230 y = y + shift; /* shift for Fortran start by 1 indexing */ 231 for (i=0; i<m; i++) { 232 idx = a->j + a->i[i] + shift; 233 v = a->a + a->i[i] + shift; 234 n = a->i[i+1] - a->i[i]; 235 alpha1 = x[2*i]; 236 alpha2 = x[2*i+1]; 237 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 238 } 239 PLogFlops(4*a->nz - 2*a->n); 240 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 241 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 242 PetscFunctionReturn(0); 243 PetscFunctionReturn(0); 244 } 245 /* --------------------------------------------------------------------------------------*/ 246 #undef __FUNC__ 247 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3" 248 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 249 { 250 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 251 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 252 Scalar *x,*y,*v,sum1, sum2, sum3; 253 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 254 int n,i,jrow,j; 255 256 PetscFunctionBegin; 257 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 258 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 259 x = x + shift; /* shift for Fortran start by 1 indexing */ 260 idx = a->j; 261 v = a->a; 262 ii = a->i; 263 264 v += shift; /* shift for Fortran start by 1 indexing */ 265 idx += shift; 266 for (i=0; i<m; i++) { 267 jrow = ii[i]; 268 n = ii[i+1] - jrow; 269 sum1 = 0.0; 270 sum2 = 0.0; 271 sum3 = 0.0; 272 for (j=0; j<n; j++) { 273 sum1 += v[jrow]*x[3*idx[jrow]]; 274 sum2 += v[jrow]*x[3*idx[jrow]+1]; 275 sum3 += v[jrow]*x[3*idx[jrow]+2]; 276 jrow++; 277 } 278 y[3*i] = sum1; 279 y[3*i+1] = sum2; 280 y[3*i+2] = sum3; 281 } 282 283 PLogFlops(6*a->nz - 3*m); 284 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 285 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 #undef __FUNC__ 290 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3" 291 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 292 { 293 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 294 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 295 Scalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 296 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 297 298 PetscFunctionBegin; 299 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 300 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 301 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 302 y = y + shift; /* shift for Fortran start by 1 indexing */ 303 for (i=0; i<m; i++) { 304 idx = a->j + a->i[i] + shift; 305 v = a->a + a->i[i] + shift; 306 n = a->i[i+1] - a->i[i]; 307 alpha1 = x[3*i]; 308 alpha2 = x[3*i+1]; 309 alpha3 = x[3*i+2]; 310 while (n-->0) { 311 y[3*(*idx)] += alpha1*(*v); 312 y[3*(*idx)+1] += alpha2*(*v); 313 y[3*(*idx)+2] += alpha3*(*v); 314 idx++; v++; 315 } 316 } 317 PLogFlops(6*a->nz - 3*a->n); 318 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 319 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNC__ 324 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3" 325 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 326 { 327 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 328 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 329 Scalar *x,*y,*v,sum1, sum2, sum3; 330 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 331 int n,i,jrow,j; 332 333 PetscFunctionBegin; 334 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 335 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 336 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 337 x = x + shift; /* shift for Fortran start by 1 indexing */ 338 idx = a->j; 339 v = a->a; 340 ii = a->i; 341 342 v += shift; /* shift for Fortran start by 1 indexing */ 343 idx += shift; 344 for (i=0; i<m; i++) { 345 jrow = ii[i]; 346 n = ii[i+1] - jrow; 347 sum1 = 0.0; 348 sum2 = 0.0; 349 sum3 = 0.0; 350 for (j=0; j<n; j++) { 351 sum1 += v[jrow]*x[3*idx[jrow]]; 352 sum2 += v[jrow]*x[3*idx[jrow]+1]; 353 sum3 += v[jrow]*x[3*idx[jrow]+2]; 354 jrow++; 355 } 356 y[3*i] += sum1; 357 y[3*i+1] += sum2; 358 y[3*i+2] += sum3; 359 } 360 361 PLogFlops(6*a->nz); 362 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 363 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 364 PetscFunctionReturn(0); 365 } 366 #undef __FUNC__ 367 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3" 368 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 369 { 370 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 371 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 372 Scalar *x,*y,*v,alpha1,alpha2,alpha3; 373 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 374 375 PetscFunctionBegin; 376 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 377 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 378 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 379 y = y + shift; /* shift for Fortran start by 1 indexing */ 380 for (i=0; i<m; i++) { 381 idx = a->j + a->i[i] + shift; 382 v = a->a + a->i[i] + shift; 383 n = a->i[i+1] - a->i[i]; 384 alpha1 = x[3*i]; 385 alpha2 = x[3*i+1]; 386 alpha3 = x[3*i+2]; 387 while (n-->0) { 388 y[3*(*idx)] += alpha1*(*v); 389 y[3*(*idx)+1] += alpha2*(*v); 390 y[3*(*idx)+2] += alpha3*(*v); 391 idx++; v++; 392 } 393 } 394 PLogFlops(6*a->nz); 395 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 396 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 397 PetscFunctionReturn(0); 398 } 399 400 /* ------------------------------------------------------------------------------*/ 401 #undef __FUNC__ 402 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4" 403 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 404 { 405 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 406 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 407 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 408 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 409 int n,i,jrow,j; 410 411 PetscFunctionBegin; 412 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 413 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 414 x = x + shift; /* shift for Fortran start by 1 indexing */ 415 idx = a->j; 416 v = a->a; 417 ii = a->i; 418 419 v += shift; /* shift for Fortran start by 1 indexing */ 420 idx += shift; 421 for (i=0; i<m; i++) { 422 jrow = ii[i]; 423 n = ii[i+1] - jrow; 424 sum1 = 0.0; 425 sum2 = 0.0; 426 sum3 = 0.0; 427 sum4 = 0.0; 428 for (j=0; j<n; j++) { 429 sum1 += v[jrow]*x[4*idx[jrow]]; 430 sum2 += v[jrow]*x[4*idx[jrow]+1]; 431 sum3 += v[jrow]*x[4*idx[jrow]+2]; 432 sum4 += v[jrow]*x[4*idx[jrow]+3]; 433 jrow++; 434 } 435 y[4*i] = sum1; 436 y[4*i+1] = sum2; 437 y[4*i+2] = sum3; 438 y[4*i+3] = sum4; 439 } 440 441 PLogFlops(8*a->nz - 4*m); 442 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 443 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 447 #undef __FUNC__ 448 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4" 449 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 450 { 451 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 452 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 453 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 454 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 455 456 PetscFunctionBegin; 457 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 460 y = y + shift; /* shift for Fortran start by 1 indexing */ 461 for (i=0; i<m; i++) { 462 idx = a->j + a->i[i] + shift; 463 v = a->a + a->i[i] + shift; 464 n = a->i[i+1] - a->i[i]; 465 alpha1 = x[4*i]; 466 alpha2 = x[4*i+1]; 467 alpha3 = x[4*i+2]; 468 alpha4 = x[4*i+3]; 469 while (n-->0) { 470 y[4*(*idx)] += alpha1*(*v); 471 y[4*(*idx)+1] += alpha2*(*v); 472 y[4*(*idx)+2] += alpha3*(*v); 473 y[4*(*idx)+3] += alpha4*(*v); 474 idx++; v++; 475 } 476 } 477 PLogFlops(8*a->nz - 4*a->n); 478 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 479 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNC__ 484 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4" 485 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 486 { 487 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 488 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 489 Scalar *x,*y,*v,sum1, sum2, sum3, sum4; 490 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 491 int n,i,jrow,j; 492 493 PetscFunctionBegin; 494 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 495 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 496 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 497 x = x + shift; /* shift for Fortran start by 1 indexing */ 498 idx = a->j; 499 v = a->a; 500 ii = a->i; 501 502 v += shift; /* shift for Fortran start by 1 indexing */ 503 idx += shift; 504 for (i=0; i<m; i++) { 505 jrow = ii[i]; 506 n = ii[i+1] - jrow; 507 sum1 = 0.0; 508 sum2 = 0.0; 509 sum3 = 0.0; 510 sum4 = 0.0; 511 for (j=0; j<n; j++) { 512 sum1 += v[jrow]*x[4*idx[jrow]]; 513 sum2 += v[jrow]*x[4*idx[jrow]+1]; 514 sum3 += v[jrow]*x[4*idx[jrow]+2]; 515 sum4 += v[jrow]*x[4*idx[jrow]+3]; 516 jrow++; 517 } 518 y[4*i] += sum1; 519 y[4*i+1] += sum2; 520 y[4*i+2] += sum3; 521 y[4*i+3] += sum4; 522 } 523 524 PLogFlops(8*a->nz - 4*m); 525 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 526 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 #undef __FUNC__ 530 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4" 531 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 532 { 533 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 535 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 536 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 537 538 PetscFunctionBegin; 539 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 540 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 541 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 542 y = y + shift; /* shift for Fortran start by 1 indexing */ 543 for (i=0; i<m; i++) { 544 idx = a->j + a->i[i] + shift; 545 v = a->a + a->i[i] + shift; 546 n = a->i[i+1] - a->i[i]; 547 alpha1 = x[4*i]; 548 alpha2 = x[4*i+1]; 549 alpha3 = x[4*i+2]; 550 alpha4 = x[4*i+3]; 551 while (n-->0) { 552 y[4*(*idx)] += alpha1*(*v); 553 y[4*(*idx)+1] += alpha2*(*v); 554 y[4*(*idx)+2] += alpha3*(*v); 555 y[4*(*idx)+3] += alpha4*(*v); 556 idx++; v++; 557 } 558 } 559 PLogFlops(8*a->nz - 4*a->n); 560 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 561 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 562 PetscFunctionReturn(0); 563 564 } 565 566 /* ------------------------------------------------------------------------------*/ 567 #undef __FUNC__ 568 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5" 569 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 570 { 571 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 572 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 573 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 574 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 575 int n,i,jrow,j; 576 577 PetscFunctionBegin; 578 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 579 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 580 x = x + shift; /* shift for Fortran start by 1 indexing */ 581 idx = a->j; 582 v = a->a; 583 ii = a->i; 584 585 v += shift; /* shift for Fortran start by 1 indexing */ 586 idx += shift; 587 for (i=0; i<m; i++) { 588 jrow = ii[i]; 589 n = ii[i+1] - jrow; 590 sum1 = 0.0; 591 sum2 = 0.0; 592 sum3 = 0.0; 593 sum4 = 0.0; 594 sum5 = 0.0; 595 for (j=0; j<n; j++) { 596 sum1 += v[jrow]*x[5*idx[jrow]]; 597 sum2 += v[jrow]*x[5*idx[jrow]+1]; 598 sum3 += v[jrow]*x[5*idx[jrow]+2]; 599 sum4 += v[jrow]*x[5*idx[jrow]+3]; 600 sum5 += v[jrow]*x[5*idx[jrow]+4]; 601 jrow++; 602 } 603 y[5*i] = sum1; 604 y[5*i+1] = sum2; 605 y[5*i+2] = sum3; 606 y[5*i+3] = sum4; 607 y[5*i+4] = sum5; 608 } 609 610 PLogFlops(10*a->nz - 5*m); 611 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 612 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNC__ 617 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5" 618 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 619 { 620 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 621 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 622 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 623 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 624 625 PetscFunctionBegin; 626 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 627 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 628 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 629 y = y + shift; /* shift for Fortran start by 1 indexing */ 630 for (i=0; i<m; i++) { 631 idx = a->j + a->i[i] + shift; 632 v = a->a + a->i[i] + shift; 633 n = a->i[i+1] - a->i[i]; 634 alpha1 = x[5*i]; 635 alpha2 = x[5*i+1]; 636 alpha3 = x[5*i+2]; 637 alpha4 = x[5*i+3]; 638 alpha5 = x[5*i+4]; 639 while (n-->0) { 640 y[5*(*idx)] += alpha1*(*v); 641 y[5*(*idx)+1] += alpha2*(*v); 642 y[5*(*idx)+2] += alpha3*(*v); 643 y[5*(*idx)+3] += alpha4*(*v); 644 y[5*(*idx)+4] += alpha5*(*v); 645 idx++; v++; 646 } 647 } 648 PLogFlops(10*a->nz - 5*a->n); 649 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 650 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 #undef __FUNC__ 655 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5" 656 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 657 { 658 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 659 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 660 Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 661 int ierr,m = a->m,*idx,shift = a->indexshift,*ii; 662 int n,i,jrow,j; 663 664 PetscFunctionBegin; 665 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 666 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 667 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 668 x = x + shift; /* shift for Fortran start by 1 indexing */ 669 idx = a->j; 670 v = a->a; 671 ii = a->i; 672 673 v += shift; /* shift for Fortran start by 1 indexing */ 674 idx += shift; 675 for (i=0; i<m; i++) { 676 jrow = ii[i]; 677 n = ii[i+1] - jrow; 678 sum1 = 0.0; 679 sum2 = 0.0; 680 sum3 = 0.0; 681 sum4 = 0.0; 682 sum5 = 0.0; 683 for (j=0; j<n; j++) { 684 sum1 += v[jrow]*x[5*idx[jrow]]; 685 sum2 += v[jrow]*x[5*idx[jrow]+1]; 686 sum3 += v[jrow]*x[5*idx[jrow]+2]; 687 sum4 += v[jrow]*x[5*idx[jrow]+3]; 688 sum5 += v[jrow]*x[5*idx[jrow]+4]; 689 jrow++; 690 } 691 y[5*i] += sum1; 692 y[5*i+1] += sum2; 693 y[5*i+2] += sum3; 694 y[5*i+3] += sum4; 695 y[5*i+4] += sum5; 696 } 697 698 PLogFlops(10*a->nz); 699 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 700 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNC__ 705 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5" 706 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 707 { 708 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 710 Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 711 int ierr,m = a->m,n,i,*idx,shift = a->indexshift; 712 713 PetscFunctionBegin; 714 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 715 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 716 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 717 y = y + shift; /* shift for Fortran start by 1 indexing */ 718 for (i=0; i<m; i++) { 719 idx = a->j + a->i[i] + shift; 720 v = a->a + a->i[i] + shift; 721 n = a->i[i+1] - a->i[i]; 722 alpha1 = x[5*i]; 723 alpha2 = x[5*i+1]; 724 alpha3 = x[5*i+2]; 725 alpha4 = x[5*i+3]; 726 alpha5 = x[5*i+4]; 727 while (n-->0) { 728 y[5*(*idx)] += alpha1*(*v); 729 y[5*(*idx)+1] += alpha2*(*v); 730 y[5*(*idx)+2] += alpha3*(*v); 731 y[5*(*idx)+3] += alpha4*(*v); 732 y[5*(*idx)+4] += alpha5*(*v); 733 idx++; v++; 734 } 735 } 736 PLogFlops(10*a->nz); 737 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 738 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 /* ---------------------------------------------------------------------------------- */ 743 #undef __FUNC__ 744 #define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ" 745 int MatCreateMAIJ(Mat A,int dof,Mat *maij) 746 { 747 int ierr; 748 Mat_SeqMAIJ *b; 749 Mat B; 750 751 PetscFunctionBegin; 752 ierr = MATCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 753 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 754 755 B->assembled = PETSC_TRUE; 756 B->ops->destroy = MatDestroy_SeqMAIJ; 757 b = (Mat_SeqMAIJ*)B->data; 758 759 b->AIJ = A; 760 b->dof = dof; 761 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 762 if (dof == 1) { 763 B->ops->mult = MatMult_SeqMAIJ_1; 764 B->ops->multadd = MatMultAdd_SeqMAIJ_1; 765 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_1; 766 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_1; 767 } else if (dof == 2) { 768 B->ops->mult = MatMult_SeqMAIJ_2; 769 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 770 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 771 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 772 } else if (dof == 3) { 773 B->ops->mult = MatMult_SeqMAIJ_3; 774 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 775 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 776 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 777 } else if (dof == 4) { 778 B->ops->mult = MatMult_SeqMAIJ_4; 779 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 780 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 781 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 782 } else if (dof == 5) { 783 B->ops->mult = MatMult_SeqMAIJ_5; 784 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 785 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 786 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 787 } else { 788 SETERRQ1(1,1,"Cannot handle a dof of %d\n",dof); 789 } 790 *maij = B; 791 PetscFunctionReturn(0); 792 } 793 794 795 796 797 798 799 800 801 802 803 804 805