1 /* 2 Defines the basic matrix operations for the MAIJ matrix storage format. 3 This format is used for restriction and interpolation operations for 4 multicomponent problems. It interpolates each component the same way 5 independently. 6 7 We provide: 8 MatMult() 9 MatMultTranspose() 10 MatMultTransposeAdd() 11 MatMultAdd() 12 and 13 MatCreateMAIJ(Mat,dof,Mat*) 14 15 This single directory handles both the sequential and parallel codes 16 */ 17 18 #include "src/mat/impls/maij/maij.h" 19 #include "vecimpl.h" 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatMAIJGetAIJ" 23 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 24 { 25 PetscErrorCode ierr; 26 PetscTruth ismpimaij,isseqmaij; 27 28 PetscFunctionBegin; 29 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 30 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 31 if (ismpimaij) { 32 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 33 34 *B = b->A; 35 } else if (isseqmaij) { 36 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 37 38 *B = b->AIJ; 39 } else { 40 *B = A; 41 } 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatMAIJRedimension" 47 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 48 { 49 PetscErrorCode ierr; 50 Mat Aij; 51 52 PetscFunctionBegin; 53 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 54 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDestroy_SeqMAIJ" 60 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 61 { 62 PetscErrorCode ierr; 63 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 64 65 PetscFunctionBegin; 66 if (b->AIJ) { 67 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 68 } 69 ierr = PetscFree(b);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatView_SeqMAIJ" 75 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 76 { 77 PetscErrorCode ierr; 78 Mat B; 79 80 PetscFunctionBegin; 81 ierr = MatConvert(A,MATSEQAIJ,&B); 82 ierr = MatView(B,viewer);CHKERRQ(ierr); 83 ierr = MatDestroy(B);CHKERRQ(ierr); 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatView_MPIMAIJ" 89 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 90 { 91 PetscErrorCode ierr; 92 Mat B; 93 94 PetscFunctionBegin; 95 ierr = MatConvert(A,MATMPIAIJ,&B); 96 ierr = MatView(B,viewer);CHKERRQ(ierr); 97 ierr = MatDestroy(B);CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_MPIMAIJ" 103 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 104 { 105 PetscErrorCode ierr; 106 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 107 108 PetscFunctionBegin; 109 if (b->AIJ) { 110 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 111 } 112 if (b->OAIJ) { 113 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 114 } 115 if (b->A) { 116 ierr = MatDestroy(b->A);CHKERRQ(ierr); 117 } 118 if (b->ctx) { 119 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 120 } 121 if (b->w) { 122 ierr = VecDestroy(b->w);CHKERRQ(ierr); 123 } 124 ierr = PetscFree(b);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 /*MC 129 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 130 multicomponent problems, interpolating or restricting each component the same way independently. 131 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 132 133 Operations provided: 134 . MatMult 135 . MatMultTranspose 136 . MatMultAdd 137 . MatMultTransposeAdd 138 139 Level: advanced 140 141 .seealso: MatCreateSeqDense 142 M*/ 143 144 EXTERN_C_BEGIN 145 #undef __FUNCT__ 146 #define __FUNCT__ "MatCreate_MAIJ" 147 PetscErrorCode MatCreate_MAIJ(Mat A) 148 { 149 PetscErrorCode ierr; 150 Mat_MPIMAIJ *b; 151 152 PetscFunctionBegin; 153 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 154 A->data = (void*)b; 155 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 156 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 157 A->factor = 0; 158 A->mapping = 0; 159 160 b->AIJ = 0; 161 b->dof = 0; 162 b->OAIJ = 0; 163 b->ctx = 0; 164 b->w = 0; 165 PetscFunctionReturn(0); 166 } 167 EXTERN_C_END 168 169 /* --------------------------------------------------------------------------------------*/ 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatMult_SeqMAIJ_2" 172 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 173 { 174 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 175 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 176 PetscScalar *x,*y,*v,sum1, sum2; 177 PetscErrorCode ierr; 178 PetscInt m = b->AIJ->m,*idx,*ii; 179 PetscInt n,i,jrow,j; 180 181 PetscFunctionBegin; 182 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 183 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 184 idx = a->j; 185 v = a->a; 186 ii = a->i; 187 188 for (i=0; i<m; i++) { 189 jrow = ii[i]; 190 n = ii[i+1] - jrow; 191 sum1 = 0.0; 192 sum2 = 0.0; 193 for (j=0; j<n; j++) { 194 sum1 += v[jrow]*x[2*idx[jrow]]; 195 sum2 += v[jrow]*x[2*idx[jrow]+1]; 196 jrow++; 197 } 198 y[2*i] = sum1; 199 y[2*i+1] = sum2; 200 } 201 202 PetscLogFlops(4*a->nz - 2*m); 203 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 204 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 210 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 211 { 212 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 214 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 215 PetscErrorCode ierr; 216 PetscInt m = b->AIJ->m,n,i,*idx; 217 218 PetscFunctionBegin; 219 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 220 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 221 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 222 223 for (i=0; i<m; i++) { 224 idx = a->j + a->i[i] ; 225 v = a->a + a->i[i] ; 226 n = a->i[i+1] - a->i[i]; 227 alpha1 = x[2*i]; 228 alpha2 = x[2*i+1]; 229 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 230 } 231 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 232 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 233 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 239 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 240 { 241 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 242 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 243 PetscScalar *x,*y,*v,sum1, sum2; 244 PetscErrorCode ierr; 245 PetscInt m = b->AIJ->m,*idx,*ii; 246 PetscInt n,i,jrow,j; 247 248 PetscFunctionBegin; 249 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 250 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 251 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 252 idx = a->j; 253 v = a->a; 254 ii = a->i; 255 256 for (i=0; i<m; i++) { 257 jrow = ii[i]; 258 n = ii[i+1] - jrow; 259 sum1 = 0.0; 260 sum2 = 0.0; 261 for (j=0; j<n; j++) { 262 sum1 += v[jrow]*x[2*idx[jrow]]; 263 sum2 += v[jrow]*x[2*idx[jrow]+1]; 264 jrow++; 265 } 266 y[2*i] += sum1; 267 y[2*i+1] += sum2; 268 } 269 270 PetscLogFlops(4*a->nz - 2*m); 271 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 272 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 273 PetscFunctionReturn(0); 274 } 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 277 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 278 { 279 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 280 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 281 PetscScalar *x,*y,*v,alpha1,alpha2; 282 PetscErrorCode ierr; 283 PetscInt m = b->AIJ->m,n,i,*idx; 284 285 PetscFunctionBegin; 286 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 287 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 288 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 289 290 for (i=0; i<m; i++) { 291 idx = a->j + a->i[i] ; 292 v = a->a + a->i[i] ; 293 n = a->i[i+1] - a->i[i]; 294 alpha1 = x[2*i]; 295 alpha2 = x[2*i+1]; 296 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 297 } 298 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 299 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 300 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 /* --------------------------------------------------------------------------------------*/ 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatMult_SeqMAIJ_3" 306 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 307 { 308 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 309 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 310 PetscScalar *x,*y,*v,sum1, sum2, sum3; 311 PetscErrorCode ierr; 312 PetscInt m = b->AIJ->m,*idx,*ii; 313 PetscInt n,i,jrow,j; 314 315 PetscFunctionBegin; 316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 318 idx = a->j; 319 v = a->a; 320 ii = a->i; 321 322 for (i=0; i<m; i++) { 323 jrow = ii[i]; 324 n = ii[i+1] - jrow; 325 sum1 = 0.0; 326 sum2 = 0.0; 327 sum3 = 0.0; 328 for (j=0; j<n; j++) { 329 sum1 += v[jrow]*x[3*idx[jrow]]; 330 sum2 += v[jrow]*x[3*idx[jrow]+1]; 331 sum3 += v[jrow]*x[3*idx[jrow]+2]; 332 jrow++; 333 } 334 y[3*i] = sum1; 335 y[3*i+1] = sum2; 336 y[3*i+2] = sum3; 337 } 338 339 PetscLogFlops(6*a->nz - 3*m); 340 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 341 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 347 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 348 { 349 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 350 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 351 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 352 PetscErrorCode ierr; 353 PetscInt m = b->AIJ->m,n,i,*idx; 354 355 PetscFunctionBegin; 356 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 357 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 358 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 359 360 for (i=0; i<m; i++) { 361 idx = a->j + a->i[i]; 362 v = a->a + a->i[i]; 363 n = a->i[i+1] - a->i[i]; 364 alpha1 = x[3*i]; 365 alpha2 = x[3*i+1]; 366 alpha3 = x[3*i+2]; 367 while (n-->0) { 368 y[3*(*idx)] += alpha1*(*v); 369 y[3*(*idx)+1] += alpha2*(*v); 370 y[3*(*idx)+2] += alpha3*(*v); 371 idx++; v++; 372 } 373 } 374 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 375 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 376 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 382 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 383 { 384 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 386 PetscScalar *x,*y,*v,sum1, sum2, sum3; 387 PetscErrorCode ierr; 388 PetscInt m = b->AIJ->m,*idx,*ii; 389 PetscInt n,i,jrow,j; 390 391 PetscFunctionBegin; 392 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 393 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 394 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 395 idx = a->j; 396 v = a->a; 397 ii = a->i; 398 399 for (i=0; i<m; i++) { 400 jrow = ii[i]; 401 n = ii[i+1] - jrow; 402 sum1 = 0.0; 403 sum2 = 0.0; 404 sum3 = 0.0; 405 for (j=0; j<n; j++) { 406 sum1 += v[jrow]*x[3*idx[jrow]]; 407 sum2 += v[jrow]*x[3*idx[jrow]+1]; 408 sum3 += v[jrow]*x[3*idx[jrow]+2]; 409 jrow++; 410 } 411 y[3*i] += sum1; 412 y[3*i+1] += sum2; 413 y[3*i+2] += sum3; 414 } 415 416 PetscLogFlops(6*a->nz); 417 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 418 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 423 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 424 { 425 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 426 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 427 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 428 PetscErrorCode ierr; 429 PetscInt m = b->AIJ->m,n,i,*idx; 430 431 PetscFunctionBegin; 432 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 433 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 434 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 435 for (i=0; i<m; i++) { 436 idx = a->j + a->i[i] ; 437 v = a->a + a->i[i] ; 438 n = a->i[i+1] - a->i[i]; 439 alpha1 = x[3*i]; 440 alpha2 = x[3*i+1]; 441 alpha3 = x[3*i+2]; 442 while (n-->0) { 443 y[3*(*idx)] += alpha1*(*v); 444 y[3*(*idx)+1] += alpha2*(*v); 445 y[3*(*idx)+2] += alpha3*(*v); 446 idx++; v++; 447 } 448 } 449 PetscLogFlops(6*a->nz); 450 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 451 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 455 /* ------------------------------------------------------------------------------*/ 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatMult_SeqMAIJ_4" 458 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 459 { 460 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 461 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 462 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 463 PetscErrorCode ierr; 464 PetscInt m = b->AIJ->m,*idx,*ii; 465 PetscInt n,i,jrow,j; 466 467 PetscFunctionBegin; 468 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 469 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 470 idx = a->j; 471 v = a->a; 472 ii = a->i; 473 474 for (i=0; i<m; i++) { 475 jrow = ii[i]; 476 n = ii[i+1] - jrow; 477 sum1 = 0.0; 478 sum2 = 0.0; 479 sum3 = 0.0; 480 sum4 = 0.0; 481 for (j=0; j<n; j++) { 482 sum1 += v[jrow]*x[4*idx[jrow]]; 483 sum2 += v[jrow]*x[4*idx[jrow]+1]; 484 sum3 += v[jrow]*x[4*idx[jrow]+2]; 485 sum4 += v[jrow]*x[4*idx[jrow]+3]; 486 jrow++; 487 } 488 y[4*i] = sum1; 489 y[4*i+1] = sum2; 490 y[4*i+2] = sum3; 491 y[4*i+3] = sum4; 492 } 493 494 PetscLogFlops(8*a->nz - 4*m); 495 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 496 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 502 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 503 { 504 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 505 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 506 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 507 PetscErrorCode ierr; 508 PetscInt m = b->AIJ->m,n,i,*idx; 509 510 PetscFunctionBegin; 511 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 512 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 513 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 514 for (i=0; i<m; i++) { 515 idx = a->j + a->i[i] ; 516 v = a->a + a->i[i] ; 517 n = a->i[i+1] - a->i[i]; 518 alpha1 = x[4*i]; 519 alpha2 = x[4*i+1]; 520 alpha3 = x[4*i+2]; 521 alpha4 = x[4*i+3]; 522 while (n-->0) { 523 y[4*(*idx)] += alpha1*(*v); 524 y[4*(*idx)+1] += alpha2*(*v); 525 y[4*(*idx)+2] += alpha3*(*v); 526 y[4*(*idx)+3] += alpha4*(*v); 527 idx++; v++; 528 } 529 } 530 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 531 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 532 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 #undef __FUNCT__ 537 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 538 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 539 { 540 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 542 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 543 PetscErrorCode ierr; 544 PetscInt m = b->AIJ->m,*idx,*ii; 545 PetscInt n,i,jrow,j; 546 547 PetscFunctionBegin; 548 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 549 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 550 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 551 idx = a->j; 552 v = a->a; 553 ii = a->i; 554 555 for (i=0; i<m; i++) { 556 jrow = ii[i]; 557 n = ii[i+1] - jrow; 558 sum1 = 0.0; 559 sum2 = 0.0; 560 sum3 = 0.0; 561 sum4 = 0.0; 562 for (j=0; j<n; j++) { 563 sum1 += v[jrow]*x[4*idx[jrow]]; 564 sum2 += v[jrow]*x[4*idx[jrow]+1]; 565 sum3 += v[jrow]*x[4*idx[jrow]+2]; 566 sum4 += v[jrow]*x[4*idx[jrow]+3]; 567 jrow++; 568 } 569 y[4*i] += sum1; 570 y[4*i+1] += sum2; 571 y[4*i+2] += sum3; 572 y[4*i+3] += sum4; 573 } 574 575 PetscLogFlops(8*a->nz - 4*m); 576 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 582 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 583 { 584 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 585 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 586 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 587 PetscErrorCode ierr; 588 PetscInt m = b->AIJ->m,n,i,*idx; 589 590 PetscFunctionBegin; 591 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 592 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 593 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 594 595 for (i=0; i<m; i++) { 596 idx = a->j + a->i[i] ; 597 v = a->a + a->i[i] ; 598 n = a->i[i+1] - a->i[i]; 599 alpha1 = x[4*i]; 600 alpha2 = x[4*i+1]; 601 alpha3 = x[4*i+2]; 602 alpha4 = x[4*i+3]; 603 while (n-->0) { 604 y[4*(*idx)] += alpha1*(*v); 605 y[4*(*idx)+1] += alpha2*(*v); 606 y[4*(*idx)+2] += alpha3*(*v); 607 y[4*(*idx)+3] += alpha4*(*v); 608 idx++; v++; 609 } 610 } 611 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 612 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 613 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 /* ------------------------------------------------------------------------------*/ 617 618 #undef __FUNCT__ 619 #define __FUNCT__ "MatMult_SeqMAIJ_5" 620 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 621 { 622 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 623 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 624 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 625 PetscErrorCode ierr; 626 PetscInt m = b->AIJ->m,*idx,*ii; 627 PetscInt n,i,jrow,j; 628 629 PetscFunctionBegin; 630 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 631 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 632 idx = a->j; 633 v = a->a; 634 ii = a->i; 635 636 for (i=0; i<m; i++) { 637 jrow = ii[i]; 638 n = ii[i+1] - jrow; 639 sum1 = 0.0; 640 sum2 = 0.0; 641 sum3 = 0.0; 642 sum4 = 0.0; 643 sum5 = 0.0; 644 for (j=0; j<n; j++) { 645 sum1 += v[jrow]*x[5*idx[jrow]]; 646 sum2 += v[jrow]*x[5*idx[jrow]+1]; 647 sum3 += v[jrow]*x[5*idx[jrow]+2]; 648 sum4 += v[jrow]*x[5*idx[jrow]+3]; 649 sum5 += v[jrow]*x[5*idx[jrow]+4]; 650 jrow++; 651 } 652 y[5*i] = sum1; 653 y[5*i+1] = sum2; 654 y[5*i+2] = sum3; 655 y[5*i+3] = sum4; 656 y[5*i+4] = sum5; 657 } 658 659 PetscLogFlops(10*a->nz - 5*m); 660 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 661 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 662 PetscFunctionReturn(0); 663 } 664 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 667 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 668 { 669 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 670 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 671 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 672 PetscErrorCode ierr; 673 PetscInt m = b->AIJ->m,n,i,*idx; 674 675 PetscFunctionBegin; 676 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 677 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 678 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 679 680 for (i=0; i<m; i++) { 681 idx = a->j + a->i[i] ; 682 v = a->a + a->i[i] ; 683 n = a->i[i+1] - a->i[i]; 684 alpha1 = x[5*i]; 685 alpha2 = x[5*i+1]; 686 alpha3 = x[5*i+2]; 687 alpha4 = x[5*i+3]; 688 alpha5 = x[5*i+4]; 689 while (n-->0) { 690 y[5*(*idx)] += alpha1*(*v); 691 y[5*(*idx)+1] += alpha2*(*v); 692 y[5*(*idx)+2] += alpha3*(*v); 693 y[5*(*idx)+3] += alpha4*(*v); 694 y[5*(*idx)+4] += alpha5*(*v); 695 idx++; v++; 696 } 697 } 698 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 699 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 700 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 706 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 707 { 708 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 710 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 711 PetscErrorCode ierr; 712 PetscInt m = b->AIJ->m,*idx,*ii; 713 PetscInt n,i,jrow,j; 714 715 PetscFunctionBegin; 716 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 717 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 718 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 719 idx = a->j; 720 v = a->a; 721 ii = a->i; 722 723 for (i=0; i<m; i++) { 724 jrow = ii[i]; 725 n = ii[i+1] - jrow; 726 sum1 = 0.0; 727 sum2 = 0.0; 728 sum3 = 0.0; 729 sum4 = 0.0; 730 sum5 = 0.0; 731 for (j=0; j<n; j++) { 732 sum1 += v[jrow]*x[5*idx[jrow]]; 733 sum2 += v[jrow]*x[5*idx[jrow]+1]; 734 sum3 += v[jrow]*x[5*idx[jrow]+2]; 735 sum4 += v[jrow]*x[5*idx[jrow]+3]; 736 sum5 += v[jrow]*x[5*idx[jrow]+4]; 737 jrow++; 738 } 739 y[5*i] += sum1; 740 y[5*i+1] += sum2; 741 y[5*i+2] += sum3; 742 y[5*i+3] += sum4; 743 y[5*i+4] += sum5; 744 } 745 746 PetscLogFlops(10*a->nz); 747 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 748 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 754 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 755 { 756 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 757 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 758 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 759 PetscErrorCode ierr; 760 PetscInt m = b->AIJ->m,n,i,*idx; 761 762 PetscFunctionBegin; 763 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 764 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 765 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 766 767 for (i=0; i<m; i++) { 768 idx = a->j + a->i[i] ; 769 v = a->a + a->i[i] ; 770 n = a->i[i+1] - a->i[i]; 771 alpha1 = x[5*i]; 772 alpha2 = x[5*i+1]; 773 alpha3 = x[5*i+2]; 774 alpha4 = x[5*i+3]; 775 alpha5 = x[5*i+4]; 776 while (n-->0) { 777 y[5*(*idx)] += alpha1*(*v); 778 y[5*(*idx)+1] += alpha2*(*v); 779 y[5*(*idx)+2] += alpha3*(*v); 780 y[5*(*idx)+3] += alpha4*(*v); 781 y[5*(*idx)+4] += alpha5*(*v); 782 idx++; v++; 783 } 784 } 785 PetscLogFlops(10*a->nz); 786 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 787 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 788 PetscFunctionReturn(0); 789 } 790 791 /* ------------------------------------------------------------------------------*/ 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatMult_SeqMAIJ_6" 794 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 795 { 796 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 798 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 799 PetscErrorCode ierr; 800 PetscInt m = b->AIJ->m,*idx,*ii; 801 PetscInt n,i,jrow,j; 802 803 PetscFunctionBegin; 804 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 805 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 806 idx = a->j; 807 v = a->a; 808 ii = a->i; 809 810 for (i=0; i<m; i++) { 811 jrow = ii[i]; 812 n = ii[i+1] - jrow; 813 sum1 = 0.0; 814 sum2 = 0.0; 815 sum3 = 0.0; 816 sum4 = 0.0; 817 sum5 = 0.0; 818 sum6 = 0.0; 819 for (j=0; j<n; j++) { 820 sum1 += v[jrow]*x[6*idx[jrow]]; 821 sum2 += v[jrow]*x[6*idx[jrow]+1]; 822 sum3 += v[jrow]*x[6*idx[jrow]+2]; 823 sum4 += v[jrow]*x[6*idx[jrow]+3]; 824 sum5 += v[jrow]*x[6*idx[jrow]+4]; 825 sum6 += v[jrow]*x[6*idx[jrow]+5]; 826 jrow++; 827 } 828 y[6*i] = sum1; 829 y[6*i+1] = sum2; 830 y[6*i+2] = sum3; 831 y[6*i+3] = sum4; 832 y[6*i+4] = sum5; 833 y[6*i+5] = sum6; 834 } 835 836 PetscLogFlops(12*a->nz - 6*m); 837 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 838 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 839 PetscFunctionReturn(0); 840 } 841 842 #undef __FUNCT__ 843 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 844 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 845 { 846 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 847 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 848 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 849 PetscErrorCode ierr; 850 PetscInt m = b->AIJ->m,n,i,*idx; 851 852 PetscFunctionBegin; 853 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 856 857 for (i=0; i<m; i++) { 858 idx = a->j + a->i[i] ; 859 v = a->a + a->i[i] ; 860 n = a->i[i+1] - a->i[i]; 861 alpha1 = x[6*i]; 862 alpha2 = x[6*i+1]; 863 alpha3 = x[6*i+2]; 864 alpha4 = x[6*i+3]; 865 alpha5 = x[6*i+4]; 866 alpha6 = x[6*i+5]; 867 while (n-->0) { 868 y[6*(*idx)] += alpha1*(*v); 869 y[6*(*idx)+1] += alpha2*(*v); 870 y[6*(*idx)+2] += alpha3*(*v); 871 y[6*(*idx)+3] += alpha4*(*v); 872 y[6*(*idx)+4] += alpha5*(*v); 873 y[6*(*idx)+5] += alpha6*(*v); 874 idx++; v++; 875 } 876 } 877 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 885 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 886 { 887 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 888 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 889 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 890 PetscErrorCode ierr; 891 PetscInt m = b->AIJ->m,*idx,*ii; 892 PetscInt n,i,jrow,j; 893 894 PetscFunctionBegin; 895 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 896 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 897 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 898 idx = a->j; 899 v = a->a; 900 ii = a->i; 901 902 for (i=0; i<m; i++) { 903 jrow = ii[i]; 904 n = ii[i+1] - jrow; 905 sum1 = 0.0; 906 sum2 = 0.0; 907 sum3 = 0.0; 908 sum4 = 0.0; 909 sum5 = 0.0; 910 sum6 = 0.0; 911 for (j=0; j<n; j++) { 912 sum1 += v[jrow]*x[6*idx[jrow]]; 913 sum2 += v[jrow]*x[6*idx[jrow]+1]; 914 sum3 += v[jrow]*x[6*idx[jrow]+2]; 915 sum4 += v[jrow]*x[6*idx[jrow]+3]; 916 sum5 += v[jrow]*x[6*idx[jrow]+4]; 917 sum6 += v[jrow]*x[6*idx[jrow]+5]; 918 jrow++; 919 } 920 y[6*i] += sum1; 921 y[6*i+1] += sum2; 922 y[6*i+2] += sum3; 923 y[6*i+3] += sum4; 924 y[6*i+4] += sum5; 925 y[6*i+5] += sum6; 926 } 927 928 PetscLogFlops(12*a->nz); 929 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 930 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 936 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 937 { 938 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 939 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 940 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 941 PetscErrorCode ierr; 942 PetscInt m = b->AIJ->m,n,i,*idx; 943 944 PetscFunctionBegin; 945 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 946 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 947 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 948 949 for (i=0; i<m; i++) { 950 idx = a->j + a->i[i] ; 951 v = a->a + a->i[i] ; 952 n = a->i[i+1] - a->i[i]; 953 alpha1 = x[6*i]; 954 alpha2 = x[6*i+1]; 955 alpha3 = x[6*i+2]; 956 alpha4 = x[6*i+3]; 957 alpha5 = x[6*i+4]; 958 alpha6 = x[6*i+5]; 959 while (n-->0) { 960 y[6*(*idx)] += alpha1*(*v); 961 y[6*(*idx)+1] += alpha2*(*v); 962 y[6*(*idx)+2] += alpha3*(*v); 963 y[6*(*idx)+3] += alpha4*(*v); 964 y[6*(*idx)+4] += alpha5*(*v); 965 y[6*(*idx)+5] += alpha6*(*v); 966 idx++; v++; 967 } 968 } 969 PetscLogFlops(12*a->nz); 970 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 971 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 /* ------------------------------------------------------------------------------*/ 976 #undef __FUNCT__ 977 #define __FUNCT__ "MatMult_SeqMAIJ_8" 978 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 979 { 980 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 981 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 982 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 983 PetscErrorCode ierr; 984 PetscInt m = b->AIJ->m,*idx,*ii; 985 PetscInt n,i,jrow,j; 986 987 PetscFunctionBegin; 988 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 989 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 990 idx = a->j; 991 v = a->a; 992 ii = a->i; 993 994 for (i=0; i<m; i++) { 995 jrow = ii[i]; 996 n = ii[i+1] - jrow; 997 sum1 = 0.0; 998 sum2 = 0.0; 999 sum3 = 0.0; 1000 sum4 = 0.0; 1001 sum5 = 0.0; 1002 sum6 = 0.0; 1003 sum7 = 0.0; 1004 sum8 = 0.0; 1005 for (j=0; j<n; j++) { 1006 sum1 += v[jrow]*x[8*idx[jrow]]; 1007 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1008 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1009 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1010 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1011 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1012 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1013 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1014 jrow++; 1015 } 1016 y[8*i] = sum1; 1017 y[8*i+1] = sum2; 1018 y[8*i+2] = sum3; 1019 y[8*i+3] = sum4; 1020 y[8*i+4] = sum5; 1021 y[8*i+5] = sum6; 1022 y[8*i+6] = sum7; 1023 y[8*i+7] = sum8; 1024 } 1025 1026 PetscLogFlops(16*a->nz - 8*m); 1027 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1028 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } 1031 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1034 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1035 { 1036 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1037 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1038 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1039 PetscErrorCode ierr; 1040 PetscInt m = b->AIJ->m,n,i,*idx; 1041 1042 PetscFunctionBegin; 1043 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1044 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1045 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1046 1047 for (i=0; i<m; i++) { 1048 idx = a->j + a->i[i] ; 1049 v = a->a + a->i[i] ; 1050 n = a->i[i+1] - a->i[i]; 1051 alpha1 = x[8*i]; 1052 alpha2 = x[8*i+1]; 1053 alpha3 = x[8*i+2]; 1054 alpha4 = x[8*i+3]; 1055 alpha5 = x[8*i+4]; 1056 alpha6 = x[8*i+5]; 1057 alpha7 = x[8*i+6]; 1058 alpha8 = x[8*i+7]; 1059 while (n-->0) { 1060 y[8*(*idx)] += alpha1*(*v); 1061 y[8*(*idx)+1] += alpha2*(*v); 1062 y[8*(*idx)+2] += alpha3*(*v); 1063 y[8*(*idx)+3] += alpha4*(*v); 1064 y[8*(*idx)+4] += alpha5*(*v); 1065 y[8*(*idx)+5] += alpha6*(*v); 1066 y[8*(*idx)+6] += alpha7*(*v); 1067 y[8*(*idx)+7] += alpha8*(*v); 1068 idx++; v++; 1069 } 1070 } 1071 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1079 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1080 { 1081 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1082 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1083 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1084 PetscErrorCode ierr; 1085 PetscInt m = b->AIJ->m,*idx,*ii; 1086 PetscInt n,i,jrow,j; 1087 1088 PetscFunctionBegin; 1089 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1090 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1091 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1092 idx = a->j; 1093 v = a->a; 1094 ii = a->i; 1095 1096 for (i=0; i<m; i++) { 1097 jrow = ii[i]; 1098 n = ii[i+1] - jrow; 1099 sum1 = 0.0; 1100 sum2 = 0.0; 1101 sum3 = 0.0; 1102 sum4 = 0.0; 1103 sum5 = 0.0; 1104 sum6 = 0.0; 1105 sum7 = 0.0; 1106 sum8 = 0.0; 1107 for (j=0; j<n; j++) { 1108 sum1 += v[jrow]*x[8*idx[jrow]]; 1109 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1110 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1111 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1112 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1113 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1114 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1115 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1116 jrow++; 1117 } 1118 y[8*i] += sum1; 1119 y[8*i+1] += sum2; 1120 y[8*i+2] += sum3; 1121 y[8*i+3] += sum4; 1122 y[8*i+4] += sum5; 1123 y[8*i+5] += sum6; 1124 y[8*i+6] += sum7; 1125 y[8*i+7] += sum8; 1126 } 1127 1128 PetscLogFlops(16*a->nz); 1129 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1130 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1136 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1137 { 1138 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1139 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1140 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1141 PetscErrorCode ierr; 1142 PetscInt m = b->AIJ->m,n,i,*idx; 1143 1144 PetscFunctionBegin; 1145 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1146 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1147 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1148 for (i=0; i<m; i++) { 1149 idx = a->j + a->i[i] ; 1150 v = a->a + a->i[i] ; 1151 n = a->i[i+1] - a->i[i]; 1152 alpha1 = x[8*i]; 1153 alpha2 = x[8*i+1]; 1154 alpha3 = x[8*i+2]; 1155 alpha4 = x[8*i+3]; 1156 alpha5 = x[8*i+4]; 1157 alpha6 = x[8*i+5]; 1158 alpha7 = x[8*i+6]; 1159 alpha8 = x[8*i+7]; 1160 while (n-->0) { 1161 y[8*(*idx)] += alpha1*(*v); 1162 y[8*(*idx)+1] += alpha2*(*v); 1163 y[8*(*idx)+2] += alpha3*(*v); 1164 y[8*(*idx)+3] += alpha4*(*v); 1165 y[8*(*idx)+4] += alpha5*(*v); 1166 y[8*(*idx)+5] += alpha6*(*v); 1167 y[8*(*idx)+6] += alpha7*(*v); 1168 y[8*(*idx)+7] += alpha8*(*v); 1169 idx++; v++; 1170 } 1171 } 1172 PetscLogFlops(16*a->nz); 1173 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1174 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 /* ------------------------------------------------------------------------------*/ 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1181 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1182 { 1183 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1185 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1186 PetscErrorCode ierr; 1187 PetscInt m = b->AIJ->m,*idx,*ii; 1188 PetscInt n,i,jrow,j; 1189 1190 PetscFunctionBegin; 1191 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1192 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1193 idx = a->j; 1194 v = a->a; 1195 ii = a->i; 1196 1197 for (i=0; i<m; i++) { 1198 jrow = ii[i]; 1199 n = ii[i+1] - jrow; 1200 sum1 = 0.0; 1201 sum2 = 0.0; 1202 sum3 = 0.0; 1203 sum4 = 0.0; 1204 sum5 = 0.0; 1205 sum6 = 0.0; 1206 sum7 = 0.0; 1207 sum8 = 0.0; 1208 sum9 = 0.0; 1209 for (j=0; j<n; j++) { 1210 sum1 += v[jrow]*x[9*idx[jrow]]; 1211 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1212 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1213 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1214 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1215 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1216 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1217 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1218 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1219 jrow++; 1220 } 1221 y[9*i] = sum1; 1222 y[9*i+1] = sum2; 1223 y[9*i+2] = sum3; 1224 y[9*i+3] = sum4; 1225 y[9*i+4] = sum5; 1226 y[9*i+5] = sum6; 1227 y[9*i+6] = sum7; 1228 y[9*i+7] = sum8; 1229 y[9*i+8] = sum9; 1230 } 1231 1232 PetscLogFlops(18*a->nz - 9*m); 1233 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1234 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1240 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1241 { 1242 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1243 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1244 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1245 PetscErrorCode ierr; 1246 PetscInt m = b->AIJ->m,n,i,*idx; 1247 1248 PetscFunctionBegin; 1249 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1250 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1251 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1252 1253 for (i=0; i<m; i++) { 1254 idx = a->j + a->i[i] ; 1255 v = a->a + a->i[i] ; 1256 n = a->i[i+1] - a->i[i]; 1257 alpha1 = x[9*i]; 1258 alpha2 = x[9*i+1]; 1259 alpha3 = x[9*i+2]; 1260 alpha4 = x[9*i+3]; 1261 alpha5 = x[9*i+4]; 1262 alpha6 = x[9*i+5]; 1263 alpha7 = x[9*i+6]; 1264 alpha8 = x[9*i+7]; 1265 alpha9 = x[9*i+8]; 1266 while (n-->0) { 1267 y[9*(*idx)] += alpha1*(*v); 1268 y[9*(*idx)+1] += alpha2*(*v); 1269 y[9*(*idx)+2] += alpha3*(*v); 1270 y[9*(*idx)+3] += alpha4*(*v); 1271 y[9*(*idx)+4] += alpha5*(*v); 1272 y[9*(*idx)+5] += alpha6*(*v); 1273 y[9*(*idx)+6] += alpha7*(*v); 1274 y[9*(*idx)+7] += alpha8*(*v); 1275 y[9*(*idx)+8] += alpha9*(*v); 1276 idx++; v++; 1277 } 1278 } 1279 PetscLogFlops(18*a->nz - 9*b->AIJ->n); 1280 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1281 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1287 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1288 { 1289 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1290 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1291 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1292 PetscErrorCode ierr; 1293 PetscInt m = b->AIJ->m,*idx,*ii; 1294 PetscInt n,i,jrow,j; 1295 1296 PetscFunctionBegin; 1297 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1298 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1299 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1300 idx = a->j; 1301 v = a->a; 1302 ii = a->i; 1303 1304 for (i=0; i<m; i++) { 1305 jrow = ii[i]; 1306 n = ii[i+1] - jrow; 1307 sum1 = 0.0; 1308 sum2 = 0.0; 1309 sum3 = 0.0; 1310 sum4 = 0.0; 1311 sum5 = 0.0; 1312 sum6 = 0.0; 1313 sum7 = 0.0; 1314 sum8 = 0.0; 1315 sum9 = 0.0; 1316 for (j=0; j<n; j++) { 1317 sum1 += v[jrow]*x[9*idx[jrow]]; 1318 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1319 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1320 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1321 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1322 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1323 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1324 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1325 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1326 jrow++; 1327 } 1328 y[9*i] += sum1; 1329 y[9*i+1] += sum2; 1330 y[9*i+2] += sum3; 1331 y[9*i+3] += sum4; 1332 y[9*i+4] += sum5; 1333 y[9*i+5] += sum6; 1334 y[9*i+6] += sum7; 1335 y[9*i+7] += sum8; 1336 y[9*i+8] += sum9; 1337 } 1338 1339 PetscLogFlops(18*a->nz); 1340 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1341 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNCT__ 1346 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1347 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1348 { 1349 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1350 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1351 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1352 PetscErrorCode ierr; 1353 PetscInt m = b->AIJ->m,n,i,*idx; 1354 1355 PetscFunctionBegin; 1356 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1357 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1358 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1359 for (i=0; i<m; i++) { 1360 idx = a->j + a->i[i] ; 1361 v = a->a + a->i[i] ; 1362 n = a->i[i+1] - a->i[i]; 1363 alpha1 = x[9*i]; 1364 alpha2 = x[9*i+1]; 1365 alpha3 = x[9*i+2]; 1366 alpha4 = x[9*i+3]; 1367 alpha5 = x[9*i+4]; 1368 alpha6 = x[9*i+5]; 1369 alpha7 = x[9*i+6]; 1370 alpha8 = x[9*i+7]; 1371 alpha9 = x[9*i+8]; 1372 while (n-->0) { 1373 y[9*(*idx)] += alpha1*(*v); 1374 y[9*(*idx)+1] += alpha2*(*v); 1375 y[9*(*idx)+2] += alpha3*(*v); 1376 y[9*(*idx)+3] += alpha4*(*v); 1377 y[9*(*idx)+4] += alpha5*(*v); 1378 y[9*(*idx)+5] += alpha6*(*v); 1379 y[9*(*idx)+6] += alpha7*(*v); 1380 y[9*(*idx)+7] += alpha8*(*v); 1381 y[9*(*idx)+8] += alpha9*(*v); 1382 idx++; v++; 1383 } 1384 } 1385 PetscLogFlops(18*a->nz); 1386 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 /*--------------------------------------------------------------------------------------------*/ 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1394 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1395 { 1396 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1397 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1398 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1399 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1400 PetscErrorCode ierr; 1401 PetscInt m = b->AIJ->m,*idx,*ii; 1402 PetscInt n,i,jrow,j; 1403 1404 PetscFunctionBegin; 1405 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1406 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1407 idx = a->j; 1408 v = a->a; 1409 ii = a->i; 1410 1411 for (i=0; i<m; i++) { 1412 jrow = ii[i]; 1413 n = ii[i+1] - jrow; 1414 sum1 = 0.0; 1415 sum2 = 0.0; 1416 sum3 = 0.0; 1417 sum4 = 0.0; 1418 sum5 = 0.0; 1419 sum6 = 0.0; 1420 sum7 = 0.0; 1421 sum8 = 0.0; 1422 sum9 = 0.0; 1423 sum10 = 0.0; 1424 sum11 = 0.0; 1425 sum12 = 0.0; 1426 sum13 = 0.0; 1427 sum14 = 0.0; 1428 sum15 = 0.0; 1429 sum16 = 0.0; 1430 for (j=0; j<n; j++) { 1431 sum1 += v[jrow]*x[16*idx[jrow]]; 1432 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1433 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1434 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1435 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1436 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1437 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1438 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1439 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1440 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1441 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1442 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1443 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1444 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1445 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1446 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1447 jrow++; 1448 } 1449 y[16*i] = sum1; 1450 y[16*i+1] = sum2; 1451 y[16*i+2] = sum3; 1452 y[16*i+3] = sum4; 1453 y[16*i+4] = sum5; 1454 y[16*i+5] = sum6; 1455 y[16*i+6] = sum7; 1456 y[16*i+7] = sum8; 1457 y[16*i+8] = sum9; 1458 y[16*i+9] = sum10; 1459 y[16*i+10] = sum11; 1460 y[16*i+11] = sum12; 1461 y[16*i+12] = sum13; 1462 y[16*i+13] = sum14; 1463 y[16*i+14] = sum15; 1464 y[16*i+15] = sum16; 1465 } 1466 1467 PetscLogFlops(32*a->nz - 16*m); 1468 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1469 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1475 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1476 { 1477 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1478 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1479 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1480 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1481 PetscErrorCode ierr; 1482 PetscInt m = b->AIJ->m,n,i,*idx; 1483 1484 PetscFunctionBegin; 1485 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1486 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1487 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1488 1489 for (i=0; i<m; i++) { 1490 idx = a->j + a->i[i] ; 1491 v = a->a + a->i[i] ; 1492 n = a->i[i+1] - a->i[i]; 1493 alpha1 = x[16*i]; 1494 alpha2 = x[16*i+1]; 1495 alpha3 = x[16*i+2]; 1496 alpha4 = x[16*i+3]; 1497 alpha5 = x[16*i+4]; 1498 alpha6 = x[16*i+5]; 1499 alpha7 = x[16*i+6]; 1500 alpha8 = x[16*i+7]; 1501 alpha9 = x[16*i+8]; 1502 alpha10 = x[16*i+9]; 1503 alpha11 = x[16*i+10]; 1504 alpha12 = x[16*i+11]; 1505 alpha13 = x[16*i+12]; 1506 alpha14 = x[16*i+13]; 1507 alpha15 = x[16*i+14]; 1508 alpha16 = x[16*i+15]; 1509 while (n-->0) { 1510 y[16*(*idx)] += alpha1*(*v); 1511 y[16*(*idx)+1] += alpha2*(*v); 1512 y[16*(*idx)+2] += alpha3*(*v); 1513 y[16*(*idx)+3] += alpha4*(*v); 1514 y[16*(*idx)+4] += alpha5*(*v); 1515 y[16*(*idx)+5] += alpha6*(*v); 1516 y[16*(*idx)+6] += alpha7*(*v); 1517 y[16*(*idx)+7] += alpha8*(*v); 1518 y[16*(*idx)+8] += alpha9*(*v); 1519 y[16*(*idx)+9] += alpha10*(*v); 1520 y[16*(*idx)+10] += alpha11*(*v); 1521 y[16*(*idx)+11] += alpha12*(*v); 1522 y[16*(*idx)+12] += alpha13*(*v); 1523 y[16*(*idx)+13] += alpha14*(*v); 1524 y[16*(*idx)+14] += alpha15*(*v); 1525 y[16*(*idx)+15] += alpha16*(*v); 1526 idx++; v++; 1527 } 1528 } 1529 PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1530 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1531 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1537 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1538 { 1539 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1541 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1542 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1543 PetscErrorCode ierr; 1544 PetscInt m = b->AIJ->m,*idx,*ii; 1545 PetscInt n,i,jrow,j; 1546 1547 PetscFunctionBegin; 1548 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1549 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1550 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1551 idx = a->j; 1552 v = a->a; 1553 ii = a->i; 1554 1555 for (i=0; i<m; i++) { 1556 jrow = ii[i]; 1557 n = ii[i+1] - jrow; 1558 sum1 = 0.0; 1559 sum2 = 0.0; 1560 sum3 = 0.0; 1561 sum4 = 0.0; 1562 sum5 = 0.0; 1563 sum6 = 0.0; 1564 sum7 = 0.0; 1565 sum8 = 0.0; 1566 sum9 = 0.0; 1567 sum10 = 0.0; 1568 sum11 = 0.0; 1569 sum12 = 0.0; 1570 sum13 = 0.0; 1571 sum14 = 0.0; 1572 sum15 = 0.0; 1573 sum16 = 0.0; 1574 for (j=0; j<n; j++) { 1575 sum1 += v[jrow]*x[16*idx[jrow]]; 1576 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1577 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1578 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1579 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1580 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1581 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1582 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1583 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1584 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1585 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1586 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1587 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1588 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1589 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1590 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1591 jrow++; 1592 } 1593 y[16*i] += sum1; 1594 y[16*i+1] += sum2; 1595 y[16*i+2] += sum3; 1596 y[16*i+3] += sum4; 1597 y[16*i+4] += sum5; 1598 y[16*i+5] += sum6; 1599 y[16*i+6] += sum7; 1600 y[16*i+7] += sum8; 1601 y[16*i+8] += sum9; 1602 y[16*i+9] += sum10; 1603 y[16*i+10] += sum11; 1604 y[16*i+11] += sum12; 1605 y[16*i+12] += sum13; 1606 y[16*i+13] += sum14; 1607 y[16*i+14] += sum15; 1608 y[16*i+15] += sum16; 1609 } 1610 1611 PetscLogFlops(32*a->nz); 1612 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1613 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1619 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1620 { 1621 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1622 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1623 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1624 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1625 PetscErrorCode ierr; 1626 PetscInt m = b->AIJ->m,n,i,*idx; 1627 1628 PetscFunctionBegin; 1629 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1630 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1631 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1632 for (i=0; i<m; i++) { 1633 idx = a->j + a->i[i] ; 1634 v = a->a + a->i[i] ; 1635 n = a->i[i+1] - a->i[i]; 1636 alpha1 = x[16*i]; 1637 alpha2 = x[16*i+1]; 1638 alpha3 = x[16*i+2]; 1639 alpha4 = x[16*i+3]; 1640 alpha5 = x[16*i+4]; 1641 alpha6 = x[16*i+5]; 1642 alpha7 = x[16*i+6]; 1643 alpha8 = x[16*i+7]; 1644 alpha9 = x[16*i+8]; 1645 alpha10 = x[16*i+9]; 1646 alpha11 = x[16*i+10]; 1647 alpha12 = x[16*i+11]; 1648 alpha13 = x[16*i+12]; 1649 alpha14 = x[16*i+13]; 1650 alpha15 = x[16*i+14]; 1651 alpha16 = x[16*i+15]; 1652 while (n-->0) { 1653 y[16*(*idx)] += alpha1*(*v); 1654 y[16*(*idx)+1] += alpha2*(*v); 1655 y[16*(*idx)+2] += alpha3*(*v); 1656 y[16*(*idx)+3] += alpha4*(*v); 1657 y[16*(*idx)+4] += alpha5*(*v); 1658 y[16*(*idx)+5] += alpha6*(*v); 1659 y[16*(*idx)+6] += alpha7*(*v); 1660 y[16*(*idx)+7] += alpha8*(*v); 1661 y[16*(*idx)+8] += alpha9*(*v); 1662 y[16*(*idx)+9] += alpha10*(*v); 1663 y[16*(*idx)+10] += alpha11*(*v); 1664 y[16*(*idx)+11] += alpha12*(*v); 1665 y[16*(*idx)+12] += alpha13*(*v); 1666 y[16*(*idx)+13] += alpha14*(*v); 1667 y[16*(*idx)+14] += alpha15*(*v); 1668 y[16*(*idx)+15] += alpha16*(*v); 1669 idx++; v++; 1670 } 1671 } 1672 PetscLogFlops(32*a->nz); 1673 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1674 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 1678 /*===================================================================================*/ 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1681 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1682 { 1683 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1684 PetscErrorCode ierr; 1685 1686 PetscFunctionBegin; 1687 /* start the scatter */ 1688 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1689 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1690 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1691 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1697 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1698 { 1699 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1704 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1705 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1706 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1712 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1713 { 1714 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 /* start the scatter */ 1719 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1720 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1721 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1722 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1728 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1729 { 1730 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1735 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1736 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1737 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 1743 PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,Mat *B) 1744 { 1745 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1746 Mat a = b->AIJ; 1747 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 1748 PetscErrorCode ierr; 1749 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 1750 PetscInt *cols; 1751 PetscScalar *vals; 1752 1753 PetscFunctionBegin; 1754 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 1755 ierr = PetscMalloc(dof*m*sizeof(int),&ilen);CHKERRQ(ierr); 1756 for (i=0; i<m; i++) { 1757 nmax = PetscMax(nmax,aij->ilen[i]); 1758 for (j=0; j<dof; j++) { 1759 ilen[dof*i+j] = aij->ilen[i]; 1760 } 1761 } 1762 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,B);CHKERRQ(ierr); 1763 ierr = MatSetOption(*B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1764 ierr = PetscFree(ilen);CHKERRQ(ierr); 1765 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 1766 ii = 0; 1767 for (i=0; i<m; i++) { 1768 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1769 for (j=0; j<dof; j++) { 1770 for (k=0; k<ncols; k++) { 1771 icols[k] = dof*cols[k]+j; 1772 } 1773 ierr = MatSetValues_SeqAIJ(*B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 1774 ii++; 1775 } 1776 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1777 } 1778 ierr = PetscFree(icols);CHKERRQ(ierr); 1779 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1780 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #include "src/mat/impls/aij/mpi/mpiaij.h" 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 1787 PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A,const MatType newtype,Mat *B) 1788 { 1789 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 1790 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ; 1791 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 1792 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 1793 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 1794 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 1795 PetscInt dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols,omax; 1796 PetscInt rstart,cstart,*garray,ii,k; 1797 PetscErrorCode ierr; 1798 PetscScalar *vals,*ovals; 1799 1800 PetscFunctionBegin; 1801 ierr = PetscMalloc2(A->m,PETSC_INT,&dnz,A->m,PETSC_INT,&onz);CHKERRQ(ierr); 1802 for (i=0; i<A->m/dof; i++) { 1803 nmax = PetscMax(nmax,AIJ->ilen[i]); 1804 onmax = PetscMax(onmax,OAIJ->ilen[i]); 1805 for (j=0; j<dof; j++) { 1806 dnz[dof*i+j] = AIJ->ilen[i]; 1807 onz[dof*i+j] = OAIJ->ilen[i]; 1808 } 1809 } 1810 ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,B);CHKERRQ(ierr); 1811 ierr = MatSetOption(*B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1812 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 1813 1814 ierr = PetscMalloc2(nmax,PETSC_INT,&icols,omax,PETSC_INT,&oicols);CHKERRQ(ierr); 1815 rstart = dof*mpiaij->rstart; 1816 cstart = dof*mpiaij->cstart; 1817 garray = mpiaij->garray; 1818 1819 ii = rstart; 1820 for (i=0; i<A->m/dof; i++) { 1821 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1822 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 1823 for (j=0; j<dof; j++) { 1824 for (k=0; k<ncols; k++) { 1825 icols[k] = cstart + dof*cols[k]+j; 1826 } 1827 for (k=0; k<oncols; k++) { 1828 oicols[k] = dof*garray[ocols[k]]+j; 1829 } 1830 ierr = MatSetValues_MPIAIJ(*B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 1831 ierr = MatSetValues_MPIAIJ(*B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 1832 ii++; 1833 } 1834 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1835 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 1836 } 1837 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 1838 1839 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1840 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 1845 1846 /* ---------------------------------------------------------------------------------- */ 1847 /*MC 1848 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1849 operations for multicomponent problems. It interpolates each component the same 1850 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 1851 and MATMPIAIJ for distributed matrices. 1852 1853 Operations provided: 1854 + MatMult 1855 . MatMultTranspose 1856 . MatMultAdd 1857 . MatMultTransposeAdd 1858 - MatView 1859 1860 Level: advanced 1861 1862 M*/ 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatCreateMAIJ" 1865 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 1866 { 1867 PetscErrorCode ierr; 1868 PetscMPIInt size; 1869 PetscInt n; 1870 Mat_MPIMAIJ *b; 1871 Mat B; 1872 1873 PetscFunctionBegin; 1874 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1875 1876 if (dof == 1) { 1877 *maij = A; 1878 } else { 1879 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1880 B->assembled = PETSC_TRUE; 1881 1882 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1883 if (size == 1) { 1884 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1885 B->ops->destroy = MatDestroy_SeqMAIJ; 1886 B->ops->view = MatView_SeqMAIJ; 1887 b = (Mat_MPIMAIJ*)B->data; 1888 b->dof = dof; 1889 b->AIJ = A; 1890 if (dof == 2) { 1891 B->ops->mult = MatMult_SeqMAIJ_2; 1892 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1893 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1894 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1895 } else if (dof == 3) { 1896 B->ops->mult = MatMult_SeqMAIJ_3; 1897 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1898 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1899 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1900 } else if (dof == 4) { 1901 B->ops->mult = MatMult_SeqMAIJ_4; 1902 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1903 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1904 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1905 } else if (dof == 5) { 1906 B->ops->mult = MatMult_SeqMAIJ_5; 1907 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1908 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1909 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1910 } else if (dof == 6) { 1911 B->ops->mult = MatMult_SeqMAIJ_6; 1912 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1913 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1914 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1915 } else if (dof == 8) { 1916 B->ops->mult = MatMult_SeqMAIJ_8; 1917 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1918 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1919 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1920 } else if (dof == 9) { 1921 B->ops->mult = MatMult_SeqMAIJ_9; 1922 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1923 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1924 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1925 } else if (dof == 16) { 1926 B->ops->mult = MatMult_SeqMAIJ_16; 1927 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1928 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1929 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1930 } else { 1931 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1932 } 1933 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 1934 } else { 1935 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1936 IS from,to; 1937 Vec gvec; 1938 PetscInt *garray,i; 1939 1940 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1941 B->ops->destroy = MatDestroy_MPIMAIJ; 1942 B->ops->view = MatView_MPIMAIJ; 1943 b = (Mat_MPIMAIJ*)B->data; 1944 b->dof = dof; 1945 b->A = A; 1946 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1947 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1948 1949 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1950 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1951 1952 /* create two temporary Index sets for build scatter gather */ 1953 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 1954 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1955 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1956 ierr = PetscFree(garray);CHKERRQ(ierr); 1957 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1958 1959 /* create temporary global vector to generate scatter context */ 1960 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1961 1962 /* generate the scatter context */ 1963 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1964 1965 ierr = ISDestroy(from);CHKERRQ(ierr); 1966 ierr = ISDestroy(to);CHKERRQ(ierr); 1967 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1968 1969 B->ops->mult = MatMult_MPIMAIJ_dof; 1970 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1971 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1972 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1973 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 1974 } 1975 *maij = B; 1976 ierr = MatView_Private(B);CHKERRQ(ierr); 1977 } 1978 PetscFunctionReturn(0); 1979 } 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992