1 /* 2 Defines the basic matrix operations for the MAIJ matrix storage format. 3 This format is used for restriction and interpolation operations for 4 multicomponent problems. It interpolates each component the same way 5 independently. 6 7 We provide: 8 MatMult() 9 MatMultTranspose() 10 MatMultTransposeAdd() 11 MatMultAdd() 12 and 13 MatCreateMAIJ(Mat,dof,Mat*) 14 15 This single directory handles both the sequential and parallel codes 16 */ 17 18 #include "src/mat/impls/maij/maij.h" 19 #include "vecimpl.h" 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatMAIJGetAIJ" 23 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 24 { 25 PetscErrorCode ierr; 26 PetscTruth ismpimaij,isseqmaij; 27 28 PetscFunctionBegin; 29 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 30 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 31 if (ismpimaij) { 32 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 33 34 *B = b->A; 35 } else if (isseqmaij) { 36 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 37 38 *B = b->AIJ; 39 } else { 40 *B = A; 41 } 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatMAIJRedimension" 47 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 48 { 49 PetscErrorCode ierr; 50 Mat Aij; 51 52 PetscFunctionBegin; 53 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 54 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDestroy_SeqMAIJ" 60 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 61 { 62 PetscErrorCode ierr; 63 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 64 65 PetscFunctionBegin; 66 if (b->AIJ) { 67 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 68 } 69 ierr = PetscFree(b);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatView_SeqMAIJ" 75 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 76 { 77 PetscErrorCode ierr; 78 Mat B; 79 80 PetscFunctionBegin; 81 ierr = MatConvert(A,MATSEQAIJ,&B); 82 ierr = MatView(B,viewer);CHKERRQ(ierr); 83 ierr = MatDestroy(B);CHKERRQ(ierr); 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNCT__ 88 #define __FUNCT__ "MatView_MPIMAIJ" 89 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 90 { 91 PetscErrorCode ierr; 92 Mat B; 93 94 PetscFunctionBegin; 95 ierr = MatConvert(A,MATMPIAIJ,&B); 96 ierr = MatView(B,viewer);CHKERRQ(ierr); 97 ierr = MatDestroy(B);CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_MPIMAIJ" 103 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 104 { 105 PetscErrorCode ierr; 106 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 107 108 PetscFunctionBegin; 109 if (b->AIJ) { 110 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 111 } 112 if (b->OAIJ) { 113 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 114 } 115 if (b->A) { 116 ierr = MatDestroy(b->A);CHKERRQ(ierr); 117 } 118 if (b->ctx) { 119 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 120 } 121 if (b->w) { 122 ierr = VecDestroy(b->w);CHKERRQ(ierr); 123 } 124 ierr = PetscFree(b);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 /*MC 129 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 130 multicomponent problems, interpolating or restricting each component the same way independently. 131 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 132 133 Operations provided: 134 . MatMult 135 . MatMultTranspose 136 . MatMultAdd 137 . MatMultTransposeAdd 138 139 Level: advanced 140 141 .seealso: MatCreateSeqDense 142 M*/ 143 144 EXTERN_C_BEGIN 145 #undef __FUNCT__ 146 #define __FUNCT__ "MatCreate_MAIJ" 147 PetscErrorCode MatCreate_MAIJ(Mat A) 148 { 149 PetscErrorCode ierr; 150 Mat_MPIMAIJ *b; 151 152 PetscFunctionBegin; 153 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 154 A->data = (void*)b; 155 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 156 A->factor = 0; 157 A->mapping = 0; 158 159 b->AIJ = 0; 160 b->dof = 0; 161 b->OAIJ = 0; 162 b->ctx = 0; 163 b->w = 0; 164 PetscFunctionReturn(0); 165 } 166 EXTERN_C_END 167 168 /* --------------------------------------------------------------------------------------*/ 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatMult_SeqMAIJ_2" 171 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 172 { 173 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 174 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 175 PetscScalar *x,*y,*v,sum1, sum2; 176 PetscErrorCode ierr; 177 PetscInt m = b->AIJ->m,*idx,*ii; 178 PetscInt n,i,jrow,j; 179 180 PetscFunctionBegin; 181 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 182 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 183 idx = a->j; 184 v = a->a; 185 ii = a->i; 186 187 for (i=0; i<m; i++) { 188 jrow = ii[i]; 189 n = ii[i+1] - jrow; 190 sum1 = 0.0; 191 sum2 = 0.0; 192 for (j=0; j<n; j++) { 193 sum1 += v[jrow]*x[2*idx[jrow]]; 194 sum2 += v[jrow]*x[2*idx[jrow]+1]; 195 jrow++; 196 } 197 y[2*i] = sum1; 198 y[2*i+1] = sum2; 199 } 200 201 PetscLogFlops(4*a->nz - 2*m); 202 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 203 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 209 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 210 { 211 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 212 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 213 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 214 PetscErrorCode ierr; 215 PetscInt m = b->AIJ->m,n,i,*idx; 216 217 PetscFunctionBegin; 218 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 219 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 220 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 221 222 for (i=0; i<m; i++) { 223 idx = a->j + a->i[i] ; 224 v = a->a + a->i[i] ; 225 n = a->i[i+1] - a->i[i]; 226 alpha1 = x[2*i]; 227 alpha2 = x[2*i+1]; 228 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 229 } 230 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 231 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 232 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 238 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 239 { 240 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 241 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 242 PetscScalar *x,*y,*v,sum1, sum2; 243 PetscErrorCode ierr; 244 PetscInt m = b->AIJ->m,*idx,*ii; 245 PetscInt n,i,jrow,j; 246 247 PetscFunctionBegin; 248 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 249 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 250 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 251 idx = a->j; 252 v = a->a; 253 ii = a->i; 254 255 for (i=0; i<m; i++) { 256 jrow = ii[i]; 257 n = ii[i+1] - jrow; 258 sum1 = 0.0; 259 sum2 = 0.0; 260 for (j=0; j<n; j++) { 261 sum1 += v[jrow]*x[2*idx[jrow]]; 262 sum2 += v[jrow]*x[2*idx[jrow]+1]; 263 jrow++; 264 } 265 y[2*i] += sum1; 266 y[2*i+1] += sum2; 267 } 268 269 PetscLogFlops(4*a->nz - 2*m); 270 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 271 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 276 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 277 { 278 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 280 PetscScalar *x,*y,*v,alpha1,alpha2; 281 PetscErrorCode ierr; 282 PetscInt m = b->AIJ->m,n,i,*idx; 283 284 PetscFunctionBegin; 285 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 286 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 287 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 288 289 for (i=0; i<m; i++) { 290 idx = a->j + a->i[i] ; 291 v = a->a + a->i[i] ; 292 n = a->i[i+1] - a->i[i]; 293 alpha1 = x[2*i]; 294 alpha2 = x[2*i+1]; 295 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 296 } 297 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 298 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 299 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 /* --------------------------------------------------------------------------------------*/ 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatMult_SeqMAIJ_3" 305 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 306 { 307 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 308 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 309 PetscScalar *x,*y,*v,sum1, sum2, sum3; 310 PetscErrorCode ierr; 311 PetscInt m = b->AIJ->m,*idx,*ii; 312 PetscInt n,i,jrow,j; 313 314 PetscFunctionBegin; 315 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 316 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 317 idx = a->j; 318 v = a->a; 319 ii = a->i; 320 321 for (i=0; i<m; i++) { 322 jrow = ii[i]; 323 n = ii[i+1] - jrow; 324 sum1 = 0.0; 325 sum2 = 0.0; 326 sum3 = 0.0; 327 for (j=0; j<n; j++) { 328 sum1 += v[jrow]*x[3*idx[jrow]]; 329 sum2 += v[jrow]*x[3*idx[jrow]+1]; 330 sum3 += v[jrow]*x[3*idx[jrow]+2]; 331 jrow++; 332 } 333 y[3*i] = sum1; 334 y[3*i+1] = sum2; 335 y[3*i+2] = sum3; 336 } 337 338 PetscLogFlops(6*a->nz - 3*m); 339 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 346 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 347 { 348 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 349 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 350 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 351 PetscErrorCode ierr; 352 PetscInt m = b->AIJ->m,n,i,*idx; 353 354 PetscFunctionBegin; 355 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 356 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 357 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 358 359 for (i=0; i<m; i++) { 360 idx = a->j + a->i[i]; 361 v = a->a + a->i[i]; 362 n = a->i[i+1] - a->i[i]; 363 alpha1 = x[3*i]; 364 alpha2 = x[3*i+1]; 365 alpha3 = x[3*i+2]; 366 while (n-->0) { 367 y[3*(*idx)] += alpha1*(*v); 368 y[3*(*idx)+1] += alpha2*(*v); 369 y[3*(*idx)+2] += alpha3*(*v); 370 idx++; v++; 371 } 372 } 373 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 374 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 375 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 381 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 382 { 383 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 385 PetscScalar *x,*y,*v,sum1, sum2, sum3; 386 PetscErrorCode ierr; 387 PetscInt m = b->AIJ->m,*idx,*ii; 388 PetscInt n,i,jrow,j; 389 390 PetscFunctionBegin; 391 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 392 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 393 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 394 idx = a->j; 395 v = a->a; 396 ii = a->i; 397 398 for (i=0; i<m; i++) { 399 jrow = ii[i]; 400 n = ii[i+1] - jrow; 401 sum1 = 0.0; 402 sum2 = 0.0; 403 sum3 = 0.0; 404 for (j=0; j<n; j++) { 405 sum1 += v[jrow]*x[3*idx[jrow]]; 406 sum2 += v[jrow]*x[3*idx[jrow]+1]; 407 sum3 += v[jrow]*x[3*idx[jrow]+2]; 408 jrow++; 409 } 410 y[3*i] += sum1; 411 y[3*i+1] += sum2; 412 y[3*i+2] += sum3; 413 } 414 415 PetscLogFlops(6*a->nz); 416 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 417 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 422 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 423 { 424 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 425 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 426 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 427 PetscErrorCode ierr; 428 PetscInt m = b->AIJ->m,n,i,*idx; 429 430 PetscFunctionBegin; 431 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 434 for (i=0; i<m; i++) { 435 idx = a->j + a->i[i] ; 436 v = a->a + a->i[i] ; 437 n = a->i[i+1] - a->i[i]; 438 alpha1 = x[3*i]; 439 alpha2 = x[3*i+1]; 440 alpha3 = x[3*i+2]; 441 while (n-->0) { 442 y[3*(*idx)] += alpha1*(*v); 443 y[3*(*idx)+1] += alpha2*(*v); 444 y[3*(*idx)+2] += alpha3*(*v); 445 idx++; v++; 446 } 447 } 448 PetscLogFlops(6*a->nz); 449 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 450 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 /* ------------------------------------------------------------------------------*/ 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatMult_SeqMAIJ_4" 457 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 458 { 459 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 460 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 461 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 462 PetscErrorCode ierr; 463 PetscInt m = b->AIJ->m,*idx,*ii; 464 PetscInt n,i,jrow,j; 465 466 PetscFunctionBegin; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 469 idx = a->j; 470 v = a->a; 471 ii = a->i; 472 473 for (i=0; i<m; i++) { 474 jrow = ii[i]; 475 n = ii[i+1] - jrow; 476 sum1 = 0.0; 477 sum2 = 0.0; 478 sum3 = 0.0; 479 sum4 = 0.0; 480 for (j=0; j<n; j++) { 481 sum1 += v[jrow]*x[4*idx[jrow]]; 482 sum2 += v[jrow]*x[4*idx[jrow]+1]; 483 sum3 += v[jrow]*x[4*idx[jrow]+2]; 484 sum4 += v[jrow]*x[4*idx[jrow]+3]; 485 jrow++; 486 } 487 y[4*i] = sum1; 488 y[4*i+1] = sum2; 489 y[4*i+2] = sum3; 490 y[4*i+3] = sum4; 491 } 492 493 PetscLogFlops(8*a->nz - 4*m); 494 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 495 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNCT__ 500 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 501 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 502 { 503 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 505 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 506 PetscErrorCode ierr; 507 PetscInt m = b->AIJ->m,n,i,*idx; 508 509 PetscFunctionBegin; 510 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 511 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 512 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 513 for (i=0; i<m; i++) { 514 idx = a->j + a->i[i] ; 515 v = a->a + a->i[i] ; 516 n = a->i[i+1] - a->i[i]; 517 alpha1 = x[4*i]; 518 alpha2 = x[4*i+1]; 519 alpha3 = x[4*i+2]; 520 alpha4 = x[4*i+3]; 521 while (n-->0) { 522 y[4*(*idx)] += alpha1*(*v); 523 y[4*(*idx)+1] += alpha2*(*v); 524 y[4*(*idx)+2] += alpha3*(*v); 525 y[4*(*idx)+3] += alpha4*(*v); 526 idx++; v++; 527 } 528 } 529 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 530 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 531 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 537 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 538 { 539 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 541 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 542 PetscErrorCode ierr; 543 PetscInt m = b->AIJ->m,*idx,*ii; 544 PetscInt n,i,jrow,j; 545 546 PetscFunctionBegin; 547 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 548 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 549 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 550 idx = a->j; 551 v = a->a; 552 ii = a->i; 553 554 for (i=0; i<m; i++) { 555 jrow = ii[i]; 556 n = ii[i+1] - jrow; 557 sum1 = 0.0; 558 sum2 = 0.0; 559 sum3 = 0.0; 560 sum4 = 0.0; 561 for (j=0; j<n; j++) { 562 sum1 += v[jrow]*x[4*idx[jrow]]; 563 sum2 += v[jrow]*x[4*idx[jrow]+1]; 564 sum3 += v[jrow]*x[4*idx[jrow]+2]; 565 sum4 += v[jrow]*x[4*idx[jrow]+3]; 566 jrow++; 567 } 568 y[4*i] += sum1; 569 y[4*i+1] += sum2; 570 y[4*i+2] += sum3; 571 y[4*i+3] += sum4; 572 } 573 574 PetscLogFlops(8*a->nz - 4*m); 575 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 576 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 581 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 582 { 583 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 584 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 585 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 586 PetscErrorCode ierr; 587 PetscInt m = b->AIJ->m,n,i,*idx; 588 589 PetscFunctionBegin; 590 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 591 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 592 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 593 594 for (i=0; i<m; i++) { 595 idx = a->j + a->i[i] ; 596 v = a->a + a->i[i] ; 597 n = a->i[i+1] - a->i[i]; 598 alpha1 = x[4*i]; 599 alpha2 = x[4*i+1]; 600 alpha3 = x[4*i+2]; 601 alpha4 = x[4*i+3]; 602 while (n-->0) { 603 y[4*(*idx)] += alpha1*(*v); 604 y[4*(*idx)+1] += alpha2*(*v); 605 y[4*(*idx)+2] += alpha3*(*v); 606 y[4*(*idx)+3] += alpha4*(*v); 607 idx++; v++; 608 } 609 } 610 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 611 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 612 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 613 PetscFunctionReturn(0); 614 } 615 /* ------------------------------------------------------------------------------*/ 616 617 #undef __FUNCT__ 618 #define __FUNCT__ "MatMult_SeqMAIJ_5" 619 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 620 { 621 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 622 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 623 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 624 PetscErrorCode ierr; 625 PetscInt m = b->AIJ->m,*idx,*ii; 626 PetscInt n,i,jrow,j; 627 628 PetscFunctionBegin; 629 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 630 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 631 idx = a->j; 632 v = a->a; 633 ii = a->i; 634 635 for (i=0; i<m; i++) { 636 jrow = ii[i]; 637 n = ii[i+1] - jrow; 638 sum1 = 0.0; 639 sum2 = 0.0; 640 sum3 = 0.0; 641 sum4 = 0.0; 642 sum5 = 0.0; 643 for (j=0; j<n; j++) { 644 sum1 += v[jrow]*x[5*idx[jrow]]; 645 sum2 += v[jrow]*x[5*idx[jrow]+1]; 646 sum3 += v[jrow]*x[5*idx[jrow]+2]; 647 sum4 += v[jrow]*x[5*idx[jrow]+3]; 648 sum5 += v[jrow]*x[5*idx[jrow]+4]; 649 jrow++; 650 } 651 y[5*i] = sum1; 652 y[5*i+1] = sum2; 653 y[5*i+2] = sum3; 654 y[5*i+3] = sum4; 655 y[5*i+4] = sum5; 656 } 657 658 PetscLogFlops(10*a->nz - 5*m); 659 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 660 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 666 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 667 { 668 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 669 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 670 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 671 PetscErrorCode ierr; 672 PetscInt m = b->AIJ->m,n,i,*idx; 673 674 PetscFunctionBegin; 675 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 676 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 677 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 678 679 for (i=0; i<m; i++) { 680 idx = a->j + a->i[i] ; 681 v = a->a + a->i[i] ; 682 n = a->i[i+1] - a->i[i]; 683 alpha1 = x[5*i]; 684 alpha2 = x[5*i+1]; 685 alpha3 = x[5*i+2]; 686 alpha4 = x[5*i+3]; 687 alpha5 = x[5*i+4]; 688 while (n-->0) { 689 y[5*(*idx)] += alpha1*(*v); 690 y[5*(*idx)+1] += alpha2*(*v); 691 y[5*(*idx)+2] += alpha3*(*v); 692 y[5*(*idx)+3] += alpha4*(*v); 693 y[5*(*idx)+4] += alpha5*(*v); 694 idx++; v++; 695 } 696 } 697 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 698 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 699 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 705 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 706 { 707 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 708 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 709 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 710 PetscErrorCode ierr; 711 PetscInt m = b->AIJ->m,*idx,*ii; 712 PetscInt n,i,jrow,j; 713 714 PetscFunctionBegin; 715 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 716 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 717 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 718 idx = a->j; 719 v = a->a; 720 ii = a->i; 721 722 for (i=0; i<m; i++) { 723 jrow = ii[i]; 724 n = ii[i+1] - jrow; 725 sum1 = 0.0; 726 sum2 = 0.0; 727 sum3 = 0.0; 728 sum4 = 0.0; 729 sum5 = 0.0; 730 for (j=0; j<n; j++) { 731 sum1 += v[jrow]*x[5*idx[jrow]]; 732 sum2 += v[jrow]*x[5*idx[jrow]+1]; 733 sum3 += v[jrow]*x[5*idx[jrow]+2]; 734 sum4 += v[jrow]*x[5*idx[jrow]+3]; 735 sum5 += v[jrow]*x[5*idx[jrow]+4]; 736 jrow++; 737 } 738 y[5*i] += sum1; 739 y[5*i+1] += sum2; 740 y[5*i+2] += sum3; 741 y[5*i+3] += sum4; 742 y[5*i+4] += sum5; 743 } 744 745 PetscLogFlops(10*a->nz); 746 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 747 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 753 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 754 { 755 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 757 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 758 PetscErrorCode ierr; 759 PetscInt m = b->AIJ->m,n,i,*idx; 760 761 PetscFunctionBegin; 762 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 763 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 764 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 765 766 for (i=0; i<m; i++) { 767 idx = a->j + a->i[i] ; 768 v = a->a + a->i[i] ; 769 n = a->i[i+1] - a->i[i]; 770 alpha1 = x[5*i]; 771 alpha2 = x[5*i+1]; 772 alpha3 = x[5*i+2]; 773 alpha4 = x[5*i+3]; 774 alpha5 = x[5*i+4]; 775 while (n-->0) { 776 y[5*(*idx)] += alpha1*(*v); 777 y[5*(*idx)+1] += alpha2*(*v); 778 y[5*(*idx)+2] += alpha3*(*v); 779 y[5*(*idx)+3] += alpha4*(*v); 780 y[5*(*idx)+4] += alpha5*(*v); 781 idx++; v++; 782 } 783 } 784 PetscLogFlops(10*a->nz); 785 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 786 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 787 PetscFunctionReturn(0); 788 } 789 790 /* ------------------------------------------------------------------------------*/ 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatMult_SeqMAIJ_6" 793 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 794 { 795 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 796 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 797 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 798 PetscErrorCode ierr; 799 PetscInt m = b->AIJ->m,*idx,*ii; 800 PetscInt n,i,jrow,j; 801 802 PetscFunctionBegin; 803 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 804 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 805 idx = a->j; 806 v = a->a; 807 ii = a->i; 808 809 for (i=0; i<m; i++) { 810 jrow = ii[i]; 811 n = ii[i+1] - jrow; 812 sum1 = 0.0; 813 sum2 = 0.0; 814 sum3 = 0.0; 815 sum4 = 0.0; 816 sum5 = 0.0; 817 sum6 = 0.0; 818 for (j=0; j<n; j++) { 819 sum1 += v[jrow]*x[6*idx[jrow]]; 820 sum2 += v[jrow]*x[6*idx[jrow]+1]; 821 sum3 += v[jrow]*x[6*idx[jrow]+2]; 822 sum4 += v[jrow]*x[6*idx[jrow]+3]; 823 sum5 += v[jrow]*x[6*idx[jrow]+4]; 824 sum6 += v[jrow]*x[6*idx[jrow]+5]; 825 jrow++; 826 } 827 y[6*i] = sum1; 828 y[6*i+1] = sum2; 829 y[6*i+2] = sum3; 830 y[6*i+3] = sum4; 831 y[6*i+4] = sum5; 832 y[6*i+5] = sum6; 833 } 834 835 PetscLogFlops(12*a->nz - 6*m); 836 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 837 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 843 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 844 { 845 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 846 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 847 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 848 PetscErrorCode ierr; 849 PetscInt m = b->AIJ->m,n,i,*idx; 850 851 PetscFunctionBegin; 852 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 853 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 854 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 855 856 for (i=0; i<m; i++) { 857 idx = a->j + a->i[i] ; 858 v = a->a + a->i[i] ; 859 n = a->i[i+1] - a->i[i]; 860 alpha1 = x[6*i]; 861 alpha2 = x[6*i+1]; 862 alpha3 = x[6*i+2]; 863 alpha4 = x[6*i+3]; 864 alpha5 = x[6*i+4]; 865 alpha6 = x[6*i+5]; 866 while (n-->0) { 867 y[6*(*idx)] += alpha1*(*v); 868 y[6*(*idx)+1] += alpha2*(*v); 869 y[6*(*idx)+2] += alpha3*(*v); 870 y[6*(*idx)+3] += alpha4*(*v); 871 y[6*(*idx)+4] += alpha5*(*v); 872 y[6*(*idx)+5] += alpha6*(*v); 873 idx++; v++; 874 } 875 } 876 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 877 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 878 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 884 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 885 { 886 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 887 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 888 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 889 PetscErrorCode ierr; 890 PetscInt m = b->AIJ->m,*idx,*ii; 891 PetscInt n,i,jrow,j; 892 893 PetscFunctionBegin; 894 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 895 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 896 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 897 idx = a->j; 898 v = a->a; 899 ii = a->i; 900 901 for (i=0; i<m; i++) { 902 jrow = ii[i]; 903 n = ii[i+1] - jrow; 904 sum1 = 0.0; 905 sum2 = 0.0; 906 sum3 = 0.0; 907 sum4 = 0.0; 908 sum5 = 0.0; 909 sum6 = 0.0; 910 for (j=0; j<n; j++) { 911 sum1 += v[jrow]*x[6*idx[jrow]]; 912 sum2 += v[jrow]*x[6*idx[jrow]+1]; 913 sum3 += v[jrow]*x[6*idx[jrow]+2]; 914 sum4 += v[jrow]*x[6*idx[jrow]+3]; 915 sum5 += v[jrow]*x[6*idx[jrow]+4]; 916 sum6 += v[jrow]*x[6*idx[jrow]+5]; 917 jrow++; 918 } 919 y[6*i] += sum1; 920 y[6*i+1] += sum2; 921 y[6*i+2] += sum3; 922 y[6*i+3] += sum4; 923 y[6*i+4] += sum5; 924 y[6*i+5] += sum6; 925 } 926 927 PetscLogFlops(12*a->nz); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 935 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 936 { 937 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 938 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 939 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 940 PetscErrorCode ierr; 941 PetscInt m = b->AIJ->m,n,i,*idx; 942 943 PetscFunctionBegin; 944 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 945 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 946 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 947 948 for (i=0; i<m; i++) { 949 idx = a->j + a->i[i] ; 950 v = a->a + a->i[i] ; 951 n = a->i[i+1] - a->i[i]; 952 alpha1 = x[6*i]; 953 alpha2 = x[6*i+1]; 954 alpha3 = x[6*i+2]; 955 alpha4 = x[6*i+3]; 956 alpha5 = x[6*i+4]; 957 alpha6 = x[6*i+5]; 958 while (n-->0) { 959 y[6*(*idx)] += alpha1*(*v); 960 y[6*(*idx)+1] += alpha2*(*v); 961 y[6*(*idx)+2] += alpha3*(*v); 962 y[6*(*idx)+3] += alpha4*(*v); 963 y[6*(*idx)+4] += alpha5*(*v); 964 y[6*(*idx)+5] += alpha6*(*v); 965 idx++; v++; 966 } 967 } 968 PetscLogFlops(12*a->nz); 969 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 970 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } 973 974 /* ------------------------------------------------------------------------------*/ 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatMult_SeqMAIJ_8" 977 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 978 { 979 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 980 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 981 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 982 PetscErrorCode ierr; 983 PetscInt m = b->AIJ->m,*idx,*ii; 984 PetscInt n,i,jrow,j; 985 986 PetscFunctionBegin; 987 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 988 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 989 idx = a->j; 990 v = a->a; 991 ii = a->i; 992 993 for (i=0; i<m; i++) { 994 jrow = ii[i]; 995 n = ii[i+1] - jrow; 996 sum1 = 0.0; 997 sum2 = 0.0; 998 sum3 = 0.0; 999 sum4 = 0.0; 1000 sum5 = 0.0; 1001 sum6 = 0.0; 1002 sum7 = 0.0; 1003 sum8 = 0.0; 1004 for (j=0; j<n; j++) { 1005 sum1 += v[jrow]*x[8*idx[jrow]]; 1006 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1007 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1008 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1009 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1010 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1011 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1012 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1013 jrow++; 1014 } 1015 y[8*i] = sum1; 1016 y[8*i+1] = sum2; 1017 y[8*i+2] = sum3; 1018 y[8*i+3] = sum4; 1019 y[8*i+4] = sum5; 1020 y[8*i+5] = sum6; 1021 y[8*i+6] = sum7; 1022 y[8*i+7] = sum8; 1023 } 1024 1025 PetscLogFlops(16*a->nz - 8*m); 1026 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1027 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1033 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1034 { 1035 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1036 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1037 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1038 PetscErrorCode ierr; 1039 PetscInt m = b->AIJ->m,n,i,*idx; 1040 1041 PetscFunctionBegin; 1042 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1043 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1044 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1045 1046 for (i=0; i<m; i++) { 1047 idx = a->j + a->i[i] ; 1048 v = a->a + a->i[i] ; 1049 n = a->i[i+1] - a->i[i]; 1050 alpha1 = x[8*i]; 1051 alpha2 = x[8*i+1]; 1052 alpha3 = x[8*i+2]; 1053 alpha4 = x[8*i+3]; 1054 alpha5 = x[8*i+4]; 1055 alpha6 = x[8*i+5]; 1056 alpha7 = x[8*i+6]; 1057 alpha8 = x[8*i+7]; 1058 while (n-->0) { 1059 y[8*(*idx)] += alpha1*(*v); 1060 y[8*(*idx)+1] += alpha2*(*v); 1061 y[8*(*idx)+2] += alpha3*(*v); 1062 y[8*(*idx)+3] += alpha4*(*v); 1063 y[8*(*idx)+4] += alpha5*(*v); 1064 y[8*(*idx)+5] += alpha6*(*v); 1065 y[8*(*idx)+6] += alpha7*(*v); 1066 y[8*(*idx)+7] += alpha8*(*v); 1067 idx++; v++; 1068 } 1069 } 1070 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1071 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1072 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1078 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1079 { 1080 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1081 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1082 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1083 PetscErrorCode ierr; 1084 PetscInt m = b->AIJ->m,*idx,*ii; 1085 PetscInt n,i,jrow,j; 1086 1087 PetscFunctionBegin; 1088 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1089 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1090 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1091 idx = a->j; 1092 v = a->a; 1093 ii = a->i; 1094 1095 for (i=0; i<m; i++) { 1096 jrow = ii[i]; 1097 n = ii[i+1] - jrow; 1098 sum1 = 0.0; 1099 sum2 = 0.0; 1100 sum3 = 0.0; 1101 sum4 = 0.0; 1102 sum5 = 0.0; 1103 sum6 = 0.0; 1104 sum7 = 0.0; 1105 sum8 = 0.0; 1106 for (j=0; j<n; j++) { 1107 sum1 += v[jrow]*x[8*idx[jrow]]; 1108 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1109 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1110 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1111 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1112 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1113 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1114 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1115 jrow++; 1116 } 1117 y[8*i] += sum1; 1118 y[8*i+1] += sum2; 1119 y[8*i+2] += sum3; 1120 y[8*i+3] += sum4; 1121 y[8*i+4] += sum5; 1122 y[8*i+5] += sum6; 1123 y[8*i+6] += sum7; 1124 y[8*i+7] += sum8; 1125 } 1126 1127 PetscLogFlops(16*a->nz); 1128 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1129 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1135 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1136 { 1137 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1139 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1140 PetscErrorCode ierr; 1141 PetscInt m = b->AIJ->m,n,i,*idx; 1142 1143 PetscFunctionBegin; 1144 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1145 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1146 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1147 for (i=0; i<m; i++) { 1148 idx = a->j + a->i[i] ; 1149 v = a->a + a->i[i] ; 1150 n = a->i[i+1] - a->i[i]; 1151 alpha1 = x[8*i]; 1152 alpha2 = x[8*i+1]; 1153 alpha3 = x[8*i+2]; 1154 alpha4 = x[8*i+3]; 1155 alpha5 = x[8*i+4]; 1156 alpha6 = x[8*i+5]; 1157 alpha7 = x[8*i+6]; 1158 alpha8 = x[8*i+7]; 1159 while (n-->0) { 1160 y[8*(*idx)] += alpha1*(*v); 1161 y[8*(*idx)+1] += alpha2*(*v); 1162 y[8*(*idx)+2] += alpha3*(*v); 1163 y[8*(*idx)+3] += alpha4*(*v); 1164 y[8*(*idx)+4] += alpha5*(*v); 1165 y[8*(*idx)+5] += alpha6*(*v); 1166 y[8*(*idx)+6] += alpha7*(*v); 1167 y[8*(*idx)+7] += alpha8*(*v); 1168 idx++; v++; 1169 } 1170 } 1171 PetscLogFlops(16*a->nz); 1172 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1173 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 /* ------------------------------------------------------------------------------*/ 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1180 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1181 { 1182 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1183 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1184 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1185 PetscErrorCode ierr; 1186 PetscInt m = b->AIJ->m,*idx,*ii; 1187 PetscInt n,i,jrow,j; 1188 1189 PetscFunctionBegin; 1190 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1191 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1192 idx = a->j; 1193 v = a->a; 1194 ii = a->i; 1195 1196 for (i=0; i<m; i++) { 1197 jrow = ii[i]; 1198 n = ii[i+1] - jrow; 1199 sum1 = 0.0; 1200 sum2 = 0.0; 1201 sum3 = 0.0; 1202 sum4 = 0.0; 1203 sum5 = 0.0; 1204 sum6 = 0.0; 1205 sum7 = 0.0; 1206 sum8 = 0.0; 1207 sum9 = 0.0; 1208 for (j=0; j<n; j++) { 1209 sum1 += v[jrow]*x[9*idx[jrow]]; 1210 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1211 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1212 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1213 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1214 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1215 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1216 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1217 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1218 jrow++; 1219 } 1220 y[9*i] = sum1; 1221 y[9*i+1] = sum2; 1222 y[9*i+2] = sum3; 1223 y[9*i+3] = sum4; 1224 y[9*i+4] = sum5; 1225 y[9*i+5] = sum6; 1226 y[9*i+6] = sum7; 1227 y[9*i+7] = sum8; 1228 y[9*i+8] = sum9; 1229 } 1230 1231 PetscLogFlops(18*a->nz - 9*m); 1232 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1233 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1239 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1240 { 1241 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1242 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1243 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1244 PetscErrorCode ierr; 1245 PetscInt m = b->AIJ->m,n,i,*idx; 1246 1247 PetscFunctionBegin; 1248 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1249 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1251 1252 for (i=0; i<m; i++) { 1253 idx = a->j + a->i[i] ; 1254 v = a->a + a->i[i] ; 1255 n = a->i[i+1] - a->i[i]; 1256 alpha1 = x[9*i]; 1257 alpha2 = x[9*i+1]; 1258 alpha3 = x[9*i+2]; 1259 alpha4 = x[9*i+3]; 1260 alpha5 = x[9*i+4]; 1261 alpha6 = x[9*i+5]; 1262 alpha7 = x[9*i+6]; 1263 alpha8 = x[9*i+7]; 1264 alpha9 = x[9*i+8]; 1265 while (n-->0) { 1266 y[9*(*idx)] += alpha1*(*v); 1267 y[9*(*idx)+1] += alpha2*(*v); 1268 y[9*(*idx)+2] += alpha3*(*v); 1269 y[9*(*idx)+3] += alpha4*(*v); 1270 y[9*(*idx)+4] += alpha5*(*v); 1271 y[9*(*idx)+5] += alpha6*(*v); 1272 y[9*(*idx)+6] += alpha7*(*v); 1273 y[9*(*idx)+7] += alpha8*(*v); 1274 y[9*(*idx)+8] += alpha9*(*v); 1275 idx++; v++; 1276 } 1277 } 1278 PetscLogFlops(18*a->nz - 9*b->AIJ->n); 1279 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1280 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1286 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1287 { 1288 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1289 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1290 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1291 PetscErrorCode ierr; 1292 PetscInt m = b->AIJ->m,*idx,*ii; 1293 PetscInt n,i,jrow,j; 1294 1295 PetscFunctionBegin; 1296 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1297 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1298 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1299 idx = a->j; 1300 v = a->a; 1301 ii = a->i; 1302 1303 for (i=0; i<m; i++) { 1304 jrow = ii[i]; 1305 n = ii[i+1] - jrow; 1306 sum1 = 0.0; 1307 sum2 = 0.0; 1308 sum3 = 0.0; 1309 sum4 = 0.0; 1310 sum5 = 0.0; 1311 sum6 = 0.0; 1312 sum7 = 0.0; 1313 sum8 = 0.0; 1314 sum9 = 0.0; 1315 for (j=0; j<n; j++) { 1316 sum1 += v[jrow]*x[9*idx[jrow]]; 1317 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1318 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1319 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1320 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1321 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1322 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1323 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1324 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1325 jrow++; 1326 } 1327 y[9*i] += sum1; 1328 y[9*i+1] += sum2; 1329 y[9*i+2] += sum3; 1330 y[9*i+3] += sum4; 1331 y[9*i+4] += sum5; 1332 y[9*i+5] += sum6; 1333 y[9*i+6] += sum7; 1334 y[9*i+7] += sum8; 1335 y[9*i+8] += sum9; 1336 } 1337 1338 PetscLogFlops(18*a->nz); 1339 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1340 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1346 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1347 { 1348 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1349 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1350 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1351 PetscErrorCode ierr; 1352 PetscInt m = b->AIJ->m,n,i,*idx; 1353 1354 PetscFunctionBegin; 1355 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1356 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1357 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1358 for (i=0; i<m; i++) { 1359 idx = a->j + a->i[i] ; 1360 v = a->a + a->i[i] ; 1361 n = a->i[i+1] - a->i[i]; 1362 alpha1 = x[9*i]; 1363 alpha2 = x[9*i+1]; 1364 alpha3 = x[9*i+2]; 1365 alpha4 = x[9*i+3]; 1366 alpha5 = x[9*i+4]; 1367 alpha6 = x[9*i+5]; 1368 alpha7 = x[9*i+6]; 1369 alpha8 = x[9*i+7]; 1370 alpha9 = x[9*i+8]; 1371 while (n-->0) { 1372 y[9*(*idx)] += alpha1*(*v); 1373 y[9*(*idx)+1] += alpha2*(*v); 1374 y[9*(*idx)+2] += alpha3*(*v); 1375 y[9*(*idx)+3] += alpha4*(*v); 1376 y[9*(*idx)+4] += alpha5*(*v); 1377 y[9*(*idx)+5] += alpha6*(*v); 1378 y[9*(*idx)+6] += alpha7*(*v); 1379 y[9*(*idx)+7] += alpha8*(*v); 1380 y[9*(*idx)+8] += alpha9*(*v); 1381 idx++; v++; 1382 } 1383 } 1384 PetscLogFlops(18*a->nz); 1385 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1386 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1387 PetscFunctionReturn(0); 1388 } 1389 1390 /*--------------------------------------------------------------------------------------------*/ 1391 #undef __FUNCT__ 1392 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1393 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1394 { 1395 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1397 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1398 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1399 PetscErrorCode ierr; 1400 PetscInt m = b->AIJ->m,*idx,*ii; 1401 PetscInt n,i,jrow,j; 1402 1403 PetscFunctionBegin; 1404 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1405 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1406 idx = a->j; 1407 v = a->a; 1408 ii = a->i; 1409 1410 for (i=0; i<m; i++) { 1411 jrow = ii[i]; 1412 n = ii[i+1] - jrow; 1413 sum1 = 0.0; 1414 sum2 = 0.0; 1415 sum3 = 0.0; 1416 sum4 = 0.0; 1417 sum5 = 0.0; 1418 sum6 = 0.0; 1419 sum7 = 0.0; 1420 sum8 = 0.0; 1421 sum9 = 0.0; 1422 sum10 = 0.0; 1423 sum11 = 0.0; 1424 sum12 = 0.0; 1425 sum13 = 0.0; 1426 sum14 = 0.0; 1427 sum15 = 0.0; 1428 sum16 = 0.0; 1429 for (j=0; j<n; j++) { 1430 sum1 += v[jrow]*x[16*idx[jrow]]; 1431 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1432 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1433 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1434 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1435 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1436 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1437 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1438 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1439 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1440 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1441 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1442 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1443 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1444 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1445 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1446 jrow++; 1447 } 1448 y[16*i] = sum1; 1449 y[16*i+1] = sum2; 1450 y[16*i+2] = sum3; 1451 y[16*i+3] = sum4; 1452 y[16*i+4] = sum5; 1453 y[16*i+5] = sum6; 1454 y[16*i+6] = sum7; 1455 y[16*i+7] = sum8; 1456 y[16*i+8] = sum9; 1457 y[16*i+9] = sum10; 1458 y[16*i+10] = sum11; 1459 y[16*i+11] = sum12; 1460 y[16*i+12] = sum13; 1461 y[16*i+13] = sum14; 1462 y[16*i+14] = sum15; 1463 y[16*i+15] = sum16; 1464 } 1465 1466 PetscLogFlops(32*a->nz - 16*m); 1467 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1468 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1474 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1475 { 1476 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1477 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1478 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1479 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1480 PetscErrorCode ierr; 1481 PetscInt m = b->AIJ->m,n,i,*idx; 1482 1483 PetscFunctionBegin; 1484 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1485 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1486 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1487 1488 for (i=0; i<m; i++) { 1489 idx = a->j + a->i[i] ; 1490 v = a->a + a->i[i] ; 1491 n = a->i[i+1] - a->i[i]; 1492 alpha1 = x[16*i]; 1493 alpha2 = x[16*i+1]; 1494 alpha3 = x[16*i+2]; 1495 alpha4 = x[16*i+3]; 1496 alpha5 = x[16*i+4]; 1497 alpha6 = x[16*i+5]; 1498 alpha7 = x[16*i+6]; 1499 alpha8 = x[16*i+7]; 1500 alpha9 = x[16*i+8]; 1501 alpha10 = x[16*i+9]; 1502 alpha11 = x[16*i+10]; 1503 alpha12 = x[16*i+11]; 1504 alpha13 = x[16*i+12]; 1505 alpha14 = x[16*i+13]; 1506 alpha15 = x[16*i+14]; 1507 alpha16 = x[16*i+15]; 1508 while (n-->0) { 1509 y[16*(*idx)] += alpha1*(*v); 1510 y[16*(*idx)+1] += alpha2*(*v); 1511 y[16*(*idx)+2] += alpha3*(*v); 1512 y[16*(*idx)+3] += alpha4*(*v); 1513 y[16*(*idx)+4] += alpha5*(*v); 1514 y[16*(*idx)+5] += alpha6*(*v); 1515 y[16*(*idx)+6] += alpha7*(*v); 1516 y[16*(*idx)+7] += alpha8*(*v); 1517 y[16*(*idx)+8] += alpha9*(*v); 1518 y[16*(*idx)+9] += alpha10*(*v); 1519 y[16*(*idx)+10] += alpha11*(*v); 1520 y[16*(*idx)+11] += alpha12*(*v); 1521 y[16*(*idx)+12] += alpha13*(*v); 1522 y[16*(*idx)+13] += alpha14*(*v); 1523 y[16*(*idx)+14] += alpha15*(*v); 1524 y[16*(*idx)+15] += alpha16*(*v); 1525 idx++; v++; 1526 } 1527 } 1528 PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1529 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1530 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1536 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1537 { 1538 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1540 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1541 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1542 PetscErrorCode ierr; 1543 PetscInt m = b->AIJ->m,*idx,*ii; 1544 PetscInt n,i,jrow,j; 1545 1546 PetscFunctionBegin; 1547 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1548 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1549 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1550 idx = a->j; 1551 v = a->a; 1552 ii = a->i; 1553 1554 for (i=0; i<m; i++) { 1555 jrow = ii[i]; 1556 n = ii[i+1] - jrow; 1557 sum1 = 0.0; 1558 sum2 = 0.0; 1559 sum3 = 0.0; 1560 sum4 = 0.0; 1561 sum5 = 0.0; 1562 sum6 = 0.0; 1563 sum7 = 0.0; 1564 sum8 = 0.0; 1565 sum9 = 0.0; 1566 sum10 = 0.0; 1567 sum11 = 0.0; 1568 sum12 = 0.0; 1569 sum13 = 0.0; 1570 sum14 = 0.0; 1571 sum15 = 0.0; 1572 sum16 = 0.0; 1573 for (j=0; j<n; j++) { 1574 sum1 += v[jrow]*x[16*idx[jrow]]; 1575 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1576 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1577 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1578 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1579 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1580 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1581 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1582 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1583 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1584 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1585 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1586 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1587 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1588 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1589 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1590 jrow++; 1591 } 1592 y[16*i] += sum1; 1593 y[16*i+1] += sum2; 1594 y[16*i+2] += sum3; 1595 y[16*i+3] += sum4; 1596 y[16*i+4] += sum5; 1597 y[16*i+5] += sum6; 1598 y[16*i+6] += sum7; 1599 y[16*i+7] += sum8; 1600 y[16*i+8] += sum9; 1601 y[16*i+9] += sum10; 1602 y[16*i+10] += sum11; 1603 y[16*i+11] += sum12; 1604 y[16*i+12] += sum13; 1605 y[16*i+13] += sum14; 1606 y[16*i+14] += sum15; 1607 y[16*i+15] += sum16; 1608 } 1609 1610 PetscLogFlops(32*a->nz); 1611 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1612 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1618 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1619 { 1620 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1621 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1622 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1623 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1624 PetscErrorCode ierr; 1625 PetscInt m = b->AIJ->m,n,i,*idx; 1626 1627 PetscFunctionBegin; 1628 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1629 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1630 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1631 for (i=0; i<m; i++) { 1632 idx = a->j + a->i[i] ; 1633 v = a->a + a->i[i] ; 1634 n = a->i[i+1] - a->i[i]; 1635 alpha1 = x[16*i]; 1636 alpha2 = x[16*i+1]; 1637 alpha3 = x[16*i+2]; 1638 alpha4 = x[16*i+3]; 1639 alpha5 = x[16*i+4]; 1640 alpha6 = x[16*i+5]; 1641 alpha7 = x[16*i+6]; 1642 alpha8 = x[16*i+7]; 1643 alpha9 = x[16*i+8]; 1644 alpha10 = x[16*i+9]; 1645 alpha11 = x[16*i+10]; 1646 alpha12 = x[16*i+11]; 1647 alpha13 = x[16*i+12]; 1648 alpha14 = x[16*i+13]; 1649 alpha15 = x[16*i+14]; 1650 alpha16 = x[16*i+15]; 1651 while (n-->0) { 1652 y[16*(*idx)] += alpha1*(*v); 1653 y[16*(*idx)+1] += alpha2*(*v); 1654 y[16*(*idx)+2] += alpha3*(*v); 1655 y[16*(*idx)+3] += alpha4*(*v); 1656 y[16*(*idx)+4] += alpha5*(*v); 1657 y[16*(*idx)+5] += alpha6*(*v); 1658 y[16*(*idx)+6] += alpha7*(*v); 1659 y[16*(*idx)+7] += alpha8*(*v); 1660 y[16*(*idx)+8] += alpha9*(*v); 1661 y[16*(*idx)+9] += alpha10*(*v); 1662 y[16*(*idx)+10] += alpha11*(*v); 1663 y[16*(*idx)+11] += alpha12*(*v); 1664 y[16*(*idx)+12] += alpha13*(*v); 1665 y[16*(*idx)+13] += alpha14*(*v); 1666 y[16*(*idx)+14] += alpha15*(*v); 1667 y[16*(*idx)+15] += alpha16*(*v); 1668 idx++; v++; 1669 } 1670 } 1671 PetscLogFlops(32*a->nz); 1672 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1673 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1674 PetscFunctionReturn(0); 1675 } 1676 1677 /*===================================================================================*/ 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1680 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1681 { 1682 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1683 PetscErrorCode ierr; 1684 1685 PetscFunctionBegin; 1686 /* start the scatter */ 1687 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1688 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1689 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1690 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1696 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1697 { 1698 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1703 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1704 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1705 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1711 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1712 { 1713 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 /* start the scatter */ 1718 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1719 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1720 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1721 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1722 PetscFunctionReturn(0); 1723 } 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1727 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1728 { 1729 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1734 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1735 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1736 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 1742 PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,Mat *B) 1743 { 1744 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1745 Mat a = b->AIJ; 1746 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 1747 PetscErrorCode ierr; 1748 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 1749 PetscInt *cols; 1750 PetscScalar *vals; 1751 1752 PetscFunctionBegin; 1753 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 1754 ierr = PetscMalloc(dof*m*sizeof(int),&ilen);CHKERRQ(ierr); 1755 for (i=0; i<m; i++) { 1756 nmax = PetscMax(nmax,aij->ilen[i]); 1757 for (j=0; j<dof; j++) { 1758 ilen[dof*i+j] = aij->ilen[i]; 1759 } 1760 } 1761 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,B);CHKERRQ(ierr); 1762 ierr = MatSetOption(*B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1763 ierr = PetscFree(ilen);CHKERRQ(ierr); 1764 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 1765 ii = 0; 1766 for (i=0; i<m; i++) { 1767 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1768 for (j=0; j<dof; j++) { 1769 for (k=0; k<ncols; k++) { 1770 icols[k] = dof*cols[k]+j; 1771 } 1772 ierr = MatSetValues_SeqAIJ(*B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 1773 ii++; 1774 } 1775 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1776 } 1777 ierr = PetscFree(icols);CHKERRQ(ierr); 1778 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1779 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #include "src/mat/impls/aij/mpi/mpiaij.h" 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 1786 PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A,const MatType newtype,Mat *B) 1787 { 1788 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 1789 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ; 1790 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 1791 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 1792 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 1793 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 1794 PetscInt dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols; 1795 PetscInt rstart,cstart,*garray,ii,k; 1796 PetscErrorCode ierr; 1797 PetscScalar *vals,*ovals; 1798 1799 PetscFunctionBegin; 1800 ierr = PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);CHKERRQ(ierr); 1801 for (i=0; i<A->m/dof; i++) { 1802 nmax = PetscMax(nmax,AIJ->ilen[i]); 1803 onmax = PetscMax(onmax,OAIJ->ilen[i]); 1804 for (j=0; j<dof; j++) { 1805 dnz[dof*i+j] = AIJ->ilen[i]; 1806 onz[dof*i+j] = OAIJ->ilen[i]; 1807 } 1808 } 1809 ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,B);CHKERRQ(ierr); 1810 ierr = MatSetOption(*B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1811 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 1812 1813 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 1814 rstart = dof*mpiaij->rstart; 1815 cstart = dof*mpiaij->cstart; 1816 garray = mpiaij->garray; 1817 1818 ii = rstart; 1819 for (i=0; i<A->m/dof; i++) { 1820 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1821 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 1822 for (j=0; j<dof; j++) { 1823 for (k=0; k<ncols; k++) { 1824 icols[k] = cstart + dof*cols[k]+j; 1825 } 1826 for (k=0; k<oncols; k++) { 1827 oicols[k] = dof*garray[ocols[k]]+j; 1828 } 1829 ierr = MatSetValues_MPIAIJ(*B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 1830 ierr = MatSetValues_MPIAIJ(*B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 1831 ii++; 1832 } 1833 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1834 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 1835 } 1836 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 1837 1838 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1839 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 1844 1845 /* ---------------------------------------------------------------------------------- */ 1846 /*MC 1847 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1848 operations for multicomponent problems. It interpolates each component the same 1849 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 1850 and MATMPIAIJ for distributed matrices. 1851 1852 Operations provided: 1853 + MatMult 1854 . MatMultTranspose 1855 . MatMultAdd 1856 . MatMultTransposeAdd 1857 - MatView 1858 1859 Level: advanced 1860 1861 M*/ 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatCreateMAIJ" 1864 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 1865 { 1866 PetscErrorCode ierr; 1867 PetscMPIInt size; 1868 PetscInt n; 1869 Mat_MPIMAIJ *b; 1870 Mat B; 1871 1872 PetscFunctionBegin; 1873 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1874 1875 if (dof == 1) { 1876 *maij = A; 1877 } else { 1878 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1879 B->assembled = PETSC_TRUE; 1880 1881 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1882 if (size == 1) { 1883 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1884 B->ops->destroy = MatDestroy_SeqMAIJ; 1885 B->ops->view = MatView_SeqMAIJ; 1886 b = (Mat_MPIMAIJ*)B->data; 1887 b->dof = dof; 1888 b->AIJ = A; 1889 if (dof == 2) { 1890 B->ops->mult = MatMult_SeqMAIJ_2; 1891 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1892 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1893 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1894 } else if (dof == 3) { 1895 B->ops->mult = MatMult_SeqMAIJ_3; 1896 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1897 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1898 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1899 } else if (dof == 4) { 1900 B->ops->mult = MatMult_SeqMAIJ_4; 1901 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1902 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1903 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1904 } else if (dof == 5) { 1905 B->ops->mult = MatMult_SeqMAIJ_5; 1906 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1907 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1908 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1909 } else if (dof == 6) { 1910 B->ops->mult = MatMult_SeqMAIJ_6; 1911 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1912 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1913 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1914 } else if (dof == 8) { 1915 B->ops->mult = MatMult_SeqMAIJ_8; 1916 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1917 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1918 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1919 } else if (dof == 9) { 1920 B->ops->mult = MatMult_SeqMAIJ_9; 1921 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1922 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1923 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1924 } else if (dof == 16) { 1925 B->ops->mult = MatMult_SeqMAIJ_16; 1926 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1927 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1928 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1929 } else { 1930 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1931 } 1932 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 1933 } else { 1934 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1935 IS from,to; 1936 Vec gvec; 1937 PetscInt *garray,i; 1938 1939 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1940 B->ops->destroy = MatDestroy_MPIMAIJ; 1941 B->ops->view = MatView_MPIMAIJ; 1942 b = (Mat_MPIMAIJ*)B->data; 1943 b->dof = dof; 1944 b->A = A; 1945 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1946 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1947 1948 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1949 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1950 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 1951 1952 /* create two temporary Index sets for build scatter gather */ 1953 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 1954 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1955 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1956 ierr = PetscFree(garray);CHKERRQ(ierr); 1957 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1958 1959 /* create temporary global vector to generate scatter context */ 1960 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1961 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 1962 1963 /* generate the scatter context */ 1964 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1965 1966 ierr = ISDestroy(from);CHKERRQ(ierr); 1967 ierr = ISDestroy(to);CHKERRQ(ierr); 1968 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1969 1970 B->ops->mult = MatMult_MPIMAIJ_dof; 1971 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1972 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1973 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1974 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 1975 } 1976 *maij = B; 1977 ierr = MatView_Private(B);CHKERRQ(ierr); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993