1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/ 21 #include "../src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 /*@C 27 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 28 29 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 30 31 Input Parameter: 32 . A - the MAIJ matrix 33 34 Output Parameter: 35 . B - the AIJ matrix 36 37 Level: advanced 38 39 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 40 41 .seealso: MatCreateMAIJ() 42 @*/ 43 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 44 { 45 PetscErrorCode ierr; 46 PetscTruth ismpimaij,isseqmaij; 47 48 PetscFunctionBegin; 49 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 50 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 51 if (ismpimaij) { 52 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 53 54 *B = b->A; 55 } else if (isseqmaij) { 56 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 57 58 *B = b->AIJ; 59 } else { 60 *B = A; 61 } 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatMAIJRedimension" 67 /*@C 68 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 69 70 Collective 71 72 Input Parameter: 73 + A - the MAIJ matrix 74 - dof - the block size for the new matrix 75 76 Output Parameter: 77 . B - the new MAIJ matrix 78 79 Level: advanced 80 81 .seealso: MatCreateMAIJ() 82 @*/ 83 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 84 { 85 PetscErrorCode ierr; 86 Mat Aij; 87 88 PetscFunctionBegin; 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 if (b->AIJ) { 103 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 104 } 105 ierr = PetscFree(b);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatView_SeqMAIJ" 111 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 112 { 113 PetscErrorCode ierr; 114 Mat B; 115 116 PetscFunctionBegin; 117 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 118 ierr = MatView(B,viewer);CHKERRQ(ierr); 119 ierr = MatDestroy(B);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatView_MPIMAIJ" 125 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 126 { 127 PetscErrorCode ierr; 128 Mat B; 129 130 PetscFunctionBegin; 131 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 132 ierr = MatView(B,viewer);CHKERRQ(ierr); 133 ierr = MatDestroy(B);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatDestroy_MPIMAIJ" 139 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 140 { 141 PetscErrorCode ierr; 142 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 143 144 PetscFunctionBegin; 145 if (b->AIJ) { 146 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 147 } 148 if (b->OAIJ) { 149 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 150 } 151 if (b->A) { 152 ierr = MatDestroy(b->A);CHKERRQ(ierr); 153 } 154 if (b->ctx) { 155 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 156 } 157 if (b->w) { 158 ierr = VecDestroy(b->w);CHKERRQ(ierr); 159 } 160 ierr = PetscFree(b);CHKERRQ(ierr); 161 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /*MC 166 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 167 multicomponent problems, interpolating or restricting each component the same way independently. 168 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 169 170 Operations provided: 171 . MatMult 172 . MatMultTranspose 173 . MatMultAdd 174 . MatMultTransposeAdd 175 176 Level: advanced 177 178 .seealso: MatCreateSeqDense 179 M*/ 180 181 EXTERN_C_BEGIN 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatCreate_MAIJ" 184 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 185 { 186 PetscErrorCode ierr; 187 Mat_MPIMAIJ *b; 188 PetscMPIInt size; 189 190 PetscFunctionBegin; 191 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 192 A->data = (void*)b; 193 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 194 A->mapping = 0; 195 196 b->AIJ = 0; 197 b->dof = 0; 198 b->OAIJ = 0; 199 b->ctx = 0; 200 b->w = 0; 201 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 202 if (size == 1){ 203 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 204 } else { 205 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 206 } 207 PetscFunctionReturn(0); 208 } 209 EXTERN_C_END 210 211 /* --------------------------------------------------------------------------------------*/ 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatMult_SeqMAIJ_2" 214 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 215 { 216 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 217 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 218 PetscScalar *x,*y,*v,sum1, sum2; 219 PetscErrorCode ierr; 220 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 221 PetscInt n,i,jrow,j; 222 223 PetscFunctionBegin; 224 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 225 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 226 idx = a->j; 227 v = a->a; 228 ii = a->i; 229 230 for (i=0; i<m; i++) { 231 jrow = ii[i]; 232 n = ii[i+1] - jrow; 233 sum1 = 0.0; 234 sum2 = 0.0; 235 nonzerorow += (n>0); 236 for (j=0; j<n; j++) { 237 sum1 += v[jrow]*x[2*idx[jrow]]; 238 sum2 += v[jrow]*x[2*idx[jrow]+1]; 239 jrow++; 240 } 241 y[2*i] = sum1; 242 y[2*i+1] = sum2; 243 } 244 245 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 246 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 247 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 248 PetscFunctionReturn(0); 249 } 250 251 #undef __FUNCT__ 252 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 253 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 254 { 255 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 256 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 257 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 258 PetscErrorCode ierr; 259 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 260 261 PetscFunctionBegin; 262 ierr = VecSet(yy,zero);CHKERRQ(ierr); 263 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 264 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 265 266 for (i=0; i<m; i++) { 267 idx = a->j + a->i[i] ; 268 v = a->a + a->i[i] ; 269 n = a->i[i+1] - a->i[i]; 270 alpha1 = x[2*i]; 271 alpha2 = x[2*i+1]; 272 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 273 } 274 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 275 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 276 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 282 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 283 { 284 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 286 PetscScalar *x,*y,*v,sum1, sum2; 287 PetscErrorCode ierr; 288 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 289 PetscInt n,i,jrow,j; 290 291 PetscFunctionBegin; 292 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 293 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 294 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 295 idx = a->j; 296 v = a->a; 297 ii = a->i; 298 299 for (i=0; i<m; i++) { 300 jrow = ii[i]; 301 n = ii[i+1] - jrow; 302 sum1 = 0.0; 303 sum2 = 0.0; 304 for (j=0; j<n; j++) { 305 sum1 += v[jrow]*x[2*idx[jrow]]; 306 sum2 += v[jrow]*x[2*idx[jrow]+1]; 307 jrow++; 308 } 309 y[2*i] += sum1; 310 y[2*i+1] += sum2; 311 } 312 313 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 314 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 315 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 316 PetscFunctionReturn(0); 317 } 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 320 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 321 { 322 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 323 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 324 PetscScalar *x,*y,*v,alpha1,alpha2; 325 PetscErrorCode ierr; 326 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 327 328 PetscFunctionBegin; 329 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 330 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 331 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 idx = a->j + a->i[i] ; 335 v = a->a + a->i[i] ; 336 n = a->i[i+1] - a->i[i]; 337 alpha1 = x[2*i]; 338 alpha2 = x[2*i+1]; 339 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 340 } 341 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 342 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 343 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 /* --------------------------------------------------------------------------------------*/ 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatMult_SeqMAIJ_3" 349 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 350 { 351 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 353 PetscScalar *x,*y,*v,sum1, sum2, sum3; 354 PetscErrorCode ierr; 355 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 356 PetscInt n,i,jrow,j; 357 358 PetscFunctionBegin; 359 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 360 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 361 idx = a->j; 362 v = a->a; 363 ii = a->i; 364 365 for (i=0; i<m; i++) { 366 jrow = ii[i]; 367 n = ii[i+1] - jrow; 368 sum1 = 0.0; 369 sum2 = 0.0; 370 sum3 = 0.0; 371 nonzerorow += (n>0); 372 for (j=0; j<n; j++) { 373 sum1 += v[jrow]*x[3*idx[jrow]]; 374 sum2 += v[jrow]*x[3*idx[jrow]+1]; 375 sum3 += v[jrow]*x[3*idx[jrow]+2]; 376 jrow++; 377 } 378 y[3*i] = sum1; 379 y[3*i+1] = sum2; 380 y[3*i+2] = sum3; 381 } 382 383 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 384 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 385 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 391 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 392 { 393 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 394 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 395 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 396 PetscErrorCode ierr; 397 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 398 399 PetscFunctionBegin; 400 ierr = VecSet(yy,zero);CHKERRQ(ierr); 401 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 402 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 403 404 for (i=0; i<m; i++) { 405 idx = a->j + a->i[i]; 406 v = a->a + a->i[i]; 407 n = a->i[i+1] - a->i[i]; 408 alpha1 = x[3*i]; 409 alpha2 = x[3*i+1]; 410 alpha3 = x[3*i+2]; 411 while (n-->0) { 412 y[3*(*idx)] += alpha1*(*v); 413 y[3*(*idx)+1] += alpha2*(*v); 414 y[3*(*idx)+2] += alpha3*(*v); 415 idx++; v++; 416 } 417 } 418 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 419 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 420 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 426 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 427 { 428 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 430 PetscScalar *x,*y,*v,sum1, sum2, sum3; 431 PetscErrorCode ierr; 432 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 433 PetscInt n,i,jrow,j; 434 435 PetscFunctionBegin; 436 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 437 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 438 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 439 idx = a->j; 440 v = a->a; 441 ii = a->i; 442 443 for (i=0; i<m; i++) { 444 jrow = ii[i]; 445 n = ii[i+1] - jrow; 446 sum1 = 0.0; 447 sum2 = 0.0; 448 sum3 = 0.0; 449 for (j=0; j<n; j++) { 450 sum1 += v[jrow]*x[3*idx[jrow]]; 451 sum2 += v[jrow]*x[3*idx[jrow]+1]; 452 sum3 += v[jrow]*x[3*idx[jrow]+2]; 453 jrow++; 454 } 455 y[3*i] += sum1; 456 y[3*i+1] += sum2; 457 y[3*i+2] += sum3; 458 } 459 460 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 461 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 462 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 #undef __FUNCT__ 466 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 467 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 468 { 469 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 470 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 471 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 472 PetscErrorCode ierr; 473 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 474 475 PetscFunctionBegin; 476 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 477 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 478 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 479 for (i=0; i<m; i++) { 480 idx = a->j + a->i[i] ; 481 v = a->a + a->i[i] ; 482 n = a->i[i+1] - a->i[i]; 483 alpha1 = x[3*i]; 484 alpha2 = x[3*i+1]; 485 alpha3 = x[3*i+2]; 486 while (n-->0) { 487 y[3*(*idx)] += alpha1*(*v); 488 y[3*(*idx)+1] += alpha2*(*v); 489 y[3*(*idx)+2] += alpha3*(*v); 490 idx++; v++; 491 } 492 } 493 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 494 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 495 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 /* ------------------------------------------------------------------------------*/ 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMult_SeqMAIJ_4" 502 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 503 { 504 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 505 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 506 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 507 PetscErrorCode ierr; 508 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 509 PetscInt n,i,jrow,j; 510 511 PetscFunctionBegin; 512 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 513 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 514 idx = a->j; 515 v = a->a; 516 ii = a->i; 517 518 for (i=0; i<m; i++) { 519 jrow = ii[i]; 520 n = ii[i+1] - jrow; 521 sum1 = 0.0; 522 sum2 = 0.0; 523 sum3 = 0.0; 524 sum4 = 0.0; 525 nonzerorow += (n>0); 526 for (j=0; j<n; j++) { 527 sum1 += v[jrow]*x[4*idx[jrow]]; 528 sum2 += v[jrow]*x[4*idx[jrow]+1]; 529 sum3 += v[jrow]*x[4*idx[jrow]+2]; 530 sum4 += v[jrow]*x[4*idx[jrow]+3]; 531 jrow++; 532 } 533 y[4*i] = sum1; 534 y[4*i+1] = sum2; 535 y[4*i+2] = sum3; 536 y[4*i+3] = sum4; 537 } 538 539 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 540 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 541 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 547 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 548 { 549 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 551 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 552 PetscErrorCode ierr; 553 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 554 555 PetscFunctionBegin; 556 ierr = VecSet(yy,zero);CHKERRQ(ierr); 557 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 558 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 559 for (i=0; i<m; i++) { 560 idx = a->j + a->i[i] ; 561 v = a->a + a->i[i] ; 562 n = a->i[i+1] - a->i[i]; 563 alpha1 = x[4*i]; 564 alpha2 = x[4*i+1]; 565 alpha3 = x[4*i+2]; 566 alpha4 = x[4*i+3]; 567 while (n-->0) { 568 y[4*(*idx)] += alpha1*(*v); 569 y[4*(*idx)+1] += alpha2*(*v); 570 y[4*(*idx)+2] += alpha3*(*v); 571 y[4*(*idx)+3] += alpha4*(*v); 572 idx++; v++; 573 } 574 } 575 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 576 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 583 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 584 { 585 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 586 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 587 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 588 PetscErrorCode ierr; 589 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 590 PetscInt n,i,jrow,j; 591 592 PetscFunctionBegin; 593 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 594 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 595 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 596 idx = a->j; 597 v = a->a; 598 ii = a->i; 599 600 for (i=0; i<m; i++) { 601 jrow = ii[i]; 602 n = ii[i+1] - jrow; 603 sum1 = 0.0; 604 sum2 = 0.0; 605 sum3 = 0.0; 606 sum4 = 0.0; 607 for (j=0; j<n; j++) { 608 sum1 += v[jrow]*x[4*idx[jrow]]; 609 sum2 += v[jrow]*x[4*idx[jrow]+1]; 610 sum3 += v[jrow]*x[4*idx[jrow]+2]; 611 sum4 += v[jrow]*x[4*idx[jrow]+3]; 612 jrow++; 613 } 614 y[4*i] += sum1; 615 y[4*i+1] += sum2; 616 y[4*i+2] += sum3; 617 y[4*i+3] += sum4; 618 } 619 620 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 621 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 622 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 627 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 628 { 629 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 630 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 631 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 632 PetscErrorCode ierr; 633 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 634 635 PetscFunctionBegin; 636 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 637 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 638 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 639 640 for (i=0; i<m; i++) { 641 idx = a->j + a->i[i] ; 642 v = a->a + a->i[i] ; 643 n = a->i[i+1] - a->i[i]; 644 alpha1 = x[4*i]; 645 alpha2 = x[4*i+1]; 646 alpha3 = x[4*i+2]; 647 alpha4 = x[4*i+3]; 648 while (n-->0) { 649 y[4*(*idx)] += alpha1*(*v); 650 y[4*(*idx)+1] += alpha2*(*v); 651 y[4*(*idx)+2] += alpha3*(*v); 652 y[4*(*idx)+3] += alpha4*(*v); 653 idx++; v++; 654 } 655 } 656 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 657 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 658 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 /* ------------------------------------------------------------------------------*/ 662 663 #undef __FUNCT__ 664 #define __FUNCT__ "MatMult_SeqMAIJ_5" 665 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 666 { 667 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 668 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 669 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 670 PetscErrorCode ierr; 671 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 672 PetscInt n,i,jrow,j; 673 674 PetscFunctionBegin; 675 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 676 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 677 idx = a->j; 678 v = a->a; 679 ii = a->i; 680 681 for (i=0; i<m; i++) { 682 jrow = ii[i]; 683 n = ii[i+1] - jrow; 684 sum1 = 0.0; 685 sum2 = 0.0; 686 sum3 = 0.0; 687 sum4 = 0.0; 688 sum5 = 0.0; 689 nonzerorow += (n>0); 690 for (j=0; j<n; j++) { 691 sum1 += v[jrow]*x[5*idx[jrow]]; 692 sum2 += v[jrow]*x[5*idx[jrow]+1]; 693 sum3 += v[jrow]*x[5*idx[jrow]+2]; 694 sum4 += v[jrow]*x[5*idx[jrow]+3]; 695 sum5 += v[jrow]*x[5*idx[jrow]+4]; 696 jrow++; 697 } 698 y[5*i] = sum1; 699 y[5*i+1] = sum2; 700 y[5*i+2] = sum3; 701 y[5*i+3] = sum4; 702 y[5*i+4] = sum5; 703 } 704 705 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 706 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 707 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 713 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 714 { 715 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 716 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 717 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 718 PetscErrorCode ierr; 719 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 720 721 PetscFunctionBegin; 722 ierr = VecSet(yy,zero);CHKERRQ(ierr); 723 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 724 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 725 726 for (i=0; i<m; i++) { 727 idx = a->j + a->i[i] ; 728 v = a->a + a->i[i] ; 729 n = a->i[i+1] - a->i[i]; 730 alpha1 = x[5*i]; 731 alpha2 = x[5*i+1]; 732 alpha3 = x[5*i+2]; 733 alpha4 = x[5*i+3]; 734 alpha5 = x[5*i+4]; 735 while (n-->0) { 736 y[5*(*idx)] += alpha1*(*v); 737 y[5*(*idx)+1] += alpha2*(*v); 738 y[5*(*idx)+2] += alpha3*(*v); 739 y[5*(*idx)+3] += alpha4*(*v); 740 y[5*(*idx)+4] += alpha5*(*v); 741 idx++; v++; 742 } 743 } 744 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 745 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 746 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 752 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 753 { 754 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 755 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 756 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 757 PetscErrorCode ierr; 758 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 759 PetscInt n,i,jrow,j; 760 761 PetscFunctionBegin; 762 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 763 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 764 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 765 idx = a->j; 766 v = a->a; 767 ii = a->i; 768 769 for (i=0; i<m; i++) { 770 jrow = ii[i]; 771 n = ii[i+1] - jrow; 772 sum1 = 0.0; 773 sum2 = 0.0; 774 sum3 = 0.0; 775 sum4 = 0.0; 776 sum5 = 0.0; 777 for (j=0; j<n; j++) { 778 sum1 += v[jrow]*x[5*idx[jrow]]; 779 sum2 += v[jrow]*x[5*idx[jrow]+1]; 780 sum3 += v[jrow]*x[5*idx[jrow]+2]; 781 sum4 += v[jrow]*x[5*idx[jrow]+3]; 782 sum5 += v[jrow]*x[5*idx[jrow]+4]; 783 jrow++; 784 } 785 y[5*i] += sum1; 786 y[5*i+1] += sum2; 787 y[5*i+2] += sum3; 788 y[5*i+3] += sum4; 789 y[5*i+4] += sum5; 790 } 791 792 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 793 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 794 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 795 PetscFunctionReturn(0); 796 } 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 800 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 801 { 802 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 803 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 804 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 805 PetscErrorCode ierr; 806 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 807 808 PetscFunctionBegin; 809 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 810 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 811 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 812 813 for (i=0; i<m; i++) { 814 idx = a->j + a->i[i] ; 815 v = a->a + a->i[i] ; 816 n = a->i[i+1] - a->i[i]; 817 alpha1 = x[5*i]; 818 alpha2 = x[5*i+1]; 819 alpha3 = x[5*i+2]; 820 alpha4 = x[5*i+3]; 821 alpha5 = x[5*i+4]; 822 while (n-->0) { 823 y[5*(*idx)] += alpha1*(*v); 824 y[5*(*idx)+1] += alpha2*(*v); 825 y[5*(*idx)+2] += alpha3*(*v); 826 y[5*(*idx)+3] += alpha4*(*v); 827 y[5*(*idx)+4] += alpha5*(*v); 828 idx++; v++; 829 } 830 } 831 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 832 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 833 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 834 PetscFunctionReturn(0); 835 } 836 837 /* ------------------------------------------------------------------------------*/ 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMult_SeqMAIJ_6" 840 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 841 { 842 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 843 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 844 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 845 PetscErrorCode ierr; 846 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 847 PetscInt n,i,jrow,j; 848 849 PetscFunctionBegin; 850 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 851 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 852 idx = a->j; 853 v = a->a; 854 ii = a->i; 855 856 for (i=0; i<m; i++) { 857 jrow = ii[i]; 858 n = ii[i+1] - jrow; 859 sum1 = 0.0; 860 sum2 = 0.0; 861 sum3 = 0.0; 862 sum4 = 0.0; 863 sum5 = 0.0; 864 sum6 = 0.0; 865 nonzerorow += (n>0); 866 for (j=0; j<n; j++) { 867 sum1 += v[jrow]*x[6*idx[jrow]]; 868 sum2 += v[jrow]*x[6*idx[jrow]+1]; 869 sum3 += v[jrow]*x[6*idx[jrow]+2]; 870 sum4 += v[jrow]*x[6*idx[jrow]+3]; 871 sum5 += v[jrow]*x[6*idx[jrow]+4]; 872 sum6 += v[jrow]*x[6*idx[jrow]+5]; 873 jrow++; 874 } 875 y[6*i] = sum1; 876 y[6*i+1] = sum2; 877 y[6*i+2] = sum3; 878 y[6*i+3] = sum4; 879 y[6*i+4] = sum5; 880 y[6*i+5] = sum6; 881 } 882 883 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 884 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 885 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 891 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 892 { 893 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 894 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 895 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 896 PetscErrorCode ierr; 897 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 898 899 PetscFunctionBegin; 900 ierr = VecSet(yy,zero);CHKERRQ(ierr); 901 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 902 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 903 904 for (i=0; i<m; i++) { 905 idx = a->j + a->i[i] ; 906 v = a->a + a->i[i] ; 907 n = a->i[i+1] - a->i[i]; 908 alpha1 = x[6*i]; 909 alpha2 = x[6*i+1]; 910 alpha3 = x[6*i+2]; 911 alpha4 = x[6*i+3]; 912 alpha5 = x[6*i+4]; 913 alpha6 = x[6*i+5]; 914 while (n-->0) { 915 y[6*(*idx)] += alpha1*(*v); 916 y[6*(*idx)+1] += alpha2*(*v); 917 y[6*(*idx)+2] += alpha3*(*v); 918 y[6*(*idx)+3] += alpha4*(*v); 919 y[6*(*idx)+4] += alpha5*(*v); 920 y[6*(*idx)+5] += alpha6*(*v); 921 idx++; v++; 922 } 923 } 924 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 925 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 926 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 927 PetscFunctionReturn(0); 928 } 929 930 #undef __FUNCT__ 931 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 932 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 933 { 934 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 935 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 936 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 937 PetscErrorCode ierr; 938 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 939 PetscInt n,i,jrow,j; 940 941 PetscFunctionBegin; 942 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 943 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 944 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 945 idx = a->j; 946 v = a->a; 947 ii = a->i; 948 949 for (i=0; i<m; i++) { 950 jrow = ii[i]; 951 n = ii[i+1] - jrow; 952 sum1 = 0.0; 953 sum2 = 0.0; 954 sum3 = 0.0; 955 sum4 = 0.0; 956 sum5 = 0.0; 957 sum6 = 0.0; 958 for (j=0; j<n; j++) { 959 sum1 += v[jrow]*x[6*idx[jrow]]; 960 sum2 += v[jrow]*x[6*idx[jrow]+1]; 961 sum3 += v[jrow]*x[6*idx[jrow]+2]; 962 sum4 += v[jrow]*x[6*idx[jrow]+3]; 963 sum5 += v[jrow]*x[6*idx[jrow]+4]; 964 sum6 += v[jrow]*x[6*idx[jrow]+5]; 965 jrow++; 966 } 967 y[6*i] += sum1; 968 y[6*i+1] += sum2; 969 y[6*i+2] += sum3; 970 y[6*i+3] += sum4; 971 y[6*i+4] += sum5; 972 y[6*i+5] += sum6; 973 } 974 975 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 976 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 977 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 983 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 986 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 987 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 988 PetscErrorCode ierr; 989 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 990 991 PetscFunctionBegin; 992 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 993 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 994 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 995 996 for (i=0; i<m; i++) { 997 idx = a->j + a->i[i] ; 998 v = a->a + a->i[i] ; 999 n = a->i[i+1] - a->i[i]; 1000 alpha1 = x[6*i]; 1001 alpha2 = x[6*i+1]; 1002 alpha3 = x[6*i+2]; 1003 alpha4 = x[6*i+3]; 1004 alpha5 = x[6*i+4]; 1005 alpha6 = x[6*i+5]; 1006 while (n-->0) { 1007 y[6*(*idx)] += alpha1*(*v); 1008 y[6*(*idx)+1] += alpha2*(*v); 1009 y[6*(*idx)+2] += alpha3*(*v); 1010 y[6*(*idx)+3] += alpha4*(*v); 1011 y[6*(*idx)+4] += alpha5*(*v); 1012 y[6*(*idx)+5] += alpha6*(*v); 1013 idx++; v++; 1014 } 1015 } 1016 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1017 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1018 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 /* ------------------------------------------------------------------------------*/ 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1025 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1026 { 1027 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1028 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1029 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1030 PetscErrorCode ierr; 1031 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1032 PetscInt n,i,jrow,j; 1033 1034 PetscFunctionBegin; 1035 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1036 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1037 idx = a->j; 1038 v = a->a; 1039 ii = a->i; 1040 1041 for (i=0; i<m; i++) { 1042 jrow = ii[i]; 1043 n = ii[i+1] - jrow; 1044 sum1 = 0.0; 1045 sum2 = 0.0; 1046 sum3 = 0.0; 1047 sum4 = 0.0; 1048 sum5 = 0.0; 1049 sum6 = 0.0; 1050 sum7 = 0.0; 1051 nonzerorow += (n>0); 1052 for (j=0; j<n; j++) { 1053 sum1 += v[jrow]*x[7*idx[jrow]]; 1054 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1055 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1056 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1057 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1058 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1059 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1060 jrow++; 1061 } 1062 y[7*i] = sum1; 1063 y[7*i+1] = sum2; 1064 y[7*i+2] = sum3; 1065 y[7*i+3] = sum4; 1066 y[7*i+4] = sum5; 1067 y[7*i+5] = sum6; 1068 y[7*i+6] = sum7; 1069 } 1070 1071 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1079 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1080 { 1081 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1082 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1083 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1084 PetscErrorCode ierr; 1085 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1086 1087 PetscFunctionBegin; 1088 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1089 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1090 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1091 1092 for (i=0; i<m; i++) { 1093 idx = a->j + a->i[i] ; 1094 v = a->a + a->i[i] ; 1095 n = a->i[i+1] - a->i[i]; 1096 alpha1 = x[7*i]; 1097 alpha2 = x[7*i+1]; 1098 alpha3 = x[7*i+2]; 1099 alpha4 = x[7*i+3]; 1100 alpha5 = x[7*i+4]; 1101 alpha6 = x[7*i+5]; 1102 alpha7 = x[7*i+6]; 1103 while (n-->0) { 1104 y[7*(*idx)] += alpha1*(*v); 1105 y[7*(*idx)+1] += alpha2*(*v); 1106 y[7*(*idx)+2] += alpha3*(*v); 1107 y[7*(*idx)+3] += alpha4*(*v); 1108 y[7*(*idx)+4] += alpha5*(*v); 1109 y[7*(*idx)+5] += alpha6*(*v); 1110 y[7*(*idx)+6] += alpha7*(*v); 1111 idx++; v++; 1112 } 1113 } 1114 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1115 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1116 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1122 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1123 { 1124 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1126 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1127 PetscErrorCode ierr; 1128 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1129 PetscInt n,i,jrow,j; 1130 1131 PetscFunctionBegin; 1132 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1133 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1134 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1135 idx = a->j; 1136 v = a->a; 1137 ii = a->i; 1138 1139 for (i=0; i<m; i++) { 1140 jrow = ii[i]; 1141 n = ii[i+1] - jrow; 1142 sum1 = 0.0; 1143 sum2 = 0.0; 1144 sum3 = 0.0; 1145 sum4 = 0.0; 1146 sum5 = 0.0; 1147 sum6 = 0.0; 1148 sum7 = 0.0; 1149 for (j=0; j<n; j++) { 1150 sum1 += v[jrow]*x[7*idx[jrow]]; 1151 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1152 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1153 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1154 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1155 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1156 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1157 jrow++; 1158 } 1159 y[7*i] += sum1; 1160 y[7*i+1] += sum2; 1161 y[7*i+2] += sum3; 1162 y[7*i+3] += sum4; 1163 y[7*i+4] += sum5; 1164 y[7*i+5] += sum6; 1165 y[7*i+6] += sum7; 1166 } 1167 1168 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1169 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1170 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1176 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1177 { 1178 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1180 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1181 PetscErrorCode ierr; 1182 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1183 1184 PetscFunctionBegin; 1185 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1186 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1187 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1188 for (i=0; i<m; i++) { 1189 idx = a->j + a->i[i] ; 1190 v = a->a + a->i[i] ; 1191 n = a->i[i+1] - a->i[i]; 1192 alpha1 = x[7*i]; 1193 alpha2 = x[7*i+1]; 1194 alpha3 = x[7*i+2]; 1195 alpha4 = x[7*i+3]; 1196 alpha5 = x[7*i+4]; 1197 alpha6 = x[7*i+5]; 1198 alpha7 = x[7*i+6]; 1199 while (n-->0) { 1200 y[7*(*idx)] += alpha1*(*v); 1201 y[7*(*idx)+1] += alpha2*(*v); 1202 y[7*(*idx)+2] += alpha3*(*v); 1203 y[7*(*idx)+3] += alpha4*(*v); 1204 y[7*(*idx)+4] += alpha5*(*v); 1205 y[7*(*idx)+5] += alpha6*(*v); 1206 y[7*(*idx)+6] += alpha7*(*v); 1207 idx++; v++; 1208 } 1209 } 1210 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1211 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1212 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1218 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1219 { 1220 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1221 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1222 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1223 PetscErrorCode ierr; 1224 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1225 PetscInt n,i,jrow,j; 1226 1227 PetscFunctionBegin; 1228 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1229 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1230 idx = a->j; 1231 v = a->a; 1232 ii = a->i; 1233 1234 for (i=0; i<m; i++) { 1235 jrow = ii[i]; 1236 n = ii[i+1] - jrow; 1237 sum1 = 0.0; 1238 sum2 = 0.0; 1239 sum3 = 0.0; 1240 sum4 = 0.0; 1241 sum5 = 0.0; 1242 sum6 = 0.0; 1243 sum7 = 0.0; 1244 sum8 = 0.0; 1245 nonzerorow += (n>0); 1246 for (j=0; j<n; j++) { 1247 sum1 += v[jrow]*x[8*idx[jrow]]; 1248 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1249 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1250 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1251 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1252 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1253 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1254 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1255 jrow++; 1256 } 1257 y[8*i] = sum1; 1258 y[8*i+1] = sum2; 1259 y[8*i+2] = sum3; 1260 y[8*i+3] = sum4; 1261 y[8*i+4] = sum5; 1262 y[8*i+5] = sum6; 1263 y[8*i+6] = sum7; 1264 y[8*i+7] = sum8; 1265 } 1266 1267 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1268 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1269 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1275 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1276 { 1277 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1279 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1280 PetscErrorCode ierr; 1281 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1282 1283 PetscFunctionBegin; 1284 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1285 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1286 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1287 1288 for (i=0; i<m; i++) { 1289 idx = a->j + a->i[i] ; 1290 v = a->a + a->i[i] ; 1291 n = a->i[i+1] - a->i[i]; 1292 alpha1 = x[8*i]; 1293 alpha2 = x[8*i+1]; 1294 alpha3 = x[8*i+2]; 1295 alpha4 = x[8*i+3]; 1296 alpha5 = x[8*i+4]; 1297 alpha6 = x[8*i+5]; 1298 alpha7 = x[8*i+6]; 1299 alpha8 = x[8*i+7]; 1300 while (n-->0) { 1301 y[8*(*idx)] += alpha1*(*v); 1302 y[8*(*idx)+1] += alpha2*(*v); 1303 y[8*(*idx)+2] += alpha3*(*v); 1304 y[8*(*idx)+3] += alpha4*(*v); 1305 y[8*(*idx)+4] += alpha5*(*v); 1306 y[8*(*idx)+5] += alpha6*(*v); 1307 y[8*(*idx)+6] += alpha7*(*v); 1308 y[8*(*idx)+7] += alpha8*(*v); 1309 idx++; v++; 1310 } 1311 } 1312 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1314 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1320 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1321 { 1322 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1323 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1324 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1325 PetscErrorCode ierr; 1326 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1327 PetscInt n,i,jrow,j; 1328 1329 PetscFunctionBegin; 1330 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1331 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1332 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1333 idx = a->j; 1334 v = a->a; 1335 ii = a->i; 1336 1337 for (i=0; i<m; i++) { 1338 jrow = ii[i]; 1339 n = ii[i+1] - jrow; 1340 sum1 = 0.0; 1341 sum2 = 0.0; 1342 sum3 = 0.0; 1343 sum4 = 0.0; 1344 sum5 = 0.0; 1345 sum6 = 0.0; 1346 sum7 = 0.0; 1347 sum8 = 0.0; 1348 for (j=0; j<n; j++) { 1349 sum1 += v[jrow]*x[8*idx[jrow]]; 1350 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1351 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1352 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1353 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1354 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1355 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1356 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1357 jrow++; 1358 } 1359 y[8*i] += sum1; 1360 y[8*i+1] += sum2; 1361 y[8*i+2] += sum3; 1362 y[8*i+3] += sum4; 1363 y[8*i+4] += sum5; 1364 y[8*i+5] += sum6; 1365 y[8*i+6] += sum7; 1366 y[8*i+7] += sum8; 1367 } 1368 1369 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1370 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1371 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1377 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1378 { 1379 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1380 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1381 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1382 PetscErrorCode ierr; 1383 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1384 1385 PetscFunctionBegin; 1386 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1387 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1388 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1389 for (i=0; i<m; i++) { 1390 idx = a->j + a->i[i] ; 1391 v = a->a + a->i[i] ; 1392 n = a->i[i+1] - a->i[i]; 1393 alpha1 = x[8*i]; 1394 alpha2 = x[8*i+1]; 1395 alpha3 = x[8*i+2]; 1396 alpha4 = x[8*i+3]; 1397 alpha5 = x[8*i+4]; 1398 alpha6 = x[8*i+5]; 1399 alpha7 = x[8*i+6]; 1400 alpha8 = x[8*i+7]; 1401 while (n-->0) { 1402 y[8*(*idx)] += alpha1*(*v); 1403 y[8*(*idx)+1] += alpha2*(*v); 1404 y[8*(*idx)+2] += alpha3*(*v); 1405 y[8*(*idx)+3] += alpha4*(*v); 1406 y[8*(*idx)+4] += alpha5*(*v); 1407 y[8*(*idx)+5] += alpha6*(*v); 1408 y[8*(*idx)+6] += alpha7*(*v); 1409 y[8*(*idx)+7] += alpha8*(*v); 1410 idx++; v++; 1411 } 1412 } 1413 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1414 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1415 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 /* ------------------------------------------------------------------------------*/ 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1422 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1423 { 1424 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1425 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1426 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1427 PetscErrorCode ierr; 1428 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1429 PetscInt n,i,jrow,j; 1430 1431 PetscFunctionBegin; 1432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1433 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1434 idx = a->j; 1435 v = a->a; 1436 ii = a->i; 1437 1438 for (i=0; i<m; i++) { 1439 jrow = ii[i]; 1440 n = ii[i+1] - jrow; 1441 sum1 = 0.0; 1442 sum2 = 0.0; 1443 sum3 = 0.0; 1444 sum4 = 0.0; 1445 sum5 = 0.0; 1446 sum6 = 0.0; 1447 sum7 = 0.0; 1448 sum8 = 0.0; 1449 sum9 = 0.0; 1450 nonzerorow += (n>0); 1451 for (j=0; j<n; j++) { 1452 sum1 += v[jrow]*x[9*idx[jrow]]; 1453 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1454 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1455 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1456 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1457 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1458 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1459 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1460 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1461 jrow++; 1462 } 1463 y[9*i] = sum1; 1464 y[9*i+1] = sum2; 1465 y[9*i+2] = sum3; 1466 y[9*i+3] = sum4; 1467 y[9*i+4] = sum5; 1468 y[9*i+5] = sum6; 1469 y[9*i+6] = sum7; 1470 y[9*i+7] = sum8; 1471 y[9*i+8] = sum9; 1472 } 1473 1474 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1475 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1476 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 /* ------------------------------------------------------------------------------*/ 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1484 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1485 { 1486 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1488 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1489 PetscErrorCode ierr; 1490 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1491 1492 PetscFunctionBegin; 1493 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1494 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1495 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1496 1497 for (i=0; i<m; i++) { 1498 idx = a->j + a->i[i] ; 1499 v = a->a + a->i[i] ; 1500 n = a->i[i+1] - a->i[i]; 1501 alpha1 = x[9*i]; 1502 alpha2 = x[9*i+1]; 1503 alpha3 = x[9*i+2]; 1504 alpha4 = x[9*i+3]; 1505 alpha5 = x[9*i+4]; 1506 alpha6 = x[9*i+5]; 1507 alpha7 = x[9*i+6]; 1508 alpha8 = x[9*i+7]; 1509 alpha9 = x[9*i+8]; 1510 while (n-->0) { 1511 y[9*(*idx)] += alpha1*(*v); 1512 y[9*(*idx)+1] += alpha2*(*v); 1513 y[9*(*idx)+2] += alpha3*(*v); 1514 y[9*(*idx)+3] += alpha4*(*v); 1515 y[9*(*idx)+4] += alpha5*(*v); 1516 y[9*(*idx)+5] += alpha6*(*v); 1517 y[9*(*idx)+6] += alpha7*(*v); 1518 y[9*(*idx)+7] += alpha8*(*v); 1519 y[9*(*idx)+8] += alpha9*(*v); 1520 idx++; v++; 1521 } 1522 } 1523 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1524 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1525 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1531 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1532 { 1533 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1535 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1536 PetscErrorCode ierr; 1537 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1538 PetscInt n,i,jrow,j; 1539 1540 PetscFunctionBegin; 1541 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1542 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1543 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1544 idx = a->j; 1545 v = a->a; 1546 ii = a->i; 1547 1548 for (i=0; i<m; i++) { 1549 jrow = ii[i]; 1550 n = ii[i+1] - jrow; 1551 sum1 = 0.0; 1552 sum2 = 0.0; 1553 sum3 = 0.0; 1554 sum4 = 0.0; 1555 sum5 = 0.0; 1556 sum6 = 0.0; 1557 sum7 = 0.0; 1558 sum8 = 0.0; 1559 sum9 = 0.0; 1560 for (j=0; j<n; j++) { 1561 sum1 += v[jrow]*x[9*idx[jrow]]; 1562 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1563 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1564 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1565 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1566 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1567 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1568 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1569 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1570 jrow++; 1571 } 1572 y[9*i] += sum1; 1573 y[9*i+1] += sum2; 1574 y[9*i+2] += sum3; 1575 y[9*i+3] += sum4; 1576 y[9*i+4] += sum5; 1577 y[9*i+5] += sum6; 1578 y[9*i+6] += sum7; 1579 y[9*i+7] += sum8; 1580 y[9*i+8] += sum9; 1581 } 1582 1583 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1584 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1585 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1591 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1592 { 1593 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1595 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1596 PetscErrorCode ierr; 1597 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1598 1599 PetscFunctionBegin; 1600 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1601 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1602 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1603 for (i=0; i<m; i++) { 1604 idx = a->j + a->i[i] ; 1605 v = a->a + a->i[i] ; 1606 n = a->i[i+1] - a->i[i]; 1607 alpha1 = x[9*i]; 1608 alpha2 = x[9*i+1]; 1609 alpha3 = x[9*i+2]; 1610 alpha4 = x[9*i+3]; 1611 alpha5 = x[9*i+4]; 1612 alpha6 = x[9*i+5]; 1613 alpha7 = x[9*i+6]; 1614 alpha8 = x[9*i+7]; 1615 alpha9 = x[9*i+8]; 1616 while (n-->0) { 1617 y[9*(*idx)] += alpha1*(*v); 1618 y[9*(*idx)+1] += alpha2*(*v); 1619 y[9*(*idx)+2] += alpha3*(*v); 1620 y[9*(*idx)+3] += alpha4*(*v); 1621 y[9*(*idx)+4] += alpha5*(*v); 1622 y[9*(*idx)+5] += alpha6*(*v); 1623 y[9*(*idx)+6] += alpha7*(*v); 1624 y[9*(*idx)+7] += alpha8*(*v); 1625 y[9*(*idx)+8] += alpha9*(*v); 1626 idx++; v++; 1627 } 1628 } 1629 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1630 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1631 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 /*--------------------------------------------------------------------------------------------*/ 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1637 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1638 { 1639 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1640 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1641 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1642 PetscErrorCode ierr; 1643 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1644 PetscInt n,i,jrow,j; 1645 1646 PetscFunctionBegin; 1647 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1648 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1649 idx = a->j; 1650 v = a->a; 1651 ii = a->i; 1652 1653 for (i=0; i<m; i++) { 1654 jrow = ii[i]; 1655 n = ii[i+1] - jrow; 1656 sum1 = 0.0; 1657 sum2 = 0.0; 1658 sum3 = 0.0; 1659 sum4 = 0.0; 1660 sum5 = 0.0; 1661 sum6 = 0.0; 1662 sum7 = 0.0; 1663 sum8 = 0.0; 1664 sum9 = 0.0; 1665 sum10 = 0.0; 1666 nonzerorow += (n>0); 1667 for (j=0; j<n; j++) { 1668 sum1 += v[jrow]*x[10*idx[jrow]]; 1669 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1670 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1671 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1672 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1673 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1674 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1675 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1676 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1677 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1678 jrow++; 1679 } 1680 y[10*i] = sum1; 1681 y[10*i+1] = sum2; 1682 y[10*i+2] = sum3; 1683 y[10*i+3] = sum4; 1684 y[10*i+4] = sum5; 1685 y[10*i+5] = sum6; 1686 y[10*i+6] = sum7; 1687 y[10*i+7] = sum8; 1688 y[10*i+8] = sum9; 1689 y[10*i+9] = sum10; 1690 } 1691 1692 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1693 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1694 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1700 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1701 { 1702 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1703 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1704 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1705 PetscErrorCode ierr; 1706 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1707 PetscInt n,i,jrow,j; 1708 1709 PetscFunctionBegin; 1710 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1711 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1712 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1713 idx = a->j; 1714 v = a->a; 1715 ii = a->i; 1716 1717 for (i=0; i<m; i++) { 1718 jrow = ii[i]; 1719 n = ii[i+1] - jrow; 1720 sum1 = 0.0; 1721 sum2 = 0.0; 1722 sum3 = 0.0; 1723 sum4 = 0.0; 1724 sum5 = 0.0; 1725 sum6 = 0.0; 1726 sum7 = 0.0; 1727 sum8 = 0.0; 1728 sum9 = 0.0; 1729 sum10 = 0.0; 1730 for (j=0; j<n; j++) { 1731 sum1 += v[jrow]*x[10*idx[jrow]]; 1732 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1733 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1734 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1735 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1736 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1737 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1738 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1739 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1740 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1741 jrow++; 1742 } 1743 y[10*i] += sum1; 1744 y[10*i+1] += sum2; 1745 y[10*i+2] += sum3; 1746 y[10*i+3] += sum4; 1747 y[10*i+4] += sum5; 1748 y[10*i+5] += sum6; 1749 y[10*i+6] += sum7; 1750 y[10*i+7] += sum8; 1751 y[10*i+8] += sum9; 1752 y[10*i+9] += sum10; 1753 } 1754 1755 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1756 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1757 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1763 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1764 { 1765 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1766 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1767 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1768 PetscErrorCode ierr; 1769 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1770 1771 PetscFunctionBegin; 1772 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1773 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1774 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1775 1776 for (i=0; i<m; i++) { 1777 idx = a->j + a->i[i] ; 1778 v = a->a + a->i[i] ; 1779 n = a->i[i+1] - a->i[i]; 1780 alpha1 = x[10*i]; 1781 alpha2 = x[10*i+1]; 1782 alpha3 = x[10*i+2]; 1783 alpha4 = x[10*i+3]; 1784 alpha5 = x[10*i+4]; 1785 alpha6 = x[10*i+5]; 1786 alpha7 = x[10*i+6]; 1787 alpha8 = x[10*i+7]; 1788 alpha9 = x[10*i+8]; 1789 alpha10 = x[10*i+9]; 1790 while (n-->0) { 1791 y[10*(*idx)] += alpha1*(*v); 1792 y[10*(*idx)+1] += alpha2*(*v); 1793 y[10*(*idx)+2] += alpha3*(*v); 1794 y[10*(*idx)+3] += alpha4*(*v); 1795 y[10*(*idx)+4] += alpha5*(*v); 1796 y[10*(*idx)+5] += alpha6*(*v); 1797 y[10*(*idx)+6] += alpha7*(*v); 1798 y[10*(*idx)+7] += alpha8*(*v); 1799 y[10*(*idx)+8] += alpha9*(*v); 1800 y[10*(*idx)+9] += alpha10*(*v); 1801 idx++; v++; 1802 } 1803 } 1804 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1805 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1806 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1812 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1813 { 1814 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1816 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1817 PetscErrorCode ierr; 1818 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1819 1820 PetscFunctionBegin; 1821 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1822 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1823 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1824 for (i=0; i<m; i++) { 1825 idx = a->j + a->i[i] ; 1826 v = a->a + a->i[i] ; 1827 n = a->i[i+1] - a->i[i]; 1828 alpha1 = x[10*i]; 1829 alpha2 = x[10*i+1]; 1830 alpha3 = x[10*i+2]; 1831 alpha4 = x[10*i+3]; 1832 alpha5 = x[10*i+4]; 1833 alpha6 = x[10*i+5]; 1834 alpha7 = x[10*i+6]; 1835 alpha8 = x[10*i+7]; 1836 alpha9 = x[10*i+8]; 1837 alpha10 = x[10*i+9]; 1838 while (n-->0) { 1839 y[10*(*idx)] += alpha1*(*v); 1840 y[10*(*idx)+1] += alpha2*(*v); 1841 y[10*(*idx)+2] += alpha3*(*v); 1842 y[10*(*idx)+3] += alpha4*(*v); 1843 y[10*(*idx)+4] += alpha5*(*v); 1844 y[10*(*idx)+5] += alpha6*(*v); 1845 y[10*(*idx)+6] += alpha7*(*v); 1846 y[10*(*idx)+7] += alpha8*(*v); 1847 y[10*(*idx)+8] += alpha9*(*v); 1848 y[10*(*idx)+9] += alpha10*(*v); 1849 idx++; v++; 1850 } 1851 } 1852 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1853 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1854 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 1859 /*--------------------------------------------------------------------------------------------*/ 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1862 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1863 { 1864 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1865 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1866 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1867 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1868 PetscErrorCode ierr; 1869 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1870 PetscInt n,i,jrow,j; 1871 1872 PetscFunctionBegin; 1873 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1874 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1875 idx = a->j; 1876 v = a->a; 1877 ii = a->i; 1878 1879 for (i=0; i<m; i++) { 1880 jrow = ii[i]; 1881 n = ii[i+1] - jrow; 1882 sum1 = 0.0; 1883 sum2 = 0.0; 1884 sum3 = 0.0; 1885 sum4 = 0.0; 1886 sum5 = 0.0; 1887 sum6 = 0.0; 1888 sum7 = 0.0; 1889 sum8 = 0.0; 1890 sum9 = 0.0; 1891 sum10 = 0.0; 1892 sum11 = 0.0; 1893 sum12 = 0.0; 1894 sum13 = 0.0; 1895 sum14 = 0.0; 1896 sum15 = 0.0; 1897 sum16 = 0.0; 1898 nonzerorow += (n>0); 1899 for (j=0; j<n; j++) { 1900 sum1 += v[jrow]*x[16*idx[jrow]]; 1901 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1902 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1903 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1904 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1905 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1906 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1907 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1908 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1909 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1910 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1911 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1912 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1913 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1914 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1915 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1916 jrow++; 1917 } 1918 y[16*i] = sum1; 1919 y[16*i+1] = sum2; 1920 y[16*i+2] = sum3; 1921 y[16*i+3] = sum4; 1922 y[16*i+4] = sum5; 1923 y[16*i+5] = sum6; 1924 y[16*i+6] = sum7; 1925 y[16*i+7] = sum8; 1926 y[16*i+8] = sum9; 1927 y[16*i+9] = sum10; 1928 y[16*i+10] = sum11; 1929 y[16*i+11] = sum12; 1930 y[16*i+12] = sum13; 1931 y[16*i+13] = sum14; 1932 y[16*i+14] = sum15; 1933 y[16*i+15] = sum16; 1934 } 1935 1936 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 1937 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1938 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1944 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1945 { 1946 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1947 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1948 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1949 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1950 PetscErrorCode ierr; 1951 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1952 1953 PetscFunctionBegin; 1954 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1955 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1956 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1957 1958 for (i=0; i<m; i++) { 1959 idx = a->j + a->i[i] ; 1960 v = a->a + a->i[i] ; 1961 n = a->i[i+1] - a->i[i]; 1962 alpha1 = x[16*i]; 1963 alpha2 = x[16*i+1]; 1964 alpha3 = x[16*i+2]; 1965 alpha4 = x[16*i+3]; 1966 alpha5 = x[16*i+4]; 1967 alpha6 = x[16*i+5]; 1968 alpha7 = x[16*i+6]; 1969 alpha8 = x[16*i+7]; 1970 alpha9 = x[16*i+8]; 1971 alpha10 = x[16*i+9]; 1972 alpha11 = x[16*i+10]; 1973 alpha12 = x[16*i+11]; 1974 alpha13 = x[16*i+12]; 1975 alpha14 = x[16*i+13]; 1976 alpha15 = x[16*i+14]; 1977 alpha16 = x[16*i+15]; 1978 while (n-->0) { 1979 y[16*(*idx)] += alpha1*(*v); 1980 y[16*(*idx)+1] += alpha2*(*v); 1981 y[16*(*idx)+2] += alpha3*(*v); 1982 y[16*(*idx)+3] += alpha4*(*v); 1983 y[16*(*idx)+4] += alpha5*(*v); 1984 y[16*(*idx)+5] += alpha6*(*v); 1985 y[16*(*idx)+6] += alpha7*(*v); 1986 y[16*(*idx)+7] += alpha8*(*v); 1987 y[16*(*idx)+8] += alpha9*(*v); 1988 y[16*(*idx)+9] += alpha10*(*v); 1989 y[16*(*idx)+10] += alpha11*(*v); 1990 y[16*(*idx)+11] += alpha12*(*v); 1991 y[16*(*idx)+12] += alpha13*(*v); 1992 y[16*(*idx)+13] += alpha14*(*v); 1993 y[16*(*idx)+14] += alpha15*(*v); 1994 y[16*(*idx)+15] += alpha16*(*v); 1995 idx++; v++; 1996 } 1997 } 1998 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 1999 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2000 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2006 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2007 { 2008 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2009 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2010 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2011 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2012 PetscErrorCode ierr; 2013 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2014 PetscInt n,i,jrow,j; 2015 2016 PetscFunctionBegin; 2017 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2018 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2019 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2020 idx = a->j; 2021 v = a->a; 2022 ii = a->i; 2023 2024 for (i=0; i<m; i++) { 2025 jrow = ii[i]; 2026 n = ii[i+1] - jrow; 2027 sum1 = 0.0; 2028 sum2 = 0.0; 2029 sum3 = 0.0; 2030 sum4 = 0.0; 2031 sum5 = 0.0; 2032 sum6 = 0.0; 2033 sum7 = 0.0; 2034 sum8 = 0.0; 2035 sum9 = 0.0; 2036 sum10 = 0.0; 2037 sum11 = 0.0; 2038 sum12 = 0.0; 2039 sum13 = 0.0; 2040 sum14 = 0.0; 2041 sum15 = 0.0; 2042 sum16 = 0.0; 2043 for (j=0; j<n; j++) { 2044 sum1 += v[jrow]*x[16*idx[jrow]]; 2045 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2046 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2047 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2048 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2049 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2050 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2051 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2052 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2053 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2054 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2055 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2056 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2057 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2058 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2059 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2060 jrow++; 2061 } 2062 y[16*i] += sum1; 2063 y[16*i+1] += sum2; 2064 y[16*i+2] += sum3; 2065 y[16*i+3] += sum4; 2066 y[16*i+4] += sum5; 2067 y[16*i+5] += sum6; 2068 y[16*i+6] += sum7; 2069 y[16*i+7] += sum8; 2070 y[16*i+8] += sum9; 2071 y[16*i+9] += sum10; 2072 y[16*i+10] += sum11; 2073 y[16*i+11] += sum12; 2074 y[16*i+12] += sum13; 2075 y[16*i+13] += sum14; 2076 y[16*i+14] += sum15; 2077 y[16*i+15] += sum16; 2078 } 2079 2080 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2081 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2082 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2083 PetscFunctionReturn(0); 2084 } 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2088 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2089 { 2090 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2091 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2092 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2093 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2094 PetscErrorCode ierr; 2095 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2096 2097 PetscFunctionBegin; 2098 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2099 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2100 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2101 for (i=0; i<m; i++) { 2102 idx = a->j + a->i[i] ; 2103 v = a->a + a->i[i] ; 2104 n = a->i[i+1] - a->i[i]; 2105 alpha1 = x[16*i]; 2106 alpha2 = x[16*i+1]; 2107 alpha3 = x[16*i+2]; 2108 alpha4 = x[16*i+3]; 2109 alpha5 = x[16*i+4]; 2110 alpha6 = x[16*i+5]; 2111 alpha7 = x[16*i+6]; 2112 alpha8 = x[16*i+7]; 2113 alpha9 = x[16*i+8]; 2114 alpha10 = x[16*i+9]; 2115 alpha11 = x[16*i+10]; 2116 alpha12 = x[16*i+11]; 2117 alpha13 = x[16*i+12]; 2118 alpha14 = x[16*i+13]; 2119 alpha15 = x[16*i+14]; 2120 alpha16 = x[16*i+15]; 2121 while (n-->0) { 2122 y[16*(*idx)] += alpha1*(*v); 2123 y[16*(*idx)+1] += alpha2*(*v); 2124 y[16*(*idx)+2] += alpha3*(*v); 2125 y[16*(*idx)+3] += alpha4*(*v); 2126 y[16*(*idx)+4] += alpha5*(*v); 2127 y[16*(*idx)+5] += alpha6*(*v); 2128 y[16*(*idx)+6] += alpha7*(*v); 2129 y[16*(*idx)+7] += alpha8*(*v); 2130 y[16*(*idx)+8] += alpha9*(*v); 2131 y[16*(*idx)+9] += alpha10*(*v); 2132 y[16*(*idx)+10] += alpha11*(*v); 2133 y[16*(*idx)+11] += alpha12*(*v); 2134 y[16*(*idx)+12] += alpha13*(*v); 2135 y[16*(*idx)+13] += alpha14*(*v); 2136 y[16*(*idx)+14] += alpha15*(*v); 2137 y[16*(*idx)+15] += alpha16*(*v); 2138 idx++; v++; 2139 } 2140 } 2141 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2142 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2143 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 /*--------------------------------------------------------------------------------------------*/ 2148 #undef __FUNCT__ 2149 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2150 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2151 { 2152 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2153 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2154 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2155 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2156 PetscErrorCode ierr; 2157 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 2158 PetscInt n,i,jrow,j; 2159 2160 PetscFunctionBegin; 2161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2162 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2163 idx = a->j; 2164 v = a->a; 2165 ii = a->i; 2166 2167 for (i=0; i<m; i++) { 2168 jrow = ii[i]; 2169 n = ii[i+1] - jrow; 2170 sum1 = 0.0; 2171 sum2 = 0.0; 2172 sum3 = 0.0; 2173 sum4 = 0.0; 2174 sum5 = 0.0; 2175 sum6 = 0.0; 2176 sum7 = 0.0; 2177 sum8 = 0.0; 2178 sum9 = 0.0; 2179 sum10 = 0.0; 2180 sum11 = 0.0; 2181 sum12 = 0.0; 2182 sum13 = 0.0; 2183 sum14 = 0.0; 2184 sum15 = 0.0; 2185 sum16 = 0.0; 2186 sum17 = 0.0; 2187 sum18 = 0.0; 2188 nonzerorow += (n>0); 2189 for (j=0; j<n; j++) { 2190 sum1 += v[jrow]*x[18*idx[jrow]]; 2191 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2192 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2193 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2194 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2195 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2196 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2197 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2198 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2199 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2200 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2201 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2202 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2203 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2204 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2205 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2206 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2207 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2208 jrow++; 2209 } 2210 y[18*i] = sum1; 2211 y[18*i+1] = sum2; 2212 y[18*i+2] = sum3; 2213 y[18*i+3] = sum4; 2214 y[18*i+4] = sum5; 2215 y[18*i+5] = sum6; 2216 y[18*i+6] = sum7; 2217 y[18*i+7] = sum8; 2218 y[18*i+8] = sum9; 2219 y[18*i+9] = sum10; 2220 y[18*i+10] = sum11; 2221 y[18*i+11] = sum12; 2222 y[18*i+12] = sum13; 2223 y[18*i+13] = sum14; 2224 y[18*i+14] = sum15; 2225 y[18*i+15] = sum16; 2226 y[18*i+16] = sum17; 2227 y[18*i+17] = sum18; 2228 } 2229 2230 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2231 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2232 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2238 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2239 { 2240 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2241 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2242 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 2243 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2244 PetscErrorCode ierr; 2245 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2246 2247 PetscFunctionBegin; 2248 ierr = VecSet(yy,zero);CHKERRQ(ierr); 2249 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2251 2252 for (i=0; i<m; i++) { 2253 idx = a->j + a->i[i] ; 2254 v = a->a + a->i[i] ; 2255 n = a->i[i+1] - a->i[i]; 2256 alpha1 = x[18*i]; 2257 alpha2 = x[18*i+1]; 2258 alpha3 = x[18*i+2]; 2259 alpha4 = x[18*i+3]; 2260 alpha5 = x[18*i+4]; 2261 alpha6 = x[18*i+5]; 2262 alpha7 = x[18*i+6]; 2263 alpha8 = x[18*i+7]; 2264 alpha9 = x[18*i+8]; 2265 alpha10 = x[18*i+9]; 2266 alpha11 = x[18*i+10]; 2267 alpha12 = x[18*i+11]; 2268 alpha13 = x[18*i+12]; 2269 alpha14 = x[18*i+13]; 2270 alpha15 = x[18*i+14]; 2271 alpha16 = x[18*i+15]; 2272 alpha17 = x[18*i+16]; 2273 alpha18 = x[18*i+17]; 2274 while (n-->0) { 2275 y[18*(*idx)] += alpha1*(*v); 2276 y[18*(*idx)+1] += alpha2*(*v); 2277 y[18*(*idx)+2] += alpha3*(*v); 2278 y[18*(*idx)+3] += alpha4*(*v); 2279 y[18*(*idx)+4] += alpha5*(*v); 2280 y[18*(*idx)+5] += alpha6*(*v); 2281 y[18*(*idx)+6] += alpha7*(*v); 2282 y[18*(*idx)+7] += alpha8*(*v); 2283 y[18*(*idx)+8] += alpha9*(*v); 2284 y[18*(*idx)+9] += alpha10*(*v); 2285 y[18*(*idx)+10] += alpha11*(*v); 2286 y[18*(*idx)+11] += alpha12*(*v); 2287 y[18*(*idx)+12] += alpha13*(*v); 2288 y[18*(*idx)+13] += alpha14*(*v); 2289 y[18*(*idx)+14] += alpha15*(*v); 2290 y[18*(*idx)+15] += alpha16*(*v); 2291 y[18*(*idx)+16] += alpha17*(*v); 2292 y[18*(*idx)+17] += alpha18*(*v); 2293 idx++; v++; 2294 } 2295 } 2296 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2297 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2298 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2304 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2305 { 2306 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2307 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2308 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2309 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2310 PetscErrorCode ierr; 2311 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2312 PetscInt n,i,jrow,j; 2313 2314 PetscFunctionBegin; 2315 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2317 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2318 idx = a->j; 2319 v = a->a; 2320 ii = a->i; 2321 2322 for (i=0; i<m; i++) { 2323 jrow = ii[i]; 2324 n = ii[i+1] - jrow; 2325 sum1 = 0.0; 2326 sum2 = 0.0; 2327 sum3 = 0.0; 2328 sum4 = 0.0; 2329 sum5 = 0.0; 2330 sum6 = 0.0; 2331 sum7 = 0.0; 2332 sum8 = 0.0; 2333 sum9 = 0.0; 2334 sum10 = 0.0; 2335 sum11 = 0.0; 2336 sum12 = 0.0; 2337 sum13 = 0.0; 2338 sum14 = 0.0; 2339 sum15 = 0.0; 2340 sum16 = 0.0; 2341 sum17 = 0.0; 2342 sum18 = 0.0; 2343 for (j=0; j<n; j++) { 2344 sum1 += v[jrow]*x[18*idx[jrow]]; 2345 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2346 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2347 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2348 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2349 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2350 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2351 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2352 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2353 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2354 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2355 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2356 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2357 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2358 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2359 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2360 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2361 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2362 jrow++; 2363 } 2364 y[18*i] += sum1; 2365 y[18*i+1] += sum2; 2366 y[18*i+2] += sum3; 2367 y[18*i+3] += sum4; 2368 y[18*i+4] += sum5; 2369 y[18*i+5] += sum6; 2370 y[18*i+6] += sum7; 2371 y[18*i+7] += sum8; 2372 y[18*i+8] += sum9; 2373 y[18*i+9] += sum10; 2374 y[18*i+10] += sum11; 2375 y[18*i+11] += sum12; 2376 y[18*i+12] += sum13; 2377 y[18*i+13] += sum14; 2378 y[18*i+14] += sum15; 2379 y[18*i+15] += sum16; 2380 y[18*i+16] += sum17; 2381 y[18*i+17] += sum18; 2382 } 2383 2384 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2385 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2386 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2392 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2393 { 2394 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2395 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2396 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2397 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2398 PetscErrorCode ierr; 2399 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2400 2401 PetscFunctionBegin; 2402 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2403 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2404 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2405 for (i=0; i<m; i++) { 2406 idx = a->j + a->i[i] ; 2407 v = a->a + a->i[i] ; 2408 n = a->i[i+1] - a->i[i]; 2409 alpha1 = x[18*i]; 2410 alpha2 = x[18*i+1]; 2411 alpha3 = x[18*i+2]; 2412 alpha4 = x[18*i+3]; 2413 alpha5 = x[18*i+4]; 2414 alpha6 = x[18*i+5]; 2415 alpha7 = x[18*i+6]; 2416 alpha8 = x[18*i+7]; 2417 alpha9 = x[18*i+8]; 2418 alpha10 = x[18*i+9]; 2419 alpha11 = x[18*i+10]; 2420 alpha12 = x[18*i+11]; 2421 alpha13 = x[18*i+12]; 2422 alpha14 = x[18*i+13]; 2423 alpha15 = x[18*i+14]; 2424 alpha16 = x[18*i+15]; 2425 alpha17 = x[18*i+16]; 2426 alpha18 = x[18*i+17]; 2427 while (n-->0) { 2428 y[18*(*idx)] += alpha1*(*v); 2429 y[18*(*idx)+1] += alpha2*(*v); 2430 y[18*(*idx)+2] += alpha3*(*v); 2431 y[18*(*idx)+3] += alpha4*(*v); 2432 y[18*(*idx)+4] += alpha5*(*v); 2433 y[18*(*idx)+5] += alpha6*(*v); 2434 y[18*(*idx)+6] += alpha7*(*v); 2435 y[18*(*idx)+7] += alpha8*(*v); 2436 y[18*(*idx)+8] += alpha9*(*v); 2437 y[18*(*idx)+9] += alpha10*(*v); 2438 y[18*(*idx)+10] += alpha11*(*v); 2439 y[18*(*idx)+11] += alpha12*(*v); 2440 y[18*(*idx)+12] += alpha13*(*v); 2441 y[18*(*idx)+13] += alpha14*(*v); 2442 y[18*(*idx)+14] += alpha15*(*v); 2443 y[18*(*idx)+15] += alpha16*(*v); 2444 y[18*(*idx)+16] += alpha17*(*v); 2445 y[18*(*idx)+17] += alpha18*(*v); 2446 idx++; v++; 2447 } 2448 } 2449 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2450 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2451 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 /*===================================================================================*/ 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2458 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2459 { 2460 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2461 PetscErrorCode ierr; 2462 2463 PetscFunctionBegin; 2464 /* start the scatter */ 2465 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2466 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2467 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2468 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2474 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2475 { 2476 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2481 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2482 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2483 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 #undef __FUNCT__ 2488 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2489 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2490 { 2491 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2492 PetscErrorCode ierr; 2493 2494 PetscFunctionBegin; 2495 /* start the scatter */ 2496 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2497 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2498 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2499 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2505 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2506 { 2507 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2512 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2513 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2514 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 /* ----------------------------------------------------------------*/ 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2521 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2522 { 2523 /* This routine requires testing -- but it's getting better. */ 2524 PetscErrorCode ierr; 2525 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2526 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2527 Mat P=pp->AIJ; 2528 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2529 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 2530 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2531 PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn; 2532 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 2533 MatScalar *ca; 2534 2535 PetscFunctionBegin; 2536 /* Start timer */ 2537 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2538 2539 /* Get ij structure of P^T */ 2540 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2541 2542 cn = pn*ppdof; 2543 /* Allocate ci array, arrays for fill computation and */ 2544 /* free space for accumulating nonzero column info */ 2545 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2546 ci[0] = 0; 2547 2548 /* Work arrays for rows of P^T*A */ 2549 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 2550 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 2551 ptasparserow = ptadenserow + an; 2552 denserow = ptasparserow + an; 2553 sparserow = denserow + cn; 2554 2555 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2556 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2557 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2558 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2559 current_space = free_space; 2560 2561 /* Determine symbolic info for each row of C: */ 2562 for (i=0;i<pn;i++) { 2563 ptnzi = pti[i+1] - pti[i]; 2564 ptJ = ptj + pti[i]; 2565 for (dof=0;dof<ppdof;dof++) { 2566 ptanzi = 0; 2567 /* Determine symbolic row of PtA: */ 2568 for (j=0;j<ptnzi;j++) { 2569 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2570 arow = ptJ[j]*ppdof + dof; 2571 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2572 anzj = ai[arow+1] - ai[arow]; 2573 ajj = aj + ai[arow]; 2574 for (k=0;k<anzj;k++) { 2575 if (!ptadenserow[ajj[k]]) { 2576 ptadenserow[ajj[k]] = -1; 2577 ptasparserow[ptanzi++] = ajj[k]; 2578 } 2579 } 2580 } 2581 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2582 ptaj = ptasparserow; 2583 cnzi = 0; 2584 for (j=0;j<ptanzi;j++) { 2585 /* Get offset within block of P */ 2586 pshift = *ptaj%ppdof; 2587 /* Get block row of P */ 2588 prow = (*ptaj++)/ppdof; /* integer division */ 2589 /* P has same number of nonzeros per row as the compressed form */ 2590 pnzj = pi[prow+1] - pi[prow]; 2591 pjj = pj + pi[prow]; 2592 for (k=0;k<pnzj;k++) { 2593 /* Locations in C are shifted by the offset within the block */ 2594 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2595 if (!denserow[pjj[k]*ppdof+pshift]) { 2596 denserow[pjj[k]*ppdof+pshift] = -1; 2597 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2598 } 2599 } 2600 } 2601 2602 /* sort sparserow */ 2603 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2604 2605 /* If free space is not available, make more free space */ 2606 /* Double the amount of total space in the list */ 2607 if (current_space->local_remaining<cnzi) { 2608 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2609 } 2610 2611 /* Copy data into free space, and zero out denserows */ 2612 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2613 current_space->array += cnzi; 2614 current_space->local_used += cnzi; 2615 current_space->local_remaining -= cnzi; 2616 2617 for (j=0;j<ptanzi;j++) { 2618 ptadenserow[ptasparserow[j]] = 0; 2619 } 2620 for (j=0;j<cnzi;j++) { 2621 denserow[sparserow[j]] = 0; 2622 } 2623 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2624 /* For now, we will recompute what is needed. */ 2625 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2626 } 2627 } 2628 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2629 /* Allocate space for cj, initialize cj, and */ 2630 /* destroy list of free space and other temporary array(s) */ 2631 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2632 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2633 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2634 2635 /* Allocate space for ca */ 2636 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2637 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2638 2639 /* put together the new matrix */ 2640 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2641 2642 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2643 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2644 c = (Mat_SeqAIJ *)((*C)->data); 2645 c->free_a = PETSC_TRUE; 2646 c->free_ij = PETSC_TRUE; 2647 c->nonew = 0; 2648 2649 /* Clean up. */ 2650 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2651 2652 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2653 PetscFunctionReturn(0); 2654 } 2655 2656 #undef __FUNCT__ 2657 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2658 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2659 { 2660 /* This routine requires testing -- first draft only */ 2661 PetscErrorCode ierr; 2662 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2663 Mat P=pp->AIJ; 2664 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2665 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2666 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2667 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2668 PetscInt *ci=c->i,*cj=c->j,*cjj; 2669 PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 2670 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2671 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2672 2673 PetscFunctionBegin; 2674 /* Allocate temporary array for storage of one row of A*P */ 2675 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2676 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2677 2678 apj = (PetscInt *)(apa + cn); 2679 apjdense = apj + cn; 2680 2681 /* Clear old values in C */ 2682 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2683 2684 for (i=0;i<am;i++) { 2685 /* Form sparse row of A*P */ 2686 anzi = ai[i+1] - ai[i]; 2687 apnzj = 0; 2688 for (j=0;j<anzi;j++) { 2689 /* Get offset within block of P */ 2690 pshift = *aj%ppdof; 2691 /* Get block row of P */ 2692 prow = *aj++/ppdof; /* integer division */ 2693 pnzj = pi[prow+1] - pi[prow]; 2694 pjj = pj + pi[prow]; 2695 paj = pa + pi[prow]; 2696 for (k=0;k<pnzj;k++) { 2697 poffset = pjj[k]*ppdof+pshift; 2698 if (!apjdense[poffset]) { 2699 apjdense[poffset] = -1; 2700 apj[apnzj++] = poffset; 2701 } 2702 apa[poffset] += (*aa)*paj[k]; 2703 } 2704 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 2705 aa++; 2706 } 2707 2708 /* Sort the j index array for quick sparse axpy. */ 2709 /* Note: a array does not need sorting as it is in dense storage locations. */ 2710 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2711 2712 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2713 prow = i/ppdof; /* integer division */ 2714 pshift = i%ppdof; 2715 poffset = pi[prow]; 2716 pnzi = pi[prow+1] - poffset; 2717 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2718 pJ = pj+poffset; 2719 pA = pa+poffset; 2720 for (j=0;j<pnzi;j++) { 2721 crow = (*pJ)*ppdof+pshift; 2722 cjj = cj + ci[crow]; 2723 caj = ca + ci[crow]; 2724 pJ++; 2725 /* Perform sparse axpy operation. Note cjj includes apj. */ 2726 for (k=0,nextap=0;nextap<apnzj;k++) { 2727 if (cjj[k]==apj[nextap]) { 2728 caj[k] += (*pA)*apa[apj[nextap++]]; 2729 } 2730 } 2731 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 2732 pA++; 2733 } 2734 2735 /* Zero the current row info for A*P */ 2736 for (j=0;j<apnzj;j++) { 2737 apa[apj[j]] = 0.; 2738 apjdense[apj[j]] = 0; 2739 } 2740 } 2741 2742 /* Assemble the final matrix and clean up */ 2743 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2744 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2745 ierr = PetscFree(apa);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 #undef __FUNCT__ 2750 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 2751 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2752 { 2753 PetscErrorCode ierr; 2754 2755 PetscFunctionBegin; 2756 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 2757 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 2758 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 2759 PetscFunctionReturn(0); 2760 } 2761 2762 #undef __FUNCT__ 2763 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 2764 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 2765 { 2766 PetscFunctionBegin; 2767 SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 2768 PetscFunctionReturn(0); 2769 } 2770 2771 EXTERN_C_BEGIN 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2774 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2775 { 2776 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2777 Mat a = b->AIJ,B; 2778 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 2779 PetscErrorCode ierr; 2780 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2781 PetscInt *cols; 2782 PetscScalar *vals; 2783 2784 PetscFunctionBegin; 2785 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 2786 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 2787 for (i=0; i<m; i++) { 2788 nmax = PetscMax(nmax,aij->ilen[i]); 2789 for (j=0; j<dof; j++) { 2790 ilen[dof*i+j] = aij->ilen[i]; 2791 } 2792 } 2793 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2794 ierr = PetscFree(ilen);CHKERRQ(ierr); 2795 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 2796 ii = 0; 2797 for (i=0; i<m; i++) { 2798 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2799 for (j=0; j<dof; j++) { 2800 for (k=0; k<ncols; k++) { 2801 icols[k] = dof*cols[k]+j; 2802 } 2803 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2804 ii++; 2805 } 2806 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2807 } 2808 ierr = PetscFree(icols);CHKERRQ(ierr); 2809 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2810 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811 2812 if (reuse == MAT_REUSE_MATRIX) { 2813 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2814 } else { 2815 *newmat = B; 2816 } 2817 PetscFunctionReturn(0); 2818 } 2819 EXTERN_C_END 2820 2821 #include "../src/mat/impls/aij/mpi/mpiaij.h" 2822 2823 EXTERN_C_BEGIN 2824 #undef __FUNCT__ 2825 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2826 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2827 { 2828 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2829 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 2830 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 2831 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 2832 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 2833 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2834 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2835 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 2836 PetscInt rstart,cstart,*garray,ii,k; 2837 PetscErrorCode ierr; 2838 PetscScalar *vals,*ovals; 2839 2840 PetscFunctionBegin; 2841 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 2842 for (i=0; i<A->rmap->n/dof; i++) { 2843 nmax = PetscMax(nmax,AIJ->ilen[i]); 2844 onmax = PetscMax(onmax,OAIJ->ilen[i]); 2845 for (j=0; j<dof; j++) { 2846 dnz[dof*i+j] = AIJ->ilen[i]; 2847 onz[dof*i+j] = OAIJ->ilen[i]; 2848 } 2849 } 2850 ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2851 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 2852 2853 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2854 rstart = dof*maij->A->rmap->rstart; 2855 cstart = dof*maij->A->cmap->rstart; 2856 garray = mpiaij->garray; 2857 2858 ii = rstart; 2859 for (i=0; i<A->rmap->n/dof; i++) { 2860 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2861 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2862 for (j=0; j<dof; j++) { 2863 for (k=0; k<ncols; k++) { 2864 icols[k] = cstart + dof*cols[k]+j; 2865 } 2866 for (k=0; k<oncols; k++) { 2867 oicols[k] = dof*garray[ocols[k]]+j; 2868 } 2869 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2870 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 2871 ii++; 2872 } 2873 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2874 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2875 } 2876 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 2877 2878 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 2881 if (reuse == MAT_REUSE_MATRIX) { 2882 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 2883 ((PetscObject)A)->refct = 1; 2884 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2885 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 2886 } else { 2887 *newmat = B; 2888 } 2889 PetscFunctionReturn(0); 2890 } 2891 EXTERN_C_END 2892 2893 2894 /* ---------------------------------------------------------------------------------- */ 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatCreateMAIJ" 2897 /*@C 2898 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 2899 operations for multicomponent problems. It interpolates each component the same 2900 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 2901 and MATMPIAIJ for distributed matrices. 2902 2903 Collective 2904 2905 Input Parameters: 2906 + A - the AIJ matrix describing the action on blocks 2907 - dof - the block size (number of components per node) 2908 2909 Output Parameter: 2910 . maij - the new MAIJ matrix 2911 2912 Operations provided: 2913 + MatMult 2914 . MatMultTranspose 2915 . MatMultAdd 2916 . MatMultTransposeAdd 2917 - MatView 2918 2919 Level: advanced 2920 2921 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension() 2922 @*/ 2923 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 2924 { 2925 PetscErrorCode ierr; 2926 PetscMPIInt size; 2927 PetscInt n; 2928 Mat_MPIMAIJ *b; 2929 Mat B; 2930 2931 PetscFunctionBegin; 2932 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2933 2934 if (dof == 1) { 2935 *maij = A; 2936 } else { 2937 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2938 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 2939 B->assembled = PETSC_TRUE; 2940 2941 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2942 if (size == 1) { 2943 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 2944 B->ops->destroy = MatDestroy_SeqMAIJ; 2945 B->ops->view = MatView_SeqMAIJ; 2946 b = (Mat_MPIMAIJ*)B->data; 2947 b->dof = dof; 2948 b->AIJ = A; 2949 if (dof == 2) { 2950 B->ops->mult = MatMult_SeqMAIJ_2; 2951 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2952 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2953 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2954 } else if (dof == 3) { 2955 B->ops->mult = MatMult_SeqMAIJ_3; 2956 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2957 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2958 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2959 } else if (dof == 4) { 2960 B->ops->mult = MatMult_SeqMAIJ_4; 2961 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2962 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2963 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2964 } else if (dof == 5) { 2965 B->ops->mult = MatMult_SeqMAIJ_5; 2966 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2967 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2968 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 2969 } else if (dof == 6) { 2970 B->ops->mult = MatMult_SeqMAIJ_6; 2971 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 2972 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 2973 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2974 } else if (dof == 7) { 2975 B->ops->mult = MatMult_SeqMAIJ_7; 2976 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2977 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2978 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 2979 } else if (dof == 8) { 2980 B->ops->mult = MatMult_SeqMAIJ_8; 2981 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 2982 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 2983 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 2984 } else if (dof == 9) { 2985 B->ops->mult = MatMult_SeqMAIJ_9; 2986 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 2987 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 2988 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2989 } else if (dof == 10) { 2990 B->ops->mult = MatMult_SeqMAIJ_10; 2991 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2992 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2993 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 2994 } else if (dof == 16) { 2995 B->ops->mult = MatMult_SeqMAIJ_16; 2996 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 2997 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 2998 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2999 } else if (dof == 18) { 3000 B->ops->mult = MatMult_SeqMAIJ_18; 3001 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3002 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3003 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3004 } else { 3005 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 3006 } 3007 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 3008 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3009 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3010 } else { 3011 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3012 IS from,to; 3013 Vec gvec; 3014 PetscInt *garray,i; 3015 3016 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3017 B->ops->destroy = MatDestroy_MPIMAIJ; 3018 B->ops->view = MatView_MPIMAIJ; 3019 b = (Mat_MPIMAIJ*)B->data; 3020 b->dof = dof; 3021 b->A = A; 3022 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3023 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3024 3025 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3026 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 3027 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3028 3029 /* create two temporary Index sets for build scatter gather */ 3030 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 3031 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 3032 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 3033 ierr = PetscFree(garray);CHKERRQ(ierr); 3034 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3035 3036 /* create temporary global vector to generate scatter context */ 3037 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3038 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 3039 3040 /* generate the scatter context */ 3041 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3042 3043 ierr = ISDestroy(from);CHKERRQ(ierr); 3044 ierr = ISDestroy(to);CHKERRQ(ierr); 3045 ierr = VecDestroy(gvec);CHKERRQ(ierr); 3046 3047 B->ops->mult = MatMult_MPIMAIJ_dof; 3048 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3049 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3050 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3051 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3052 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3053 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3054 } 3055 *maij = B; 3056 ierr = MatView_Private(B);CHKERRQ(ierr); 3057 } 3058 PetscFunctionReturn(0); 3059 } 3060