1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "src/mat/impls/maij/maij.h" 21 #include "src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27 { 28 PetscErrorCode ierr; 29 PetscTruth ismpimaij,isseqmaij; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34 if (ismpimaij) { 35 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36 37 *B = b->A; 38 } else if (isseqmaij) { 39 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40 41 *B = b->AIJ; 42 } else { 43 *B = A; 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMAIJRedimension" 50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51 { 52 PetscErrorCode ierr; 53 Mat Aij; 54 55 PetscFunctionBegin; 56 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqMAIJ" 63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 67 68 PetscFunctionBegin; 69 if (b->AIJ) { 70 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 71 } 72 ierr = PetscFree(b);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatView_SeqMAIJ" 78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 79 { 80 PetscErrorCode ierr; 81 Mat B; 82 83 PetscFunctionBegin; 84 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 85 ierr = MatView(B,viewer);CHKERRQ(ierr); 86 ierr = MatDestroy(B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatView_MPIMAIJ" 92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 93 { 94 PetscErrorCode ierr; 95 Mat B; 96 97 PetscFunctionBegin; 98 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 99 ierr = MatView(B,viewer);CHKERRQ(ierr); 100 ierr = MatDestroy(B);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatDestroy_MPIMAIJ" 106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 107 { 108 PetscErrorCode ierr; 109 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 110 111 PetscFunctionBegin; 112 if (b->AIJ) { 113 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 114 } 115 if (b->OAIJ) { 116 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117 } 118 if (b->A) { 119 ierr = MatDestroy(b->A);CHKERRQ(ierr); 120 } 121 if (b->ctx) { 122 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123 } 124 if (b->w) { 125 ierr = VecDestroy(b->w);CHKERRQ(ierr); 126 } 127 ierr = PetscFree(b);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 /*MC 132 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 133 multicomponent problems, interpolating or restricting each component the same way independently. 134 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 135 136 Operations provided: 137 . MatMult 138 . MatMultTranspose 139 . MatMultAdd 140 . MatMultTransposeAdd 141 142 Level: advanced 143 144 .seealso: MatCreateSeqDense 145 M*/ 146 147 EXTERN_C_BEGIN 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatCreate_MAIJ" 150 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 151 { 152 PetscErrorCode ierr; 153 Mat_MPIMAIJ *b; 154 155 PetscFunctionBegin; 156 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 157 A->data = (void*)b; 158 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 159 A->factor = 0; 160 A->mapping = 0; 161 162 b->AIJ = 0; 163 b->dof = 0; 164 b->OAIJ = 0; 165 b->ctx = 0; 166 b->w = 0; 167 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMAIJ);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 EXTERN_C_END 171 172 /* --------------------------------------------------------------------------------------*/ 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatMult_SeqMAIJ_2" 175 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 176 { 177 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 178 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 179 PetscScalar *x,*y,*v,sum1, sum2; 180 PetscErrorCode ierr; 181 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 182 PetscInt n,i,jrow,j; 183 184 PetscFunctionBegin; 185 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 186 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 187 idx = a->j; 188 v = a->a; 189 ii = a->i; 190 191 for (i=0; i<m; i++) { 192 jrow = ii[i]; 193 n = ii[i+1] - jrow; 194 sum1 = 0.0; 195 sum2 = 0.0; 196 for (j=0; j<n; j++) { 197 sum1 += v[jrow]*x[2*idx[jrow]]; 198 sum2 += v[jrow]*x[2*idx[jrow]+1]; 199 jrow++; 200 } 201 y[2*i] = sum1; 202 y[2*i+1] = sum2; 203 } 204 205 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 206 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 207 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 213 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 214 { 215 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 216 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 217 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 218 PetscErrorCode ierr; 219 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 220 221 PetscFunctionBegin; 222 ierr = VecSet(yy,zero);CHKERRQ(ierr); 223 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 224 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 225 226 for (i=0; i<m; i++) { 227 idx = a->j + a->i[i] ; 228 v = a->a + a->i[i] ; 229 n = a->i[i+1] - a->i[i]; 230 alpha1 = x[2*i]; 231 alpha2 = x[2*i+1]; 232 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 233 } 234 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 235 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 236 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 242 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 243 { 244 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 246 PetscScalar *x,*y,*v,sum1, sum2; 247 PetscErrorCode ierr; 248 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 249 PetscInt n,i,jrow,j; 250 251 PetscFunctionBegin; 252 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 253 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 254 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 255 idx = a->j; 256 v = a->a; 257 ii = a->i; 258 259 for (i=0; i<m; i++) { 260 jrow = ii[i]; 261 n = ii[i+1] - jrow; 262 sum1 = 0.0; 263 sum2 = 0.0; 264 for (j=0; j<n; j++) { 265 sum1 += v[jrow]*x[2*idx[jrow]]; 266 sum2 += v[jrow]*x[2*idx[jrow]+1]; 267 jrow++; 268 } 269 y[2*i] += sum1; 270 y[2*i+1] += sum2; 271 } 272 273 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 274 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 275 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 280 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 281 { 282 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 283 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 284 PetscScalar *x,*y,*v,alpha1,alpha2; 285 PetscErrorCode ierr; 286 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 287 288 PetscFunctionBegin; 289 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 290 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 291 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 292 293 for (i=0; i<m; i++) { 294 idx = a->j + a->i[i] ; 295 v = a->a + a->i[i] ; 296 n = a->i[i+1] - a->i[i]; 297 alpha1 = x[2*i]; 298 alpha2 = x[2*i+1]; 299 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 300 } 301 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 302 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 303 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 /* --------------------------------------------------------------------------------------*/ 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatMult_SeqMAIJ_3" 309 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 310 { 311 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 312 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 313 PetscScalar *x,*y,*v,sum1, sum2, sum3; 314 PetscErrorCode ierr; 315 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 316 PetscInt n,i,jrow,j; 317 318 PetscFunctionBegin; 319 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 320 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 321 idx = a->j; 322 v = a->a; 323 ii = a->i; 324 325 for (i=0; i<m; i++) { 326 jrow = ii[i]; 327 n = ii[i+1] - jrow; 328 sum1 = 0.0; 329 sum2 = 0.0; 330 sum3 = 0.0; 331 for (j=0; j<n; j++) { 332 sum1 += v[jrow]*x[3*idx[jrow]]; 333 sum2 += v[jrow]*x[3*idx[jrow]+1]; 334 sum3 += v[jrow]*x[3*idx[jrow]+2]; 335 jrow++; 336 } 337 y[3*i] = sum1; 338 y[3*i+1] = sum2; 339 y[3*i+2] = sum3; 340 } 341 342 ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr); 343 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 350 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 351 { 352 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 353 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 354 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 355 PetscErrorCode ierr; 356 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 357 358 PetscFunctionBegin; 359 ierr = VecSet(yy,zero);CHKERRQ(ierr); 360 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 361 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 362 363 for (i=0; i<m; i++) { 364 idx = a->j + a->i[i]; 365 v = a->a + a->i[i]; 366 n = a->i[i+1] - a->i[i]; 367 alpha1 = x[3*i]; 368 alpha2 = x[3*i+1]; 369 alpha3 = x[3*i+2]; 370 while (n-->0) { 371 y[3*(*idx)] += alpha1*(*v); 372 y[3*(*idx)+1] += alpha2*(*v); 373 y[3*(*idx)+2] += alpha3*(*v); 374 idx++; v++; 375 } 376 } 377 ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr); 378 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 379 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 380 PetscFunctionReturn(0); 381 } 382 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 385 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 386 { 387 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 388 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 389 PetscScalar *x,*y,*v,sum1, sum2, sum3; 390 PetscErrorCode ierr; 391 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 392 PetscInt n,i,jrow,j; 393 394 PetscFunctionBegin; 395 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 396 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 397 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 398 idx = a->j; 399 v = a->a; 400 ii = a->i; 401 402 for (i=0; i<m; i++) { 403 jrow = ii[i]; 404 n = ii[i+1] - jrow; 405 sum1 = 0.0; 406 sum2 = 0.0; 407 sum3 = 0.0; 408 for (j=0; j<n; j++) { 409 sum1 += v[jrow]*x[3*idx[jrow]]; 410 sum2 += v[jrow]*x[3*idx[jrow]+1]; 411 sum3 += v[jrow]*x[3*idx[jrow]+2]; 412 jrow++; 413 } 414 y[3*i] += sum1; 415 y[3*i+1] += sum2; 416 y[3*i+2] += sum3; 417 } 418 419 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 420 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 421 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 426 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 427 { 428 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 430 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 431 PetscErrorCode ierr; 432 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 433 434 PetscFunctionBegin; 435 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 436 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 437 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 438 for (i=0; i<m; i++) { 439 idx = a->j + a->i[i] ; 440 v = a->a + a->i[i] ; 441 n = a->i[i+1] - a->i[i]; 442 alpha1 = x[3*i]; 443 alpha2 = x[3*i+1]; 444 alpha3 = x[3*i+2]; 445 while (n-->0) { 446 y[3*(*idx)] += alpha1*(*v); 447 y[3*(*idx)+1] += alpha2*(*v); 448 y[3*(*idx)+2] += alpha3*(*v); 449 idx++; v++; 450 } 451 } 452 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 453 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 454 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 /* ------------------------------------------------------------------------------*/ 459 #undef __FUNCT__ 460 #define __FUNCT__ "MatMult_SeqMAIJ_4" 461 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 462 { 463 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 464 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 465 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 466 PetscErrorCode ierr; 467 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 468 PetscInt n,i,jrow,j; 469 470 PetscFunctionBegin; 471 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 472 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 473 idx = a->j; 474 v = a->a; 475 ii = a->i; 476 477 for (i=0; i<m; i++) { 478 jrow = ii[i]; 479 n = ii[i+1] - jrow; 480 sum1 = 0.0; 481 sum2 = 0.0; 482 sum3 = 0.0; 483 sum4 = 0.0; 484 for (j=0; j<n; j++) { 485 sum1 += v[jrow]*x[4*idx[jrow]]; 486 sum2 += v[jrow]*x[4*idx[jrow]+1]; 487 sum3 += v[jrow]*x[4*idx[jrow]+2]; 488 sum4 += v[jrow]*x[4*idx[jrow]+3]; 489 jrow++; 490 } 491 y[4*i] = sum1; 492 y[4*i+1] = sum2; 493 y[4*i+2] = sum3; 494 y[4*i+3] = sum4; 495 } 496 497 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 498 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 499 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 505 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 506 { 507 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 508 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 509 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 510 PetscErrorCode ierr; 511 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 512 513 PetscFunctionBegin; 514 ierr = VecSet(yy,zero);CHKERRQ(ierr); 515 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 516 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 517 for (i=0; i<m; i++) { 518 idx = a->j + a->i[i] ; 519 v = a->a + a->i[i] ; 520 n = a->i[i+1] - a->i[i]; 521 alpha1 = x[4*i]; 522 alpha2 = x[4*i+1]; 523 alpha3 = x[4*i+2]; 524 alpha4 = x[4*i+3]; 525 while (n-->0) { 526 y[4*(*idx)] += alpha1*(*v); 527 y[4*(*idx)+1] += alpha2*(*v); 528 y[4*(*idx)+2] += alpha3*(*v); 529 y[4*(*idx)+3] += alpha4*(*v); 530 idx++; v++; 531 } 532 } 533 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 534 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 535 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 541 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 542 { 543 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 544 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 545 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 546 PetscErrorCode ierr; 547 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 548 PetscInt n,i,jrow,j; 549 550 PetscFunctionBegin; 551 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 552 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 553 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 554 idx = a->j; 555 v = a->a; 556 ii = a->i; 557 558 for (i=0; i<m; i++) { 559 jrow = ii[i]; 560 n = ii[i+1] - jrow; 561 sum1 = 0.0; 562 sum2 = 0.0; 563 sum3 = 0.0; 564 sum4 = 0.0; 565 for (j=0; j<n; j++) { 566 sum1 += v[jrow]*x[4*idx[jrow]]; 567 sum2 += v[jrow]*x[4*idx[jrow]+1]; 568 sum3 += v[jrow]*x[4*idx[jrow]+2]; 569 sum4 += v[jrow]*x[4*idx[jrow]+3]; 570 jrow++; 571 } 572 y[4*i] += sum1; 573 y[4*i+1] += sum2; 574 y[4*i+2] += sum3; 575 y[4*i+3] += sum4; 576 } 577 578 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 579 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 580 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 581 PetscFunctionReturn(0); 582 } 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 585 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 588 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 589 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 590 PetscErrorCode ierr; 591 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 592 593 PetscFunctionBegin; 594 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 595 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 596 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 597 598 for (i=0; i<m; i++) { 599 idx = a->j + a->i[i] ; 600 v = a->a + a->i[i] ; 601 n = a->i[i+1] - a->i[i]; 602 alpha1 = x[4*i]; 603 alpha2 = x[4*i+1]; 604 alpha3 = x[4*i+2]; 605 alpha4 = x[4*i+3]; 606 while (n-->0) { 607 y[4*(*idx)] += alpha1*(*v); 608 y[4*(*idx)+1] += alpha2*(*v); 609 y[4*(*idx)+2] += alpha3*(*v); 610 y[4*(*idx)+3] += alpha4*(*v); 611 idx++; v++; 612 } 613 } 614 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 615 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 616 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 617 PetscFunctionReturn(0); 618 } 619 /* ------------------------------------------------------------------------------*/ 620 621 #undef __FUNCT__ 622 #define __FUNCT__ "MatMult_SeqMAIJ_5" 623 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 624 { 625 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 626 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 627 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 628 PetscErrorCode ierr; 629 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 630 PetscInt n,i,jrow,j; 631 632 PetscFunctionBegin; 633 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 634 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 635 idx = a->j; 636 v = a->a; 637 ii = a->i; 638 639 for (i=0; i<m; i++) { 640 jrow = ii[i]; 641 n = ii[i+1] - jrow; 642 sum1 = 0.0; 643 sum2 = 0.0; 644 sum3 = 0.0; 645 sum4 = 0.0; 646 sum5 = 0.0; 647 for (j=0; j<n; j++) { 648 sum1 += v[jrow]*x[5*idx[jrow]]; 649 sum2 += v[jrow]*x[5*idx[jrow]+1]; 650 sum3 += v[jrow]*x[5*idx[jrow]+2]; 651 sum4 += v[jrow]*x[5*idx[jrow]+3]; 652 sum5 += v[jrow]*x[5*idx[jrow]+4]; 653 jrow++; 654 } 655 y[5*i] = sum1; 656 y[5*i+1] = sum2; 657 y[5*i+2] = sum3; 658 y[5*i+3] = sum4; 659 y[5*i+4] = sum5; 660 } 661 662 ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr); 663 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 664 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 670 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 671 { 672 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 673 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 674 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 675 PetscErrorCode ierr; 676 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 677 678 PetscFunctionBegin; 679 ierr = VecSet(yy,zero);CHKERRQ(ierr); 680 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 681 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 682 683 for (i=0; i<m; i++) { 684 idx = a->j + a->i[i] ; 685 v = a->a + a->i[i] ; 686 n = a->i[i+1] - a->i[i]; 687 alpha1 = x[5*i]; 688 alpha2 = x[5*i+1]; 689 alpha3 = x[5*i+2]; 690 alpha4 = x[5*i+3]; 691 alpha5 = x[5*i+4]; 692 while (n-->0) { 693 y[5*(*idx)] += alpha1*(*v); 694 y[5*(*idx)+1] += alpha2*(*v); 695 y[5*(*idx)+2] += alpha3*(*v); 696 y[5*(*idx)+3] += alpha4*(*v); 697 y[5*(*idx)+4] += alpha5*(*v); 698 idx++; v++; 699 } 700 } 701 ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr); 702 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 703 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 709 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 710 { 711 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 712 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 713 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 714 PetscErrorCode ierr; 715 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 716 PetscInt n,i,jrow,j; 717 718 PetscFunctionBegin; 719 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 720 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 721 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 722 idx = a->j; 723 v = a->a; 724 ii = a->i; 725 726 for (i=0; i<m; i++) { 727 jrow = ii[i]; 728 n = ii[i+1] - jrow; 729 sum1 = 0.0; 730 sum2 = 0.0; 731 sum3 = 0.0; 732 sum4 = 0.0; 733 sum5 = 0.0; 734 for (j=0; j<n; j++) { 735 sum1 += v[jrow]*x[5*idx[jrow]]; 736 sum2 += v[jrow]*x[5*idx[jrow]+1]; 737 sum3 += v[jrow]*x[5*idx[jrow]+2]; 738 sum4 += v[jrow]*x[5*idx[jrow]+3]; 739 sum5 += v[jrow]*x[5*idx[jrow]+4]; 740 jrow++; 741 } 742 y[5*i] += sum1; 743 y[5*i+1] += sum2; 744 y[5*i+2] += sum3; 745 y[5*i+3] += sum4; 746 y[5*i+4] += sum5; 747 } 748 749 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 750 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 751 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 752 PetscFunctionReturn(0); 753 } 754 755 #undef __FUNCT__ 756 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 757 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 758 { 759 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 760 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 761 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 762 PetscErrorCode ierr; 763 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 764 765 PetscFunctionBegin; 766 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 767 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 768 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 769 770 for (i=0; i<m; i++) { 771 idx = a->j + a->i[i] ; 772 v = a->a + a->i[i] ; 773 n = a->i[i+1] - a->i[i]; 774 alpha1 = x[5*i]; 775 alpha2 = x[5*i+1]; 776 alpha3 = x[5*i+2]; 777 alpha4 = x[5*i+3]; 778 alpha5 = x[5*i+4]; 779 while (n-->0) { 780 y[5*(*idx)] += alpha1*(*v); 781 y[5*(*idx)+1] += alpha2*(*v); 782 y[5*(*idx)+2] += alpha3*(*v); 783 y[5*(*idx)+3] += alpha4*(*v); 784 y[5*(*idx)+4] += alpha5*(*v); 785 idx++; v++; 786 } 787 } 788 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 789 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 790 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 794 /* ------------------------------------------------------------------------------*/ 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMult_SeqMAIJ_6" 797 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 798 { 799 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 800 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 801 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 802 PetscErrorCode ierr; 803 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 804 PetscInt n,i,jrow,j; 805 806 PetscFunctionBegin; 807 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 808 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 809 idx = a->j; 810 v = a->a; 811 ii = a->i; 812 813 for (i=0; i<m; i++) { 814 jrow = ii[i]; 815 n = ii[i+1] - jrow; 816 sum1 = 0.0; 817 sum2 = 0.0; 818 sum3 = 0.0; 819 sum4 = 0.0; 820 sum5 = 0.0; 821 sum6 = 0.0; 822 for (j=0; j<n; j++) { 823 sum1 += v[jrow]*x[6*idx[jrow]]; 824 sum2 += v[jrow]*x[6*idx[jrow]+1]; 825 sum3 += v[jrow]*x[6*idx[jrow]+2]; 826 sum4 += v[jrow]*x[6*idx[jrow]+3]; 827 sum5 += v[jrow]*x[6*idx[jrow]+4]; 828 sum6 += v[jrow]*x[6*idx[jrow]+5]; 829 jrow++; 830 } 831 y[6*i] = sum1; 832 y[6*i+1] = sum2; 833 y[6*i+2] = sum3; 834 y[6*i+3] = sum4; 835 y[6*i+4] = sum5; 836 y[6*i+5] = sum6; 837 } 838 839 ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr); 840 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 841 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 842 PetscFunctionReturn(0); 843 } 844 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 847 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 848 { 849 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 850 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 851 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 852 PetscErrorCode ierr; 853 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 854 855 PetscFunctionBegin; 856 ierr = VecSet(yy,zero);CHKERRQ(ierr); 857 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 858 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 859 860 for (i=0; i<m; i++) { 861 idx = a->j + a->i[i] ; 862 v = a->a + a->i[i] ; 863 n = a->i[i+1] - a->i[i]; 864 alpha1 = x[6*i]; 865 alpha2 = x[6*i+1]; 866 alpha3 = x[6*i+2]; 867 alpha4 = x[6*i+3]; 868 alpha5 = x[6*i+4]; 869 alpha6 = x[6*i+5]; 870 while (n-->0) { 871 y[6*(*idx)] += alpha1*(*v); 872 y[6*(*idx)+1] += alpha2*(*v); 873 y[6*(*idx)+2] += alpha3*(*v); 874 y[6*(*idx)+3] += alpha4*(*v); 875 y[6*(*idx)+4] += alpha5*(*v); 876 y[6*(*idx)+5] += alpha6*(*v); 877 idx++; v++; 878 } 879 } 880 ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr); 881 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 882 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 888 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 889 { 890 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 891 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 892 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 893 PetscErrorCode ierr; 894 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 895 PetscInt n,i,jrow,j; 896 897 PetscFunctionBegin; 898 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 899 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 900 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 901 idx = a->j; 902 v = a->a; 903 ii = a->i; 904 905 for (i=0; i<m; i++) { 906 jrow = ii[i]; 907 n = ii[i+1] - jrow; 908 sum1 = 0.0; 909 sum2 = 0.0; 910 sum3 = 0.0; 911 sum4 = 0.0; 912 sum5 = 0.0; 913 sum6 = 0.0; 914 for (j=0; j<n; j++) { 915 sum1 += v[jrow]*x[6*idx[jrow]]; 916 sum2 += v[jrow]*x[6*idx[jrow]+1]; 917 sum3 += v[jrow]*x[6*idx[jrow]+2]; 918 sum4 += v[jrow]*x[6*idx[jrow]+3]; 919 sum5 += v[jrow]*x[6*idx[jrow]+4]; 920 sum6 += v[jrow]*x[6*idx[jrow]+5]; 921 jrow++; 922 } 923 y[6*i] += sum1; 924 y[6*i+1] += sum2; 925 y[6*i+2] += sum3; 926 y[6*i+3] += sum4; 927 y[6*i+4] += sum5; 928 y[6*i+5] += sum6; 929 } 930 931 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 932 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 933 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 934 PetscFunctionReturn(0); 935 } 936 937 #undef __FUNCT__ 938 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 939 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 940 { 941 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 942 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 943 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 944 PetscErrorCode ierr; 945 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 946 947 PetscFunctionBegin; 948 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 949 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 950 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 951 952 for (i=0; i<m; i++) { 953 idx = a->j + a->i[i] ; 954 v = a->a + a->i[i] ; 955 n = a->i[i+1] - a->i[i]; 956 alpha1 = x[6*i]; 957 alpha2 = x[6*i+1]; 958 alpha3 = x[6*i+2]; 959 alpha4 = x[6*i+3]; 960 alpha5 = x[6*i+4]; 961 alpha6 = x[6*i+5]; 962 while (n-->0) { 963 y[6*(*idx)] += alpha1*(*v); 964 y[6*(*idx)+1] += alpha2*(*v); 965 y[6*(*idx)+2] += alpha3*(*v); 966 y[6*(*idx)+3] += alpha4*(*v); 967 y[6*(*idx)+4] += alpha5*(*v); 968 y[6*(*idx)+5] += alpha6*(*v); 969 idx++; v++; 970 } 971 } 972 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 973 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 974 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 /* ------------------------------------------------------------------------------*/ 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatMult_SeqMAIJ_7" 981 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 982 { 983 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 984 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 985 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 986 PetscErrorCode ierr; 987 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 988 PetscInt n,i,jrow,j; 989 990 PetscFunctionBegin; 991 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 992 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 993 idx = a->j; 994 v = a->a; 995 ii = a->i; 996 997 for (i=0; i<m; i++) { 998 jrow = ii[i]; 999 n = ii[i+1] - jrow; 1000 sum1 = 0.0; 1001 sum2 = 0.0; 1002 sum3 = 0.0; 1003 sum4 = 0.0; 1004 sum5 = 0.0; 1005 sum6 = 0.0; 1006 sum7 = 0.0; 1007 for (j=0; j<n; j++) { 1008 sum1 += v[jrow]*x[7*idx[jrow]]; 1009 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1010 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1011 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1012 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1013 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1014 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1015 jrow++; 1016 } 1017 y[7*i] = sum1; 1018 y[7*i+1] = sum2; 1019 y[7*i+2] = sum3; 1020 y[7*i+3] = sum4; 1021 y[7*i+4] = sum5; 1022 y[7*i+5] = sum6; 1023 y[7*i+6] = sum7; 1024 } 1025 1026 ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr); 1027 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1028 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } 1031 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1034 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1035 { 1036 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1037 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1038 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1039 PetscErrorCode ierr; 1040 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1041 1042 PetscFunctionBegin; 1043 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1044 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1045 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1046 1047 for (i=0; i<m; i++) { 1048 idx = a->j + a->i[i] ; 1049 v = a->a + a->i[i] ; 1050 n = a->i[i+1] - a->i[i]; 1051 alpha1 = x[7*i]; 1052 alpha2 = x[7*i+1]; 1053 alpha3 = x[7*i+2]; 1054 alpha4 = x[7*i+3]; 1055 alpha5 = x[7*i+4]; 1056 alpha6 = x[7*i+5]; 1057 alpha7 = x[7*i+6]; 1058 while (n-->0) { 1059 y[7*(*idx)] += alpha1*(*v); 1060 y[7*(*idx)+1] += alpha2*(*v); 1061 y[7*(*idx)+2] += alpha3*(*v); 1062 y[7*(*idx)+3] += alpha4*(*v); 1063 y[7*(*idx)+4] += alpha5*(*v); 1064 y[7*(*idx)+5] += alpha6*(*v); 1065 y[7*(*idx)+6] += alpha7*(*v); 1066 idx++; v++; 1067 } 1068 } 1069 ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr); 1070 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1071 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1077 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1078 { 1079 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1080 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1081 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1082 PetscErrorCode ierr; 1083 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1084 PetscInt n,i,jrow,j; 1085 1086 PetscFunctionBegin; 1087 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1088 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1089 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1090 idx = a->j; 1091 v = a->a; 1092 ii = a->i; 1093 1094 for (i=0; i<m; i++) { 1095 jrow = ii[i]; 1096 n = ii[i+1] - jrow; 1097 sum1 = 0.0; 1098 sum2 = 0.0; 1099 sum3 = 0.0; 1100 sum4 = 0.0; 1101 sum5 = 0.0; 1102 sum6 = 0.0; 1103 sum7 = 0.0; 1104 for (j=0; j<n; j++) { 1105 sum1 += v[jrow]*x[7*idx[jrow]]; 1106 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1107 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1108 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1109 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1110 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1111 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1112 jrow++; 1113 } 1114 y[7*i] += sum1; 1115 y[7*i+1] += sum2; 1116 y[7*i+2] += sum3; 1117 y[7*i+3] += sum4; 1118 y[7*i+4] += sum5; 1119 y[7*i+5] += sum6; 1120 y[7*i+6] += sum7; 1121 } 1122 1123 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1124 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1125 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1131 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1134 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1135 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1136 PetscErrorCode ierr; 1137 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1138 1139 PetscFunctionBegin; 1140 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1141 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1142 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1143 for (i=0; i<m; i++) { 1144 idx = a->j + a->i[i] ; 1145 v = a->a + a->i[i] ; 1146 n = a->i[i+1] - a->i[i]; 1147 alpha1 = x[7*i]; 1148 alpha2 = x[7*i+1]; 1149 alpha3 = x[7*i+2]; 1150 alpha4 = x[7*i+3]; 1151 alpha5 = x[7*i+4]; 1152 alpha6 = x[7*i+5]; 1153 alpha7 = x[7*i+6]; 1154 while (n-->0) { 1155 y[7*(*idx)] += alpha1*(*v); 1156 y[7*(*idx)+1] += alpha2*(*v); 1157 y[7*(*idx)+2] += alpha3*(*v); 1158 y[7*(*idx)+3] += alpha4*(*v); 1159 y[7*(*idx)+4] += alpha5*(*v); 1160 y[7*(*idx)+5] += alpha6*(*v); 1161 y[7*(*idx)+6] += alpha7*(*v); 1162 idx++; v++; 1163 } 1164 } 1165 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1166 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1167 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1173 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1174 { 1175 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1176 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1177 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1178 PetscErrorCode ierr; 1179 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1180 PetscInt n,i,jrow,j; 1181 1182 PetscFunctionBegin; 1183 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1184 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1185 idx = a->j; 1186 v = a->a; 1187 ii = a->i; 1188 1189 for (i=0; i<m; i++) { 1190 jrow = ii[i]; 1191 n = ii[i+1] - jrow; 1192 sum1 = 0.0; 1193 sum2 = 0.0; 1194 sum3 = 0.0; 1195 sum4 = 0.0; 1196 sum5 = 0.0; 1197 sum6 = 0.0; 1198 sum7 = 0.0; 1199 sum8 = 0.0; 1200 for (j=0; j<n; j++) { 1201 sum1 += v[jrow]*x[8*idx[jrow]]; 1202 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1203 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1204 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1205 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1206 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1207 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1208 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1209 jrow++; 1210 } 1211 y[8*i] = sum1; 1212 y[8*i+1] = sum2; 1213 y[8*i+2] = sum3; 1214 y[8*i+3] = sum4; 1215 y[8*i+4] = sum5; 1216 y[8*i+5] = sum6; 1217 y[8*i+6] = sum7; 1218 y[8*i+7] = sum8; 1219 } 1220 1221 ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr); 1222 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1223 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1229 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1230 { 1231 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1232 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1233 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1234 PetscErrorCode ierr; 1235 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1236 1237 PetscFunctionBegin; 1238 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1240 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1241 1242 for (i=0; i<m; i++) { 1243 idx = a->j + a->i[i] ; 1244 v = a->a + a->i[i] ; 1245 n = a->i[i+1] - a->i[i]; 1246 alpha1 = x[8*i]; 1247 alpha2 = x[8*i+1]; 1248 alpha3 = x[8*i+2]; 1249 alpha4 = x[8*i+3]; 1250 alpha5 = x[8*i+4]; 1251 alpha6 = x[8*i+5]; 1252 alpha7 = x[8*i+6]; 1253 alpha8 = x[8*i+7]; 1254 while (n-->0) { 1255 y[8*(*idx)] += alpha1*(*v); 1256 y[8*(*idx)+1] += alpha2*(*v); 1257 y[8*(*idx)+2] += alpha3*(*v); 1258 y[8*(*idx)+3] += alpha4*(*v); 1259 y[8*(*idx)+4] += alpha5*(*v); 1260 y[8*(*idx)+5] += alpha6*(*v); 1261 y[8*(*idx)+6] += alpha7*(*v); 1262 y[8*(*idx)+7] += alpha8*(*v); 1263 idx++; v++; 1264 } 1265 } 1266 ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr); 1267 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1268 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1274 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1275 { 1276 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1278 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1279 PetscErrorCode ierr; 1280 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1281 PetscInt n,i,jrow,j; 1282 1283 PetscFunctionBegin; 1284 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1285 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1286 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1287 idx = a->j; 1288 v = a->a; 1289 ii = a->i; 1290 1291 for (i=0; i<m; i++) { 1292 jrow = ii[i]; 1293 n = ii[i+1] - jrow; 1294 sum1 = 0.0; 1295 sum2 = 0.0; 1296 sum3 = 0.0; 1297 sum4 = 0.0; 1298 sum5 = 0.0; 1299 sum6 = 0.0; 1300 sum7 = 0.0; 1301 sum8 = 0.0; 1302 for (j=0; j<n; j++) { 1303 sum1 += v[jrow]*x[8*idx[jrow]]; 1304 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1305 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1306 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1307 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1308 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1309 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1310 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1311 jrow++; 1312 } 1313 y[8*i] += sum1; 1314 y[8*i+1] += sum2; 1315 y[8*i+2] += sum3; 1316 y[8*i+3] += sum4; 1317 y[8*i+4] += sum5; 1318 y[8*i+5] += sum6; 1319 y[8*i+6] += sum7; 1320 y[8*i+7] += sum8; 1321 } 1322 1323 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1324 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1325 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1326 PetscFunctionReturn(0); 1327 } 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1331 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1332 { 1333 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1334 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1335 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1336 PetscErrorCode ierr; 1337 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1338 1339 PetscFunctionBegin; 1340 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1341 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1342 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1343 for (i=0; i<m; i++) { 1344 idx = a->j + a->i[i] ; 1345 v = a->a + a->i[i] ; 1346 n = a->i[i+1] - a->i[i]; 1347 alpha1 = x[8*i]; 1348 alpha2 = x[8*i+1]; 1349 alpha3 = x[8*i+2]; 1350 alpha4 = x[8*i+3]; 1351 alpha5 = x[8*i+4]; 1352 alpha6 = x[8*i+5]; 1353 alpha7 = x[8*i+6]; 1354 alpha8 = x[8*i+7]; 1355 while (n-->0) { 1356 y[8*(*idx)] += alpha1*(*v); 1357 y[8*(*idx)+1] += alpha2*(*v); 1358 y[8*(*idx)+2] += alpha3*(*v); 1359 y[8*(*idx)+3] += alpha4*(*v); 1360 y[8*(*idx)+4] += alpha5*(*v); 1361 y[8*(*idx)+5] += alpha6*(*v); 1362 y[8*(*idx)+6] += alpha7*(*v); 1363 y[8*(*idx)+7] += alpha8*(*v); 1364 idx++; v++; 1365 } 1366 } 1367 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1368 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1369 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 /* ------------------------------------------------------------------------------*/ 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1376 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1377 { 1378 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1379 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1380 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1381 PetscErrorCode ierr; 1382 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1383 PetscInt n,i,jrow,j; 1384 1385 PetscFunctionBegin; 1386 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1387 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1388 idx = a->j; 1389 v = a->a; 1390 ii = a->i; 1391 1392 for (i=0; i<m; i++) { 1393 jrow = ii[i]; 1394 n = ii[i+1] - jrow; 1395 sum1 = 0.0; 1396 sum2 = 0.0; 1397 sum3 = 0.0; 1398 sum4 = 0.0; 1399 sum5 = 0.0; 1400 sum6 = 0.0; 1401 sum7 = 0.0; 1402 sum8 = 0.0; 1403 sum9 = 0.0; 1404 for (j=0; j<n; j++) { 1405 sum1 += v[jrow]*x[9*idx[jrow]]; 1406 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1407 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1408 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1409 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1410 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1411 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1412 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1413 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1414 jrow++; 1415 } 1416 y[9*i] = sum1; 1417 y[9*i+1] = sum2; 1418 y[9*i+2] = sum3; 1419 y[9*i+3] = sum4; 1420 y[9*i+4] = sum5; 1421 y[9*i+5] = sum6; 1422 y[9*i+6] = sum7; 1423 y[9*i+7] = sum8; 1424 y[9*i+8] = sum9; 1425 } 1426 1427 ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr); 1428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1429 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 /* ------------------------------------------------------------------------------*/ 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1437 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1438 { 1439 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1440 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1441 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1442 PetscErrorCode ierr; 1443 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1444 1445 PetscFunctionBegin; 1446 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1447 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1448 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1449 1450 for (i=0; i<m; i++) { 1451 idx = a->j + a->i[i] ; 1452 v = a->a + a->i[i] ; 1453 n = a->i[i+1] - a->i[i]; 1454 alpha1 = x[9*i]; 1455 alpha2 = x[9*i+1]; 1456 alpha3 = x[9*i+2]; 1457 alpha4 = x[9*i+3]; 1458 alpha5 = x[9*i+4]; 1459 alpha6 = x[9*i+5]; 1460 alpha7 = x[9*i+6]; 1461 alpha8 = x[9*i+7]; 1462 alpha9 = x[9*i+8]; 1463 while (n-->0) { 1464 y[9*(*idx)] += alpha1*(*v); 1465 y[9*(*idx)+1] += alpha2*(*v); 1466 y[9*(*idx)+2] += alpha3*(*v); 1467 y[9*(*idx)+3] += alpha4*(*v); 1468 y[9*(*idx)+4] += alpha5*(*v); 1469 y[9*(*idx)+5] += alpha6*(*v); 1470 y[9*(*idx)+6] += alpha7*(*v); 1471 y[9*(*idx)+7] += alpha8*(*v); 1472 y[9*(*idx)+8] += alpha9*(*v); 1473 idx++; v++; 1474 } 1475 } 1476 ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr); 1477 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1478 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1484 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1485 { 1486 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1488 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1489 PetscErrorCode ierr; 1490 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1491 PetscInt n,i,jrow,j; 1492 1493 PetscFunctionBegin; 1494 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1495 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1496 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1497 idx = a->j; 1498 v = a->a; 1499 ii = a->i; 1500 1501 for (i=0; i<m; i++) { 1502 jrow = ii[i]; 1503 n = ii[i+1] - jrow; 1504 sum1 = 0.0; 1505 sum2 = 0.0; 1506 sum3 = 0.0; 1507 sum4 = 0.0; 1508 sum5 = 0.0; 1509 sum6 = 0.0; 1510 sum7 = 0.0; 1511 sum8 = 0.0; 1512 sum9 = 0.0; 1513 for (j=0; j<n; j++) { 1514 sum1 += v[jrow]*x[9*idx[jrow]]; 1515 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1516 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1517 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1518 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1519 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1520 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1521 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1522 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1523 jrow++; 1524 } 1525 y[9*i] += sum1; 1526 y[9*i+1] += sum2; 1527 y[9*i+2] += sum3; 1528 y[9*i+3] += sum4; 1529 y[9*i+4] += sum5; 1530 y[9*i+5] += sum6; 1531 y[9*i+6] += sum7; 1532 y[9*i+7] += sum8; 1533 y[9*i+8] += sum9; 1534 } 1535 1536 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1537 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1538 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1544 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1545 { 1546 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1548 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1549 PetscErrorCode ierr; 1550 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1551 1552 PetscFunctionBegin; 1553 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1554 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1555 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1556 for (i=0; i<m; i++) { 1557 idx = a->j + a->i[i] ; 1558 v = a->a + a->i[i] ; 1559 n = a->i[i+1] - a->i[i]; 1560 alpha1 = x[9*i]; 1561 alpha2 = x[9*i+1]; 1562 alpha3 = x[9*i+2]; 1563 alpha4 = x[9*i+3]; 1564 alpha5 = x[9*i+4]; 1565 alpha6 = x[9*i+5]; 1566 alpha7 = x[9*i+6]; 1567 alpha8 = x[9*i+7]; 1568 alpha9 = x[9*i+8]; 1569 while (n-->0) { 1570 y[9*(*idx)] += alpha1*(*v); 1571 y[9*(*idx)+1] += alpha2*(*v); 1572 y[9*(*idx)+2] += alpha3*(*v); 1573 y[9*(*idx)+3] += alpha4*(*v); 1574 y[9*(*idx)+4] += alpha5*(*v); 1575 y[9*(*idx)+5] += alpha6*(*v); 1576 y[9*(*idx)+6] += alpha7*(*v); 1577 y[9*(*idx)+7] += alpha8*(*v); 1578 y[9*(*idx)+8] += alpha9*(*v); 1579 idx++; v++; 1580 } 1581 } 1582 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1583 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1584 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1585 PetscFunctionReturn(0); 1586 } 1587 /*--------------------------------------------------------------------------------------------*/ 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1590 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1591 { 1592 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1593 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1594 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1595 PetscErrorCode ierr; 1596 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1597 PetscInt n,i,jrow,j; 1598 1599 PetscFunctionBegin; 1600 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1601 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1602 idx = a->j; 1603 v = a->a; 1604 ii = a->i; 1605 1606 for (i=0; i<m; i++) { 1607 jrow = ii[i]; 1608 n = ii[i+1] - jrow; 1609 sum1 = 0.0; 1610 sum2 = 0.0; 1611 sum3 = 0.0; 1612 sum4 = 0.0; 1613 sum5 = 0.0; 1614 sum6 = 0.0; 1615 sum7 = 0.0; 1616 sum8 = 0.0; 1617 sum9 = 0.0; 1618 sum10 = 0.0; 1619 for (j=0; j<n; j++) { 1620 sum1 += v[jrow]*x[10*idx[jrow]]; 1621 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1622 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1623 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1624 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1625 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1626 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1627 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1628 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1629 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1630 jrow++; 1631 } 1632 y[10*i] = sum1; 1633 y[10*i+1] = sum2; 1634 y[10*i+2] = sum3; 1635 y[10*i+3] = sum4; 1636 y[10*i+4] = sum5; 1637 y[10*i+5] = sum6; 1638 y[10*i+6] = sum7; 1639 y[10*i+7] = sum8; 1640 y[10*i+8] = sum9; 1641 y[10*i+9] = sum10; 1642 } 1643 1644 ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1645 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1646 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1652 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1653 { 1654 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1655 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1656 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1657 PetscErrorCode ierr; 1658 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1659 PetscInt n,i,jrow,j; 1660 1661 PetscFunctionBegin; 1662 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1663 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1664 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1665 idx = a->j; 1666 v = a->a; 1667 ii = a->i; 1668 1669 for (i=0; i<m; i++) { 1670 jrow = ii[i]; 1671 n = ii[i+1] - jrow; 1672 sum1 = 0.0; 1673 sum2 = 0.0; 1674 sum3 = 0.0; 1675 sum4 = 0.0; 1676 sum5 = 0.0; 1677 sum6 = 0.0; 1678 sum7 = 0.0; 1679 sum8 = 0.0; 1680 sum9 = 0.0; 1681 sum10 = 0.0; 1682 for (j=0; j<n; j++) { 1683 sum1 += v[jrow]*x[10*idx[jrow]]; 1684 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1685 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1686 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1687 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1688 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1689 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1690 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1691 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1692 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1693 jrow++; 1694 } 1695 y[10*i] += sum1; 1696 y[10*i+1] += sum2; 1697 y[10*i+2] += sum3; 1698 y[10*i+3] += sum4; 1699 y[10*i+4] += sum5; 1700 y[10*i+5] += sum6; 1701 y[10*i+6] += sum7; 1702 y[10*i+7] += sum8; 1703 y[10*i+8] += sum9; 1704 y[10*i+9] += sum10; 1705 } 1706 1707 ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1708 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1709 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1715 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1716 { 1717 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1718 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1719 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1720 PetscErrorCode ierr; 1721 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1722 1723 PetscFunctionBegin; 1724 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1725 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1726 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1727 1728 for (i=0; i<m; i++) { 1729 idx = a->j + a->i[i] ; 1730 v = a->a + a->i[i] ; 1731 n = a->i[i+1] - a->i[i]; 1732 alpha1 = x[10*i]; 1733 alpha2 = x[10*i+1]; 1734 alpha3 = x[10*i+2]; 1735 alpha4 = x[10*i+3]; 1736 alpha5 = x[10*i+4]; 1737 alpha6 = x[10*i+5]; 1738 alpha7 = x[10*i+6]; 1739 alpha8 = x[10*i+7]; 1740 alpha9 = x[10*i+8]; 1741 alpha10 = x[10*i+9]; 1742 while (n-->0) { 1743 y[10*(*idx)] += alpha1*(*v); 1744 y[10*(*idx)+1] += alpha2*(*v); 1745 y[10*(*idx)+2] += alpha3*(*v); 1746 y[10*(*idx)+3] += alpha4*(*v); 1747 y[10*(*idx)+4] += alpha5*(*v); 1748 y[10*(*idx)+5] += alpha6*(*v); 1749 y[10*(*idx)+6] += alpha7*(*v); 1750 y[10*(*idx)+7] += alpha8*(*v); 1751 y[10*(*idx)+8] += alpha9*(*v); 1752 y[10*(*idx)+9] += alpha10*(*v); 1753 idx++; v++; 1754 } 1755 } 1756 ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr); 1757 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1758 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1764 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1765 { 1766 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1767 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1768 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1769 PetscErrorCode ierr; 1770 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1771 1772 PetscFunctionBegin; 1773 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1774 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1775 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1776 for (i=0; i<m; i++) { 1777 idx = a->j + a->i[i] ; 1778 v = a->a + a->i[i] ; 1779 n = a->i[i+1] - a->i[i]; 1780 alpha1 = x[10*i]; 1781 alpha2 = x[10*i+1]; 1782 alpha3 = x[10*i+2]; 1783 alpha4 = x[10*i+3]; 1784 alpha5 = x[10*i+4]; 1785 alpha6 = x[10*i+5]; 1786 alpha7 = x[10*i+6]; 1787 alpha8 = x[10*i+7]; 1788 alpha9 = x[10*i+8]; 1789 alpha10 = x[10*i+9]; 1790 while (n-->0) { 1791 y[10*(*idx)] += alpha1*(*v); 1792 y[10*(*idx)+1] += alpha2*(*v); 1793 y[10*(*idx)+2] += alpha3*(*v); 1794 y[10*(*idx)+3] += alpha4*(*v); 1795 y[10*(*idx)+4] += alpha5*(*v); 1796 y[10*(*idx)+5] += alpha6*(*v); 1797 y[10*(*idx)+6] += alpha7*(*v); 1798 y[10*(*idx)+7] += alpha8*(*v); 1799 y[10*(*idx)+8] += alpha9*(*v); 1800 y[10*(*idx)+9] += alpha10*(*v); 1801 idx++; v++; 1802 } 1803 } 1804 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1805 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1806 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 1811 /*--------------------------------------------------------------------------------------------*/ 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1814 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1815 { 1816 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1817 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1818 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1819 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1820 PetscErrorCode ierr; 1821 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1822 PetscInt n,i,jrow,j; 1823 1824 PetscFunctionBegin; 1825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1826 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1827 idx = a->j; 1828 v = a->a; 1829 ii = a->i; 1830 1831 for (i=0; i<m; i++) { 1832 jrow = ii[i]; 1833 n = ii[i+1] - jrow; 1834 sum1 = 0.0; 1835 sum2 = 0.0; 1836 sum3 = 0.0; 1837 sum4 = 0.0; 1838 sum5 = 0.0; 1839 sum6 = 0.0; 1840 sum7 = 0.0; 1841 sum8 = 0.0; 1842 sum9 = 0.0; 1843 sum10 = 0.0; 1844 sum11 = 0.0; 1845 sum12 = 0.0; 1846 sum13 = 0.0; 1847 sum14 = 0.0; 1848 sum15 = 0.0; 1849 sum16 = 0.0; 1850 for (j=0; j<n; j++) { 1851 sum1 += v[jrow]*x[16*idx[jrow]]; 1852 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1853 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1854 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1855 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1856 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1857 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1858 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1859 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1860 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1861 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1862 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1863 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1864 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1865 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1866 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1867 jrow++; 1868 } 1869 y[16*i] = sum1; 1870 y[16*i+1] = sum2; 1871 y[16*i+2] = sum3; 1872 y[16*i+3] = sum4; 1873 y[16*i+4] = sum5; 1874 y[16*i+5] = sum6; 1875 y[16*i+6] = sum7; 1876 y[16*i+7] = sum8; 1877 y[16*i+8] = sum9; 1878 y[16*i+9] = sum10; 1879 y[16*i+10] = sum11; 1880 y[16*i+11] = sum12; 1881 y[16*i+12] = sum13; 1882 y[16*i+13] = sum14; 1883 y[16*i+14] = sum15; 1884 y[16*i+15] = sum16; 1885 } 1886 1887 ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr); 1888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1889 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1895 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1896 { 1897 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1898 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1899 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1900 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1901 PetscErrorCode ierr; 1902 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1903 1904 PetscFunctionBegin; 1905 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1907 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1908 1909 for (i=0; i<m; i++) { 1910 idx = a->j + a->i[i] ; 1911 v = a->a + a->i[i] ; 1912 n = a->i[i+1] - a->i[i]; 1913 alpha1 = x[16*i]; 1914 alpha2 = x[16*i+1]; 1915 alpha3 = x[16*i+2]; 1916 alpha4 = x[16*i+3]; 1917 alpha5 = x[16*i+4]; 1918 alpha6 = x[16*i+5]; 1919 alpha7 = x[16*i+6]; 1920 alpha8 = x[16*i+7]; 1921 alpha9 = x[16*i+8]; 1922 alpha10 = x[16*i+9]; 1923 alpha11 = x[16*i+10]; 1924 alpha12 = x[16*i+11]; 1925 alpha13 = x[16*i+12]; 1926 alpha14 = x[16*i+13]; 1927 alpha15 = x[16*i+14]; 1928 alpha16 = x[16*i+15]; 1929 while (n-->0) { 1930 y[16*(*idx)] += alpha1*(*v); 1931 y[16*(*idx)+1] += alpha2*(*v); 1932 y[16*(*idx)+2] += alpha3*(*v); 1933 y[16*(*idx)+3] += alpha4*(*v); 1934 y[16*(*idx)+4] += alpha5*(*v); 1935 y[16*(*idx)+5] += alpha6*(*v); 1936 y[16*(*idx)+6] += alpha7*(*v); 1937 y[16*(*idx)+7] += alpha8*(*v); 1938 y[16*(*idx)+8] += alpha9*(*v); 1939 y[16*(*idx)+9] += alpha10*(*v); 1940 y[16*(*idx)+10] += alpha11*(*v); 1941 y[16*(*idx)+11] += alpha12*(*v); 1942 y[16*(*idx)+12] += alpha13*(*v); 1943 y[16*(*idx)+13] += alpha14*(*v); 1944 y[16*(*idx)+14] += alpha15*(*v); 1945 y[16*(*idx)+15] += alpha16*(*v); 1946 idx++; v++; 1947 } 1948 } 1949 ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr); 1950 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1951 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1952 PetscFunctionReturn(0); 1953 } 1954 1955 #undef __FUNCT__ 1956 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1957 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1958 { 1959 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1960 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1961 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1962 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1963 PetscErrorCode ierr; 1964 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1965 PetscInt n,i,jrow,j; 1966 1967 PetscFunctionBegin; 1968 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1969 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1970 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1971 idx = a->j; 1972 v = a->a; 1973 ii = a->i; 1974 1975 for (i=0; i<m; i++) { 1976 jrow = ii[i]; 1977 n = ii[i+1] - jrow; 1978 sum1 = 0.0; 1979 sum2 = 0.0; 1980 sum3 = 0.0; 1981 sum4 = 0.0; 1982 sum5 = 0.0; 1983 sum6 = 0.0; 1984 sum7 = 0.0; 1985 sum8 = 0.0; 1986 sum9 = 0.0; 1987 sum10 = 0.0; 1988 sum11 = 0.0; 1989 sum12 = 0.0; 1990 sum13 = 0.0; 1991 sum14 = 0.0; 1992 sum15 = 0.0; 1993 sum16 = 0.0; 1994 for (j=0; j<n; j++) { 1995 sum1 += v[jrow]*x[16*idx[jrow]]; 1996 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1997 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1998 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1999 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2000 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2001 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2002 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2003 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2004 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2005 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2006 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2007 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2008 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2009 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2010 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2011 jrow++; 2012 } 2013 y[16*i] += sum1; 2014 y[16*i+1] += sum2; 2015 y[16*i+2] += sum3; 2016 y[16*i+3] += sum4; 2017 y[16*i+4] += sum5; 2018 y[16*i+5] += sum6; 2019 y[16*i+6] += sum7; 2020 y[16*i+7] += sum8; 2021 y[16*i+8] += sum9; 2022 y[16*i+9] += sum10; 2023 y[16*i+10] += sum11; 2024 y[16*i+11] += sum12; 2025 y[16*i+12] += sum13; 2026 y[16*i+13] += sum14; 2027 y[16*i+14] += sum15; 2028 y[16*i+15] += sum16; 2029 } 2030 2031 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2032 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2033 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2039 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2040 { 2041 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2042 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2043 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2044 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2045 PetscErrorCode ierr; 2046 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 2047 2048 PetscFunctionBegin; 2049 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2050 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2051 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2052 for (i=0; i<m; i++) { 2053 idx = a->j + a->i[i] ; 2054 v = a->a + a->i[i] ; 2055 n = a->i[i+1] - a->i[i]; 2056 alpha1 = x[16*i]; 2057 alpha2 = x[16*i+1]; 2058 alpha3 = x[16*i+2]; 2059 alpha4 = x[16*i+3]; 2060 alpha5 = x[16*i+4]; 2061 alpha6 = x[16*i+5]; 2062 alpha7 = x[16*i+6]; 2063 alpha8 = x[16*i+7]; 2064 alpha9 = x[16*i+8]; 2065 alpha10 = x[16*i+9]; 2066 alpha11 = x[16*i+10]; 2067 alpha12 = x[16*i+11]; 2068 alpha13 = x[16*i+12]; 2069 alpha14 = x[16*i+13]; 2070 alpha15 = x[16*i+14]; 2071 alpha16 = x[16*i+15]; 2072 while (n-->0) { 2073 y[16*(*idx)] += alpha1*(*v); 2074 y[16*(*idx)+1] += alpha2*(*v); 2075 y[16*(*idx)+2] += alpha3*(*v); 2076 y[16*(*idx)+3] += alpha4*(*v); 2077 y[16*(*idx)+4] += alpha5*(*v); 2078 y[16*(*idx)+5] += alpha6*(*v); 2079 y[16*(*idx)+6] += alpha7*(*v); 2080 y[16*(*idx)+7] += alpha8*(*v); 2081 y[16*(*idx)+8] += alpha9*(*v); 2082 y[16*(*idx)+9] += alpha10*(*v); 2083 y[16*(*idx)+10] += alpha11*(*v); 2084 y[16*(*idx)+11] += alpha12*(*v); 2085 y[16*(*idx)+12] += alpha13*(*v); 2086 y[16*(*idx)+13] += alpha14*(*v); 2087 y[16*(*idx)+14] += alpha15*(*v); 2088 y[16*(*idx)+15] += alpha16*(*v); 2089 idx++; v++; 2090 } 2091 } 2092 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2093 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2094 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2095 PetscFunctionReturn(0); 2096 } 2097 2098 /*===================================================================================*/ 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2101 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2102 { 2103 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 /* start the scatter */ 2108 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2109 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2110 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2111 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2117 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2118 { 2119 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2120 PetscErrorCode ierr; 2121 2122 PetscFunctionBegin; 2123 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2124 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2125 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2126 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2127 PetscFunctionReturn(0); 2128 } 2129 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2132 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2133 { 2134 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2135 PetscErrorCode ierr; 2136 2137 PetscFunctionBegin; 2138 /* start the scatter */ 2139 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2140 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2141 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2142 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 #undef __FUNCT__ 2147 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2148 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2149 { 2150 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2151 PetscErrorCode ierr; 2152 2153 PetscFunctionBegin; 2154 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2155 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2156 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2157 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2163 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2164 { 2165 /* This routine requires testing -- but it's getting better. */ 2166 PetscErrorCode ierr; 2167 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2168 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2169 Mat P=pp->AIJ; 2170 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2171 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 2172 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2173 PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn; 2174 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 2175 MatScalar *ca; 2176 2177 PetscFunctionBegin; 2178 /* Start timer */ 2179 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2180 2181 /* Get ij structure of P^T */ 2182 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2183 2184 cn = pn*ppdof; 2185 /* Allocate ci array, arrays for fill computation and */ 2186 /* free space for accumulating nonzero column info */ 2187 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2188 ci[0] = 0; 2189 2190 /* Work arrays for rows of P^T*A */ 2191 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 2192 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 2193 ptasparserow = ptadenserow + an; 2194 denserow = ptasparserow + an; 2195 sparserow = denserow + cn; 2196 2197 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2198 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2199 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2200 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2201 current_space = free_space; 2202 2203 /* Determine symbolic info for each row of C: */ 2204 for (i=0;i<pn;i++) { 2205 ptnzi = pti[i+1] - pti[i]; 2206 ptJ = ptj + pti[i]; 2207 for (dof=0;dof<ppdof;dof++) { 2208 ptanzi = 0; 2209 /* Determine symbolic row of PtA: */ 2210 for (j=0;j<ptnzi;j++) { 2211 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2212 arow = ptJ[j]*ppdof + dof; 2213 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2214 anzj = ai[arow+1] - ai[arow]; 2215 ajj = aj + ai[arow]; 2216 for (k=0;k<anzj;k++) { 2217 if (!ptadenserow[ajj[k]]) { 2218 ptadenserow[ajj[k]] = -1; 2219 ptasparserow[ptanzi++] = ajj[k]; 2220 } 2221 } 2222 } 2223 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2224 ptaj = ptasparserow; 2225 cnzi = 0; 2226 for (j=0;j<ptanzi;j++) { 2227 /* Get offset within block of P */ 2228 pshift = *ptaj%ppdof; 2229 /* Get block row of P */ 2230 prow = (*ptaj++)/ppdof; /* integer division */ 2231 /* P has same number of nonzeros per row as the compressed form */ 2232 pnzj = pi[prow+1] - pi[prow]; 2233 pjj = pj + pi[prow]; 2234 for (k=0;k<pnzj;k++) { 2235 /* Locations in C are shifted by the offset within the block */ 2236 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2237 if (!denserow[pjj[k]*ppdof+pshift]) { 2238 denserow[pjj[k]*ppdof+pshift] = -1; 2239 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2240 } 2241 } 2242 } 2243 2244 /* sort sparserow */ 2245 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2246 2247 /* If free space is not available, make more free space */ 2248 /* Double the amount of total space in the list */ 2249 if (current_space->local_remaining<cnzi) { 2250 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2251 } 2252 2253 /* Copy data into free space, and zero out denserows */ 2254 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2255 current_space->array += cnzi; 2256 current_space->local_used += cnzi; 2257 current_space->local_remaining -= cnzi; 2258 2259 for (j=0;j<ptanzi;j++) { 2260 ptadenserow[ptasparserow[j]] = 0; 2261 } 2262 for (j=0;j<cnzi;j++) { 2263 denserow[sparserow[j]] = 0; 2264 } 2265 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2266 /* For now, we will recompute what is needed. */ 2267 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2268 } 2269 } 2270 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2271 /* Allocate space for cj, initialize cj, and */ 2272 /* destroy list of free space and other temporary array(s) */ 2273 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2274 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2275 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2276 2277 /* Allocate space for ca */ 2278 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2279 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2280 2281 /* put together the new matrix */ 2282 ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2283 2284 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2285 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2286 c = (Mat_SeqAIJ *)((*C)->data); 2287 c->free_a = PETSC_TRUE; 2288 c->free_ij = PETSC_TRUE; 2289 c->nonew = 0; 2290 2291 /* Clean up. */ 2292 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2293 2294 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2295 PetscFunctionReturn(0); 2296 } 2297 2298 #undef __FUNCT__ 2299 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2300 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2301 { 2302 /* This routine requires testing -- first draft only */ 2303 PetscErrorCode ierr; 2304 PetscInt flops=0; 2305 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2306 Mat P=pp->AIJ; 2307 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2308 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2309 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2310 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2311 PetscInt *ci=c->i,*cj=c->j,*cjj; 2312 PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof; 2313 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2314 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2315 2316 PetscFunctionBegin; 2317 /* Allocate temporary array for storage of one row of A*P */ 2318 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2319 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2320 2321 apj = (PetscInt *)(apa + cn); 2322 apjdense = apj + cn; 2323 2324 /* Clear old values in C */ 2325 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2326 2327 for (i=0;i<am;i++) { 2328 /* Form sparse row of A*P */ 2329 anzi = ai[i+1] - ai[i]; 2330 apnzj = 0; 2331 for (j=0;j<anzi;j++) { 2332 /* Get offset within block of P */ 2333 pshift = *aj%ppdof; 2334 /* Get block row of P */ 2335 prow = *aj++/ppdof; /* integer division */ 2336 pnzj = pi[prow+1] - pi[prow]; 2337 pjj = pj + pi[prow]; 2338 paj = pa + pi[prow]; 2339 for (k=0;k<pnzj;k++) { 2340 poffset = pjj[k]*ppdof+pshift; 2341 if (!apjdense[poffset]) { 2342 apjdense[poffset] = -1; 2343 apj[apnzj++] = poffset; 2344 } 2345 apa[poffset] += (*aa)*paj[k]; 2346 } 2347 flops += 2*pnzj; 2348 aa++; 2349 } 2350 2351 /* Sort the j index array for quick sparse axpy. */ 2352 /* Note: a array does not need sorting as it is in dense storage locations. */ 2353 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2354 2355 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2356 prow = i/ppdof; /* integer division */ 2357 pshift = i%ppdof; 2358 poffset = pi[prow]; 2359 pnzi = pi[prow+1] - poffset; 2360 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2361 pJ = pj+poffset; 2362 pA = pa+poffset; 2363 for (j=0;j<pnzi;j++) { 2364 crow = (*pJ)*ppdof+pshift; 2365 cjj = cj + ci[crow]; 2366 caj = ca + ci[crow]; 2367 pJ++; 2368 /* Perform sparse axpy operation. Note cjj includes apj. */ 2369 for (k=0,nextap=0;nextap<apnzj;k++) { 2370 if (cjj[k]==apj[nextap]) { 2371 caj[k] += (*pA)*apa[apj[nextap++]]; 2372 } 2373 } 2374 flops += 2*apnzj; 2375 pA++; 2376 } 2377 2378 /* Zero the current row info for A*P */ 2379 for (j=0;j<apnzj;j++) { 2380 apa[apj[j]] = 0.; 2381 apjdense[apj[j]] = 0; 2382 } 2383 } 2384 2385 /* Assemble the final matrix and clean up */ 2386 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 ierr = PetscFree(apa);CHKERRQ(ierr); 2389 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 2390 2391 PetscFunctionReturn(0); 2392 } 2393 2394 EXTERN_C_BEGIN 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2397 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2398 { 2399 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2400 Mat a = b->AIJ,B; 2401 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 2402 PetscErrorCode ierr; 2403 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2404 PetscInt *cols; 2405 PetscScalar *vals; 2406 2407 PetscFunctionBegin; 2408 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 2409 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 2410 for (i=0; i<m; i++) { 2411 nmax = PetscMax(nmax,aij->ilen[i]); 2412 for (j=0; j<dof; j++) { 2413 ilen[dof*i+j] = aij->ilen[i]; 2414 } 2415 } 2416 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2417 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2418 ierr = PetscFree(ilen);CHKERRQ(ierr); 2419 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 2420 ii = 0; 2421 for (i=0; i<m; i++) { 2422 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2423 for (j=0; j<dof; j++) { 2424 for (k=0; k<ncols; k++) { 2425 icols[k] = dof*cols[k]+j; 2426 } 2427 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2428 ii++; 2429 } 2430 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2431 } 2432 ierr = PetscFree(icols);CHKERRQ(ierr); 2433 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2434 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2435 2436 if (reuse == MAT_REUSE_MATRIX) { 2437 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2438 } else { 2439 *newmat = B; 2440 } 2441 PetscFunctionReturn(0); 2442 } 2443 EXTERN_C_END 2444 2445 #include "src/mat/impls/aij/mpi/mpiaij.h" 2446 2447 EXTERN_C_BEGIN 2448 #undef __FUNCT__ 2449 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2450 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2451 { 2452 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2453 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 2454 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 2455 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 2456 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 2457 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2458 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2459 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 2460 PetscInt rstart,cstart,*garray,ii,k; 2461 PetscErrorCode ierr; 2462 PetscScalar *vals,*ovals; 2463 2464 PetscFunctionBegin; 2465 ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr); 2466 for (i=0; i<A->rmap.n/dof; i++) { 2467 nmax = PetscMax(nmax,AIJ->ilen[i]); 2468 onmax = PetscMax(onmax,OAIJ->ilen[i]); 2469 for (j=0; j<dof; j++) { 2470 dnz[dof*i+j] = AIJ->ilen[i]; 2471 onz[dof*i+j] = OAIJ->ilen[i]; 2472 } 2473 } 2474 ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2475 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2476 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 2477 2478 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2479 rstart = dof*maij->A->rmap.rstart; 2480 cstart = dof*maij->A->cmap.rstart; 2481 garray = mpiaij->garray; 2482 2483 ii = rstart; 2484 for (i=0; i<A->rmap.n/dof; i++) { 2485 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2486 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2487 for (j=0; j<dof; j++) { 2488 for (k=0; k<ncols; k++) { 2489 icols[k] = cstart + dof*cols[k]+j; 2490 } 2491 for (k=0; k<oncols; k++) { 2492 oicols[k] = dof*garray[ocols[k]]+j; 2493 } 2494 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2495 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 2496 ii++; 2497 } 2498 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2499 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2500 } 2501 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 2502 2503 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2504 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2505 2506 if (reuse == MAT_REUSE_MATRIX) { 2507 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2508 } else { 2509 *newmat = B; 2510 } 2511 PetscFunctionReturn(0); 2512 } 2513 EXTERN_C_END 2514 2515 2516 /* ---------------------------------------------------------------------------------- */ 2517 /*MC 2518 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 2519 operations for multicomponent problems. It interpolates each component the same 2520 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 2521 and MATMPIAIJ for distributed matrices. 2522 2523 Operations provided: 2524 + MatMult 2525 . MatMultTranspose 2526 . MatMultAdd 2527 . MatMultTransposeAdd 2528 - MatView 2529 2530 Level: advanced 2531 2532 M*/ 2533 #undef __FUNCT__ 2534 #define __FUNCT__ "MatCreateMAIJ" 2535 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 2536 { 2537 PetscErrorCode ierr; 2538 PetscMPIInt size; 2539 PetscInt n; 2540 Mat_MPIMAIJ *b; 2541 Mat B; 2542 2543 PetscFunctionBegin; 2544 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2545 2546 if (dof == 1) { 2547 *maij = A; 2548 } else { 2549 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 2550 ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr); 2551 B->assembled = PETSC_TRUE; 2552 2553 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2554 if (size == 1) { 2555 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 2556 B->ops->destroy = MatDestroy_SeqMAIJ; 2557 B->ops->view = MatView_SeqMAIJ; 2558 b = (Mat_MPIMAIJ*)B->data; 2559 b->dof = dof; 2560 b->AIJ = A; 2561 if (dof == 2) { 2562 B->ops->mult = MatMult_SeqMAIJ_2; 2563 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2564 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2565 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2566 } else if (dof == 3) { 2567 B->ops->mult = MatMult_SeqMAIJ_3; 2568 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2569 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2570 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2571 } else if (dof == 4) { 2572 B->ops->mult = MatMult_SeqMAIJ_4; 2573 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2574 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2575 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2576 } else if (dof == 5) { 2577 B->ops->mult = MatMult_SeqMAIJ_5; 2578 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2579 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2580 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 2581 } else if (dof == 6) { 2582 B->ops->mult = MatMult_SeqMAIJ_6; 2583 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 2584 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 2585 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2586 } else if (dof == 7) { 2587 B->ops->mult = MatMult_SeqMAIJ_7; 2588 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2589 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2590 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 2591 } else if (dof == 8) { 2592 B->ops->mult = MatMult_SeqMAIJ_8; 2593 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 2594 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 2595 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 2596 } else if (dof == 9) { 2597 B->ops->mult = MatMult_SeqMAIJ_9; 2598 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 2599 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 2600 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2601 } else if (dof == 10) { 2602 B->ops->mult = MatMult_SeqMAIJ_10; 2603 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2604 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2605 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 2606 } else if (dof == 16) { 2607 B->ops->mult = MatMult_SeqMAIJ_16; 2608 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 2609 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 2610 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2611 } else { 2612 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 2613 } 2614 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 2615 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2617 } else { 2618 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2619 IS from,to; 2620 Vec gvec; 2621 PetscInt *garray,i; 2622 2623 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2624 B->ops->destroy = MatDestroy_MPIMAIJ; 2625 B->ops->view = MatView_MPIMAIJ; 2626 b = (Mat_MPIMAIJ*)B->data; 2627 b->dof = dof; 2628 b->A = A; 2629 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 2630 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 2631 2632 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2633 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 2634 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2635 2636 /* create two temporary Index sets for build scatter gather */ 2637 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2638 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2639 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2640 ierr = PetscFree(garray);CHKERRQ(ierr); 2641 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2642 2643 /* create temporary global vector to generate scatter context */ 2644 ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr); 2645 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2646 2647 /* generate the scatter context */ 2648 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2649 2650 ierr = ISDestroy(from);CHKERRQ(ierr); 2651 ierr = ISDestroy(to);CHKERRQ(ierr); 2652 ierr = VecDestroy(gvec);CHKERRQ(ierr); 2653 2654 B->ops->mult = MatMult_MPIMAIJ_dof; 2655 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 2656 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 2657 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 2658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2659 } 2660 *maij = B; 2661 ierr = MatView_Private(B);CHKERRQ(ierr); 2662 } 2663 PetscFunctionReturn(0); 2664 } 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677