1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "src/mat/impls/maij/maij.h" 21 #include "src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27 { 28 PetscErrorCode ierr; 29 PetscTruth ismpimaij,isseqmaij; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34 if (ismpimaij) { 35 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36 37 *B = b->A; 38 } else if (isseqmaij) { 39 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40 41 *B = b->AIJ; 42 } else { 43 *B = A; 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMAIJRedimension" 50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51 { 52 PetscErrorCode ierr; 53 Mat Aij; 54 55 PetscFunctionBegin; 56 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqMAIJ" 63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 67 68 PetscFunctionBegin; 69 if (b->AIJ) { 70 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 71 } 72 ierr = PetscFree(b);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatView_SeqMAIJ" 78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 79 { 80 PetscErrorCode ierr; 81 Mat B; 82 83 PetscFunctionBegin; 84 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 85 ierr = MatView(B,viewer);CHKERRQ(ierr); 86 ierr = MatDestroy(B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatView_MPIMAIJ" 92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 93 { 94 PetscErrorCode ierr; 95 Mat B; 96 97 PetscFunctionBegin; 98 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 99 ierr = MatView(B,viewer);CHKERRQ(ierr); 100 ierr = MatDestroy(B);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatDestroy_MPIMAIJ" 106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 107 { 108 PetscErrorCode ierr; 109 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 110 111 PetscFunctionBegin; 112 if (b->AIJ) { 113 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 114 } 115 if (b->OAIJ) { 116 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117 } 118 if (b->A) { 119 ierr = MatDestroy(b->A);CHKERRQ(ierr); 120 } 121 if (b->ctx) { 122 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123 } 124 if (b->w) { 125 ierr = VecDestroy(b->w);CHKERRQ(ierr); 126 } 127 ierr = PetscFree(b);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 /*MC 132 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 133 multicomponent problems, interpolating or restricting each component the same way independently. 134 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 135 136 Operations provided: 137 . MatMult 138 . MatMultTranspose 139 . MatMultAdd 140 . MatMultTransposeAdd 141 142 Level: advanced 143 144 .seealso: MatCreateSeqDense 145 M*/ 146 147 EXTERN_C_BEGIN 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatCreate_MAIJ" 150 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 151 { 152 PetscErrorCode ierr; 153 Mat_MPIMAIJ *b; 154 PetscMPIInt size; 155 156 PetscFunctionBegin; 157 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 158 A->data = (void*)b; 159 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 160 A->factor = 0; 161 A->mapping = 0; 162 163 b->AIJ = 0; 164 b->dof = 0; 165 b->OAIJ = 0; 166 b->ctx = 0; 167 b->w = 0; 168 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 169 if (size == 1){ 170 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 171 } else { 172 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 EXTERN_C_END 177 178 /* --------------------------------------------------------------------------------------*/ 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatMult_SeqMAIJ_2" 181 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 182 { 183 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 185 PetscScalar *x,*y,*v,sum1, sum2; 186 PetscErrorCode ierr; 187 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 188 PetscInt n,i,jrow,j; 189 190 PetscFunctionBegin; 191 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 192 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 193 idx = a->j; 194 v = a->a; 195 ii = a->i; 196 197 for (i=0; i<m; i++) { 198 jrow = ii[i]; 199 n = ii[i+1] - jrow; 200 sum1 = 0.0; 201 sum2 = 0.0; 202 for (j=0; j<n; j++) { 203 sum1 += v[jrow]*x[2*idx[jrow]]; 204 sum2 += v[jrow]*x[2*idx[jrow]+1]; 205 jrow++; 206 } 207 y[2*i] = sum1; 208 y[2*i+1] = sum2; 209 } 210 211 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 212 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 213 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 214 PetscFunctionReturn(0); 215 } 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 219 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 220 { 221 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 222 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 223 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 224 PetscErrorCode ierr; 225 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 226 227 PetscFunctionBegin; 228 ierr = VecSet(yy,zero);CHKERRQ(ierr); 229 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 230 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 231 232 for (i=0; i<m; i++) { 233 idx = a->j + a->i[i] ; 234 v = a->a + a->i[i] ; 235 n = a->i[i+1] - a->i[i]; 236 alpha1 = x[2*i]; 237 alpha2 = x[2*i+1]; 238 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 239 } 240 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 241 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 248 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 249 { 250 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 251 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 252 PetscScalar *x,*y,*v,sum1, sum2; 253 PetscErrorCode ierr; 254 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 255 PetscInt n,i,jrow,j; 256 257 PetscFunctionBegin; 258 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 259 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 260 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 261 idx = a->j; 262 v = a->a; 263 ii = a->i; 264 265 for (i=0; i<m; i++) { 266 jrow = ii[i]; 267 n = ii[i+1] - jrow; 268 sum1 = 0.0; 269 sum2 = 0.0; 270 for (j=0; j<n; j++) { 271 sum1 += v[jrow]*x[2*idx[jrow]]; 272 sum2 += v[jrow]*x[2*idx[jrow]+1]; 273 jrow++; 274 } 275 y[2*i] += sum1; 276 y[2*i+1] += sum2; 277 } 278 279 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 280 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 281 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 286 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 287 { 288 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 289 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 290 PetscScalar *x,*y,*v,alpha1,alpha2; 291 PetscErrorCode ierr; 292 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 293 294 PetscFunctionBegin; 295 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 296 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 297 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 298 299 for (i=0; i<m; i++) { 300 idx = a->j + a->i[i] ; 301 v = a->a + a->i[i] ; 302 n = a->i[i+1] - a->i[i]; 303 alpha1 = x[2*i]; 304 alpha2 = x[2*i+1]; 305 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 306 } 307 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 308 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 309 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 /* --------------------------------------------------------------------------------------*/ 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatMult_SeqMAIJ_3" 315 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 316 { 317 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 318 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 319 PetscScalar *x,*y,*v,sum1, sum2, sum3; 320 PetscErrorCode ierr; 321 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 322 PetscInt n,i,jrow,j; 323 324 PetscFunctionBegin; 325 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 326 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 327 idx = a->j; 328 v = a->a; 329 ii = a->i; 330 331 for (i=0; i<m; i++) { 332 jrow = ii[i]; 333 n = ii[i+1] - jrow; 334 sum1 = 0.0; 335 sum2 = 0.0; 336 sum3 = 0.0; 337 for (j=0; j<n; j++) { 338 sum1 += v[jrow]*x[3*idx[jrow]]; 339 sum2 += v[jrow]*x[3*idx[jrow]+1]; 340 sum3 += v[jrow]*x[3*idx[jrow]+2]; 341 jrow++; 342 } 343 y[3*i] = sum1; 344 y[3*i+1] = sum2; 345 y[3*i+2] = sum3; 346 } 347 348 ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr); 349 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 350 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 351 PetscFunctionReturn(0); 352 } 353 354 #undef __FUNCT__ 355 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 356 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 357 { 358 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 359 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 360 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 361 PetscErrorCode ierr; 362 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 363 364 PetscFunctionBegin; 365 ierr = VecSet(yy,zero);CHKERRQ(ierr); 366 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 367 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 368 369 for (i=0; i<m; i++) { 370 idx = a->j + a->i[i]; 371 v = a->a + a->i[i]; 372 n = a->i[i+1] - a->i[i]; 373 alpha1 = x[3*i]; 374 alpha2 = x[3*i+1]; 375 alpha3 = x[3*i+2]; 376 while (n-->0) { 377 y[3*(*idx)] += alpha1*(*v); 378 y[3*(*idx)+1] += alpha2*(*v); 379 y[3*(*idx)+2] += alpha3*(*v); 380 idx++; v++; 381 } 382 } 383 ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr); 384 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 385 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 391 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 392 { 393 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 394 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 395 PetscScalar *x,*y,*v,sum1, sum2, sum3; 396 PetscErrorCode ierr; 397 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 398 PetscInt n,i,jrow,j; 399 400 PetscFunctionBegin; 401 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 402 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 403 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 404 idx = a->j; 405 v = a->a; 406 ii = a->i; 407 408 for (i=0; i<m; i++) { 409 jrow = ii[i]; 410 n = ii[i+1] - jrow; 411 sum1 = 0.0; 412 sum2 = 0.0; 413 sum3 = 0.0; 414 for (j=0; j<n; j++) { 415 sum1 += v[jrow]*x[3*idx[jrow]]; 416 sum2 += v[jrow]*x[3*idx[jrow]+1]; 417 sum3 += v[jrow]*x[3*idx[jrow]+2]; 418 jrow++; 419 } 420 y[3*i] += sum1; 421 y[3*i+1] += sum2; 422 y[3*i+2] += sum3; 423 } 424 425 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 426 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 427 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 428 PetscFunctionReturn(0); 429 } 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 432 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 433 { 434 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 435 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 436 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 437 PetscErrorCode ierr; 438 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 439 440 PetscFunctionBegin; 441 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 442 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 443 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 444 for (i=0; i<m; i++) { 445 idx = a->j + a->i[i] ; 446 v = a->a + a->i[i] ; 447 n = a->i[i+1] - a->i[i]; 448 alpha1 = x[3*i]; 449 alpha2 = x[3*i+1]; 450 alpha3 = x[3*i+2]; 451 while (n-->0) { 452 y[3*(*idx)] += alpha1*(*v); 453 y[3*(*idx)+1] += alpha2*(*v); 454 y[3*(*idx)+2] += alpha3*(*v); 455 idx++; v++; 456 } 457 } 458 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 459 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 460 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 /* ------------------------------------------------------------------------------*/ 465 #undef __FUNCT__ 466 #define __FUNCT__ "MatMult_SeqMAIJ_4" 467 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 468 { 469 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 470 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 471 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 472 PetscErrorCode ierr; 473 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 474 PetscInt n,i,jrow,j; 475 476 PetscFunctionBegin; 477 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 478 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 479 idx = a->j; 480 v = a->a; 481 ii = a->i; 482 483 for (i=0; i<m; i++) { 484 jrow = ii[i]; 485 n = ii[i+1] - jrow; 486 sum1 = 0.0; 487 sum2 = 0.0; 488 sum3 = 0.0; 489 sum4 = 0.0; 490 for (j=0; j<n; j++) { 491 sum1 += v[jrow]*x[4*idx[jrow]]; 492 sum2 += v[jrow]*x[4*idx[jrow]+1]; 493 sum3 += v[jrow]*x[4*idx[jrow]+2]; 494 sum4 += v[jrow]*x[4*idx[jrow]+3]; 495 jrow++; 496 } 497 y[4*i] = sum1; 498 y[4*i+1] = sum2; 499 y[4*i+2] = sum3; 500 y[4*i+3] = sum4; 501 } 502 503 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 504 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 505 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 506 PetscFunctionReturn(0); 507 } 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 511 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 512 { 513 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 514 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 515 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 516 PetscErrorCode ierr; 517 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 518 519 PetscFunctionBegin; 520 ierr = VecSet(yy,zero);CHKERRQ(ierr); 521 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 522 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 523 for (i=0; i<m; i++) { 524 idx = a->j + a->i[i] ; 525 v = a->a + a->i[i] ; 526 n = a->i[i+1] - a->i[i]; 527 alpha1 = x[4*i]; 528 alpha2 = x[4*i+1]; 529 alpha3 = x[4*i+2]; 530 alpha4 = x[4*i+3]; 531 while (n-->0) { 532 y[4*(*idx)] += alpha1*(*v); 533 y[4*(*idx)+1] += alpha2*(*v); 534 y[4*(*idx)+2] += alpha3*(*v); 535 y[4*(*idx)+3] += alpha4*(*v); 536 idx++; v++; 537 } 538 } 539 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 540 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 541 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 547 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 548 { 549 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 551 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 552 PetscErrorCode ierr; 553 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 554 PetscInt n,i,jrow,j; 555 556 PetscFunctionBegin; 557 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 558 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 559 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 560 idx = a->j; 561 v = a->a; 562 ii = a->i; 563 564 for (i=0; i<m; i++) { 565 jrow = ii[i]; 566 n = ii[i+1] - jrow; 567 sum1 = 0.0; 568 sum2 = 0.0; 569 sum3 = 0.0; 570 sum4 = 0.0; 571 for (j=0; j<n; j++) { 572 sum1 += v[jrow]*x[4*idx[jrow]]; 573 sum2 += v[jrow]*x[4*idx[jrow]+1]; 574 sum3 += v[jrow]*x[4*idx[jrow]+2]; 575 sum4 += v[jrow]*x[4*idx[jrow]+3]; 576 jrow++; 577 } 578 y[4*i] += sum1; 579 y[4*i+1] += sum2; 580 y[4*i+2] += sum3; 581 y[4*i+3] += sum4; 582 } 583 584 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 585 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 586 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 591 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 595 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 596 PetscErrorCode ierr; 597 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 598 599 PetscFunctionBegin; 600 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 601 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 602 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 603 604 for (i=0; i<m; i++) { 605 idx = a->j + a->i[i] ; 606 v = a->a + a->i[i] ; 607 n = a->i[i+1] - a->i[i]; 608 alpha1 = x[4*i]; 609 alpha2 = x[4*i+1]; 610 alpha3 = x[4*i+2]; 611 alpha4 = x[4*i+3]; 612 while (n-->0) { 613 y[4*(*idx)] += alpha1*(*v); 614 y[4*(*idx)+1] += alpha2*(*v); 615 y[4*(*idx)+2] += alpha3*(*v); 616 y[4*(*idx)+3] += alpha4*(*v); 617 idx++; v++; 618 } 619 } 620 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 621 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 622 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 /* ------------------------------------------------------------------------------*/ 626 627 #undef __FUNCT__ 628 #define __FUNCT__ "MatMult_SeqMAIJ_5" 629 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 630 { 631 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 632 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 633 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 634 PetscErrorCode ierr; 635 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 636 PetscInt n,i,jrow,j; 637 638 PetscFunctionBegin; 639 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 640 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 641 idx = a->j; 642 v = a->a; 643 ii = a->i; 644 645 for (i=0; i<m; i++) { 646 jrow = ii[i]; 647 n = ii[i+1] - jrow; 648 sum1 = 0.0; 649 sum2 = 0.0; 650 sum3 = 0.0; 651 sum4 = 0.0; 652 sum5 = 0.0; 653 for (j=0; j<n; j++) { 654 sum1 += v[jrow]*x[5*idx[jrow]]; 655 sum2 += v[jrow]*x[5*idx[jrow]+1]; 656 sum3 += v[jrow]*x[5*idx[jrow]+2]; 657 sum4 += v[jrow]*x[5*idx[jrow]+3]; 658 sum5 += v[jrow]*x[5*idx[jrow]+4]; 659 jrow++; 660 } 661 y[5*i] = sum1; 662 y[5*i+1] = sum2; 663 y[5*i+2] = sum3; 664 y[5*i+3] = sum4; 665 y[5*i+4] = sum5; 666 } 667 668 ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr); 669 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 670 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 676 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 677 { 678 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 679 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 680 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 681 PetscErrorCode ierr; 682 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 683 684 PetscFunctionBegin; 685 ierr = VecSet(yy,zero);CHKERRQ(ierr); 686 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 687 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 688 689 for (i=0; i<m; i++) { 690 idx = a->j + a->i[i] ; 691 v = a->a + a->i[i] ; 692 n = a->i[i+1] - a->i[i]; 693 alpha1 = x[5*i]; 694 alpha2 = x[5*i+1]; 695 alpha3 = x[5*i+2]; 696 alpha4 = x[5*i+3]; 697 alpha5 = x[5*i+4]; 698 while (n-->0) { 699 y[5*(*idx)] += alpha1*(*v); 700 y[5*(*idx)+1] += alpha2*(*v); 701 y[5*(*idx)+2] += alpha3*(*v); 702 y[5*(*idx)+3] += alpha4*(*v); 703 y[5*(*idx)+4] += alpha5*(*v); 704 idx++; v++; 705 } 706 } 707 ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr); 708 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 709 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 715 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 716 { 717 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 718 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 719 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 720 PetscErrorCode ierr; 721 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 722 PetscInt n,i,jrow,j; 723 724 PetscFunctionBegin; 725 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 726 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 727 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 728 idx = a->j; 729 v = a->a; 730 ii = a->i; 731 732 for (i=0; i<m; i++) { 733 jrow = ii[i]; 734 n = ii[i+1] - jrow; 735 sum1 = 0.0; 736 sum2 = 0.0; 737 sum3 = 0.0; 738 sum4 = 0.0; 739 sum5 = 0.0; 740 for (j=0; j<n; j++) { 741 sum1 += v[jrow]*x[5*idx[jrow]]; 742 sum2 += v[jrow]*x[5*idx[jrow]+1]; 743 sum3 += v[jrow]*x[5*idx[jrow]+2]; 744 sum4 += v[jrow]*x[5*idx[jrow]+3]; 745 sum5 += v[jrow]*x[5*idx[jrow]+4]; 746 jrow++; 747 } 748 y[5*i] += sum1; 749 y[5*i+1] += sum2; 750 y[5*i+2] += sum3; 751 y[5*i+3] += sum4; 752 y[5*i+4] += sum5; 753 } 754 755 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 756 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 757 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 758 PetscFunctionReturn(0); 759 } 760 761 #undef __FUNCT__ 762 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 763 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 764 { 765 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 766 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 767 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 768 PetscErrorCode ierr; 769 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 770 771 PetscFunctionBegin; 772 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 773 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 774 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 775 776 for (i=0; i<m; i++) { 777 idx = a->j + a->i[i] ; 778 v = a->a + a->i[i] ; 779 n = a->i[i+1] - a->i[i]; 780 alpha1 = x[5*i]; 781 alpha2 = x[5*i+1]; 782 alpha3 = x[5*i+2]; 783 alpha4 = x[5*i+3]; 784 alpha5 = x[5*i+4]; 785 while (n-->0) { 786 y[5*(*idx)] += alpha1*(*v); 787 y[5*(*idx)+1] += alpha2*(*v); 788 y[5*(*idx)+2] += alpha3*(*v); 789 y[5*(*idx)+3] += alpha4*(*v); 790 y[5*(*idx)+4] += alpha5*(*v); 791 idx++; v++; 792 } 793 } 794 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 795 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 796 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 797 PetscFunctionReturn(0); 798 } 799 800 /* ------------------------------------------------------------------------------*/ 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatMult_SeqMAIJ_6" 803 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 804 { 805 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 806 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 807 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 808 PetscErrorCode ierr; 809 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 810 PetscInt n,i,jrow,j; 811 812 PetscFunctionBegin; 813 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 814 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 815 idx = a->j; 816 v = a->a; 817 ii = a->i; 818 819 for (i=0; i<m; i++) { 820 jrow = ii[i]; 821 n = ii[i+1] - jrow; 822 sum1 = 0.0; 823 sum2 = 0.0; 824 sum3 = 0.0; 825 sum4 = 0.0; 826 sum5 = 0.0; 827 sum6 = 0.0; 828 for (j=0; j<n; j++) { 829 sum1 += v[jrow]*x[6*idx[jrow]]; 830 sum2 += v[jrow]*x[6*idx[jrow]+1]; 831 sum3 += v[jrow]*x[6*idx[jrow]+2]; 832 sum4 += v[jrow]*x[6*idx[jrow]+3]; 833 sum5 += v[jrow]*x[6*idx[jrow]+4]; 834 sum6 += v[jrow]*x[6*idx[jrow]+5]; 835 jrow++; 836 } 837 y[6*i] = sum1; 838 y[6*i+1] = sum2; 839 y[6*i+2] = sum3; 840 y[6*i+3] = sum4; 841 y[6*i+4] = sum5; 842 y[6*i+5] = sum6; 843 } 844 845 ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr); 846 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 847 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 853 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 854 { 855 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 856 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 857 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 858 PetscErrorCode ierr; 859 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 860 861 PetscFunctionBegin; 862 ierr = VecSet(yy,zero);CHKERRQ(ierr); 863 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 864 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 865 866 for (i=0; i<m; i++) { 867 idx = a->j + a->i[i] ; 868 v = a->a + a->i[i] ; 869 n = a->i[i+1] - a->i[i]; 870 alpha1 = x[6*i]; 871 alpha2 = x[6*i+1]; 872 alpha3 = x[6*i+2]; 873 alpha4 = x[6*i+3]; 874 alpha5 = x[6*i+4]; 875 alpha6 = x[6*i+5]; 876 while (n-->0) { 877 y[6*(*idx)] += alpha1*(*v); 878 y[6*(*idx)+1] += alpha2*(*v); 879 y[6*(*idx)+2] += alpha3*(*v); 880 y[6*(*idx)+3] += alpha4*(*v); 881 y[6*(*idx)+4] += alpha5*(*v); 882 y[6*(*idx)+5] += alpha6*(*v); 883 idx++; v++; 884 } 885 } 886 ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr); 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 #undef __FUNCT__ 893 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 894 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 895 { 896 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 897 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 898 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 899 PetscErrorCode ierr; 900 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 901 PetscInt n,i,jrow,j; 902 903 PetscFunctionBegin; 904 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 905 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 906 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 907 idx = a->j; 908 v = a->a; 909 ii = a->i; 910 911 for (i=0; i<m; i++) { 912 jrow = ii[i]; 913 n = ii[i+1] - jrow; 914 sum1 = 0.0; 915 sum2 = 0.0; 916 sum3 = 0.0; 917 sum4 = 0.0; 918 sum5 = 0.0; 919 sum6 = 0.0; 920 for (j=0; j<n; j++) { 921 sum1 += v[jrow]*x[6*idx[jrow]]; 922 sum2 += v[jrow]*x[6*idx[jrow]+1]; 923 sum3 += v[jrow]*x[6*idx[jrow]+2]; 924 sum4 += v[jrow]*x[6*idx[jrow]+3]; 925 sum5 += v[jrow]*x[6*idx[jrow]+4]; 926 sum6 += v[jrow]*x[6*idx[jrow]+5]; 927 jrow++; 928 } 929 y[6*i] += sum1; 930 y[6*i+1] += sum2; 931 y[6*i+2] += sum3; 932 y[6*i+3] += sum4; 933 y[6*i+4] += sum5; 934 y[6*i+5] += sum6; 935 } 936 937 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 #undef __FUNCT__ 944 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 945 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 946 { 947 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 948 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 949 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 950 PetscErrorCode ierr; 951 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 952 953 PetscFunctionBegin; 954 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 955 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 956 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 957 958 for (i=0; i<m; i++) { 959 idx = a->j + a->i[i] ; 960 v = a->a + a->i[i] ; 961 n = a->i[i+1] - a->i[i]; 962 alpha1 = x[6*i]; 963 alpha2 = x[6*i+1]; 964 alpha3 = x[6*i+2]; 965 alpha4 = x[6*i+3]; 966 alpha5 = x[6*i+4]; 967 alpha6 = x[6*i+5]; 968 while (n-->0) { 969 y[6*(*idx)] += alpha1*(*v); 970 y[6*(*idx)+1] += alpha2*(*v); 971 y[6*(*idx)+2] += alpha3*(*v); 972 y[6*(*idx)+3] += alpha4*(*v); 973 y[6*(*idx)+4] += alpha5*(*v); 974 y[6*(*idx)+5] += alpha6*(*v); 975 idx++; v++; 976 } 977 } 978 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 979 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 980 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 /* ------------------------------------------------------------------------------*/ 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatMult_SeqMAIJ_7" 987 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 988 { 989 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 990 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 991 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 992 PetscErrorCode ierr; 993 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 994 PetscInt n,i,jrow,j; 995 996 PetscFunctionBegin; 997 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 998 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 999 idx = a->j; 1000 v = a->a; 1001 ii = a->i; 1002 1003 for (i=0; i<m; i++) { 1004 jrow = ii[i]; 1005 n = ii[i+1] - jrow; 1006 sum1 = 0.0; 1007 sum2 = 0.0; 1008 sum3 = 0.0; 1009 sum4 = 0.0; 1010 sum5 = 0.0; 1011 sum6 = 0.0; 1012 sum7 = 0.0; 1013 for (j=0; j<n; j++) { 1014 sum1 += v[jrow]*x[7*idx[jrow]]; 1015 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1016 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1017 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1018 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1019 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1020 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1021 jrow++; 1022 } 1023 y[7*i] = sum1; 1024 y[7*i+1] = sum2; 1025 y[7*i+2] = sum3; 1026 y[7*i+3] = sum4; 1027 y[7*i+4] = sum5; 1028 y[7*i+5] = sum6; 1029 y[7*i+6] = sum7; 1030 } 1031 1032 ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr); 1033 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1034 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1040 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1041 { 1042 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1043 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1044 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1045 PetscErrorCode ierr; 1046 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1047 1048 PetscFunctionBegin; 1049 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1050 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1051 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1052 1053 for (i=0; i<m; i++) { 1054 idx = a->j + a->i[i] ; 1055 v = a->a + a->i[i] ; 1056 n = a->i[i+1] - a->i[i]; 1057 alpha1 = x[7*i]; 1058 alpha2 = x[7*i+1]; 1059 alpha3 = x[7*i+2]; 1060 alpha4 = x[7*i+3]; 1061 alpha5 = x[7*i+4]; 1062 alpha6 = x[7*i+5]; 1063 alpha7 = x[7*i+6]; 1064 while (n-->0) { 1065 y[7*(*idx)] += alpha1*(*v); 1066 y[7*(*idx)+1] += alpha2*(*v); 1067 y[7*(*idx)+2] += alpha3*(*v); 1068 y[7*(*idx)+3] += alpha4*(*v); 1069 y[7*(*idx)+4] += alpha5*(*v); 1070 y[7*(*idx)+5] += alpha6*(*v); 1071 y[7*(*idx)+6] += alpha7*(*v); 1072 idx++; v++; 1073 } 1074 } 1075 ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr); 1076 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1077 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1083 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1084 { 1085 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1086 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1087 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1088 PetscErrorCode ierr; 1089 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1090 PetscInt n,i,jrow,j; 1091 1092 PetscFunctionBegin; 1093 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1094 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1095 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1096 idx = a->j; 1097 v = a->a; 1098 ii = a->i; 1099 1100 for (i=0; i<m; i++) { 1101 jrow = ii[i]; 1102 n = ii[i+1] - jrow; 1103 sum1 = 0.0; 1104 sum2 = 0.0; 1105 sum3 = 0.0; 1106 sum4 = 0.0; 1107 sum5 = 0.0; 1108 sum6 = 0.0; 1109 sum7 = 0.0; 1110 for (j=0; j<n; j++) { 1111 sum1 += v[jrow]*x[7*idx[jrow]]; 1112 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1113 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1114 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1115 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1116 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1117 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1118 jrow++; 1119 } 1120 y[7*i] += sum1; 1121 y[7*i+1] += sum2; 1122 y[7*i+2] += sum3; 1123 y[7*i+3] += sum4; 1124 y[7*i+4] += sum5; 1125 y[7*i+5] += sum6; 1126 y[7*i+6] += sum7; 1127 } 1128 1129 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1130 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1131 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1137 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1138 { 1139 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1141 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1142 PetscErrorCode ierr; 1143 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1144 1145 PetscFunctionBegin; 1146 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1147 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1148 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1149 for (i=0; i<m; i++) { 1150 idx = a->j + a->i[i] ; 1151 v = a->a + a->i[i] ; 1152 n = a->i[i+1] - a->i[i]; 1153 alpha1 = x[7*i]; 1154 alpha2 = x[7*i+1]; 1155 alpha3 = x[7*i+2]; 1156 alpha4 = x[7*i+3]; 1157 alpha5 = x[7*i+4]; 1158 alpha6 = x[7*i+5]; 1159 alpha7 = x[7*i+6]; 1160 while (n-->0) { 1161 y[7*(*idx)] += alpha1*(*v); 1162 y[7*(*idx)+1] += alpha2*(*v); 1163 y[7*(*idx)+2] += alpha3*(*v); 1164 y[7*(*idx)+3] += alpha4*(*v); 1165 y[7*(*idx)+4] += alpha5*(*v); 1166 y[7*(*idx)+5] += alpha6*(*v); 1167 y[7*(*idx)+6] += alpha7*(*v); 1168 idx++; v++; 1169 } 1170 } 1171 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1172 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1173 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1179 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1180 { 1181 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1182 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1183 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1184 PetscErrorCode ierr; 1185 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1186 PetscInt n,i,jrow,j; 1187 1188 PetscFunctionBegin; 1189 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1190 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1191 idx = a->j; 1192 v = a->a; 1193 ii = a->i; 1194 1195 for (i=0; i<m; i++) { 1196 jrow = ii[i]; 1197 n = ii[i+1] - jrow; 1198 sum1 = 0.0; 1199 sum2 = 0.0; 1200 sum3 = 0.0; 1201 sum4 = 0.0; 1202 sum5 = 0.0; 1203 sum6 = 0.0; 1204 sum7 = 0.0; 1205 sum8 = 0.0; 1206 for (j=0; j<n; j++) { 1207 sum1 += v[jrow]*x[8*idx[jrow]]; 1208 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1209 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1210 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1211 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1212 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1213 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1214 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1215 jrow++; 1216 } 1217 y[8*i] = sum1; 1218 y[8*i+1] = sum2; 1219 y[8*i+2] = sum3; 1220 y[8*i+3] = sum4; 1221 y[8*i+4] = sum5; 1222 y[8*i+5] = sum6; 1223 y[8*i+6] = sum7; 1224 y[8*i+7] = sum8; 1225 } 1226 1227 ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr); 1228 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1229 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1235 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1236 { 1237 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1238 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1239 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1240 PetscErrorCode ierr; 1241 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1242 1243 PetscFunctionBegin; 1244 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1245 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1246 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1247 1248 for (i=0; i<m; i++) { 1249 idx = a->j + a->i[i] ; 1250 v = a->a + a->i[i] ; 1251 n = a->i[i+1] - a->i[i]; 1252 alpha1 = x[8*i]; 1253 alpha2 = x[8*i+1]; 1254 alpha3 = x[8*i+2]; 1255 alpha4 = x[8*i+3]; 1256 alpha5 = x[8*i+4]; 1257 alpha6 = x[8*i+5]; 1258 alpha7 = x[8*i+6]; 1259 alpha8 = x[8*i+7]; 1260 while (n-->0) { 1261 y[8*(*idx)] += alpha1*(*v); 1262 y[8*(*idx)+1] += alpha2*(*v); 1263 y[8*(*idx)+2] += alpha3*(*v); 1264 y[8*(*idx)+3] += alpha4*(*v); 1265 y[8*(*idx)+4] += alpha5*(*v); 1266 y[8*(*idx)+5] += alpha6*(*v); 1267 y[8*(*idx)+6] += alpha7*(*v); 1268 y[8*(*idx)+7] += alpha8*(*v); 1269 idx++; v++; 1270 } 1271 } 1272 ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr); 1273 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1274 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1280 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1281 { 1282 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1283 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1284 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1285 PetscErrorCode ierr; 1286 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1287 PetscInt n,i,jrow,j; 1288 1289 PetscFunctionBegin; 1290 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1291 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1292 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1293 idx = a->j; 1294 v = a->a; 1295 ii = a->i; 1296 1297 for (i=0; i<m; i++) { 1298 jrow = ii[i]; 1299 n = ii[i+1] - jrow; 1300 sum1 = 0.0; 1301 sum2 = 0.0; 1302 sum3 = 0.0; 1303 sum4 = 0.0; 1304 sum5 = 0.0; 1305 sum6 = 0.0; 1306 sum7 = 0.0; 1307 sum8 = 0.0; 1308 for (j=0; j<n; j++) { 1309 sum1 += v[jrow]*x[8*idx[jrow]]; 1310 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1311 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1312 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1313 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1314 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1315 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1316 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1317 jrow++; 1318 } 1319 y[8*i] += sum1; 1320 y[8*i+1] += sum2; 1321 y[8*i+2] += sum3; 1322 y[8*i+3] += sum4; 1323 y[8*i+4] += sum5; 1324 y[8*i+5] += sum6; 1325 y[8*i+6] += sum7; 1326 y[8*i+7] += sum8; 1327 } 1328 1329 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1330 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1331 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1337 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1338 { 1339 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1340 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1341 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1342 PetscErrorCode ierr; 1343 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1344 1345 PetscFunctionBegin; 1346 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1347 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1348 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1349 for (i=0; i<m; i++) { 1350 idx = a->j + a->i[i] ; 1351 v = a->a + a->i[i] ; 1352 n = a->i[i+1] - a->i[i]; 1353 alpha1 = x[8*i]; 1354 alpha2 = x[8*i+1]; 1355 alpha3 = x[8*i+2]; 1356 alpha4 = x[8*i+3]; 1357 alpha5 = x[8*i+4]; 1358 alpha6 = x[8*i+5]; 1359 alpha7 = x[8*i+6]; 1360 alpha8 = x[8*i+7]; 1361 while (n-->0) { 1362 y[8*(*idx)] += alpha1*(*v); 1363 y[8*(*idx)+1] += alpha2*(*v); 1364 y[8*(*idx)+2] += alpha3*(*v); 1365 y[8*(*idx)+3] += alpha4*(*v); 1366 y[8*(*idx)+4] += alpha5*(*v); 1367 y[8*(*idx)+5] += alpha6*(*v); 1368 y[8*(*idx)+6] += alpha7*(*v); 1369 y[8*(*idx)+7] += alpha8*(*v); 1370 idx++; v++; 1371 } 1372 } 1373 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1374 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1375 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 /* ------------------------------------------------------------------------------*/ 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1382 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1383 { 1384 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1386 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1387 PetscErrorCode ierr; 1388 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1389 PetscInt n,i,jrow,j; 1390 1391 PetscFunctionBegin; 1392 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1393 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1394 idx = a->j; 1395 v = a->a; 1396 ii = a->i; 1397 1398 for (i=0; i<m; i++) { 1399 jrow = ii[i]; 1400 n = ii[i+1] - jrow; 1401 sum1 = 0.0; 1402 sum2 = 0.0; 1403 sum3 = 0.0; 1404 sum4 = 0.0; 1405 sum5 = 0.0; 1406 sum6 = 0.0; 1407 sum7 = 0.0; 1408 sum8 = 0.0; 1409 sum9 = 0.0; 1410 for (j=0; j<n; j++) { 1411 sum1 += v[jrow]*x[9*idx[jrow]]; 1412 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1413 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1414 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1415 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1416 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1417 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1418 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1419 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1420 jrow++; 1421 } 1422 y[9*i] = sum1; 1423 y[9*i+1] = sum2; 1424 y[9*i+2] = sum3; 1425 y[9*i+3] = sum4; 1426 y[9*i+4] = sum5; 1427 y[9*i+5] = sum6; 1428 y[9*i+6] = sum7; 1429 y[9*i+7] = sum8; 1430 y[9*i+8] = sum9; 1431 } 1432 1433 ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr); 1434 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1435 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 /* ------------------------------------------------------------------------------*/ 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1443 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1444 { 1445 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1446 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1447 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1448 PetscErrorCode ierr; 1449 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1450 1451 PetscFunctionBegin; 1452 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1453 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1454 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1455 1456 for (i=0; i<m; i++) { 1457 idx = a->j + a->i[i] ; 1458 v = a->a + a->i[i] ; 1459 n = a->i[i+1] - a->i[i]; 1460 alpha1 = x[9*i]; 1461 alpha2 = x[9*i+1]; 1462 alpha3 = x[9*i+2]; 1463 alpha4 = x[9*i+3]; 1464 alpha5 = x[9*i+4]; 1465 alpha6 = x[9*i+5]; 1466 alpha7 = x[9*i+6]; 1467 alpha8 = x[9*i+7]; 1468 alpha9 = x[9*i+8]; 1469 while (n-->0) { 1470 y[9*(*idx)] += alpha1*(*v); 1471 y[9*(*idx)+1] += alpha2*(*v); 1472 y[9*(*idx)+2] += alpha3*(*v); 1473 y[9*(*idx)+3] += alpha4*(*v); 1474 y[9*(*idx)+4] += alpha5*(*v); 1475 y[9*(*idx)+5] += alpha6*(*v); 1476 y[9*(*idx)+6] += alpha7*(*v); 1477 y[9*(*idx)+7] += alpha8*(*v); 1478 y[9*(*idx)+8] += alpha9*(*v); 1479 idx++; v++; 1480 } 1481 } 1482 ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr); 1483 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1484 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1490 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1491 { 1492 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1493 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1494 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1495 PetscErrorCode ierr; 1496 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1497 PetscInt n,i,jrow,j; 1498 1499 PetscFunctionBegin; 1500 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1501 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1502 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1503 idx = a->j; 1504 v = a->a; 1505 ii = a->i; 1506 1507 for (i=0; i<m; i++) { 1508 jrow = ii[i]; 1509 n = ii[i+1] - jrow; 1510 sum1 = 0.0; 1511 sum2 = 0.0; 1512 sum3 = 0.0; 1513 sum4 = 0.0; 1514 sum5 = 0.0; 1515 sum6 = 0.0; 1516 sum7 = 0.0; 1517 sum8 = 0.0; 1518 sum9 = 0.0; 1519 for (j=0; j<n; j++) { 1520 sum1 += v[jrow]*x[9*idx[jrow]]; 1521 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1522 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1523 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1524 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1525 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1526 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1527 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1528 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1529 jrow++; 1530 } 1531 y[9*i] += sum1; 1532 y[9*i+1] += sum2; 1533 y[9*i+2] += sum3; 1534 y[9*i+3] += sum4; 1535 y[9*i+4] += sum5; 1536 y[9*i+5] += sum6; 1537 y[9*i+6] += sum7; 1538 y[9*i+7] += sum8; 1539 y[9*i+8] += sum9; 1540 } 1541 1542 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1543 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1544 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1550 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1551 { 1552 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1553 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1554 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1555 PetscErrorCode ierr; 1556 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1557 1558 PetscFunctionBegin; 1559 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1560 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1561 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1562 for (i=0; i<m; i++) { 1563 idx = a->j + a->i[i] ; 1564 v = a->a + a->i[i] ; 1565 n = a->i[i+1] - a->i[i]; 1566 alpha1 = x[9*i]; 1567 alpha2 = x[9*i+1]; 1568 alpha3 = x[9*i+2]; 1569 alpha4 = x[9*i+3]; 1570 alpha5 = x[9*i+4]; 1571 alpha6 = x[9*i+5]; 1572 alpha7 = x[9*i+6]; 1573 alpha8 = x[9*i+7]; 1574 alpha9 = x[9*i+8]; 1575 while (n-->0) { 1576 y[9*(*idx)] += alpha1*(*v); 1577 y[9*(*idx)+1] += alpha2*(*v); 1578 y[9*(*idx)+2] += alpha3*(*v); 1579 y[9*(*idx)+3] += alpha4*(*v); 1580 y[9*(*idx)+4] += alpha5*(*v); 1581 y[9*(*idx)+5] += alpha6*(*v); 1582 y[9*(*idx)+6] += alpha7*(*v); 1583 y[9*(*idx)+7] += alpha8*(*v); 1584 y[9*(*idx)+8] += alpha9*(*v); 1585 idx++; v++; 1586 } 1587 } 1588 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1589 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1590 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1591 PetscFunctionReturn(0); 1592 } 1593 /*--------------------------------------------------------------------------------------------*/ 1594 #undef __FUNCT__ 1595 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1596 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1597 { 1598 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1599 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1600 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1601 PetscErrorCode ierr; 1602 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1603 PetscInt n,i,jrow,j; 1604 1605 PetscFunctionBegin; 1606 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1607 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1608 idx = a->j; 1609 v = a->a; 1610 ii = a->i; 1611 1612 for (i=0; i<m; i++) { 1613 jrow = ii[i]; 1614 n = ii[i+1] - jrow; 1615 sum1 = 0.0; 1616 sum2 = 0.0; 1617 sum3 = 0.0; 1618 sum4 = 0.0; 1619 sum5 = 0.0; 1620 sum6 = 0.0; 1621 sum7 = 0.0; 1622 sum8 = 0.0; 1623 sum9 = 0.0; 1624 sum10 = 0.0; 1625 for (j=0; j<n; j++) { 1626 sum1 += v[jrow]*x[10*idx[jrow]]; 1627 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1628 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1629 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1630 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1631 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1632 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1633 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1634 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1635 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1636 jrow++; 1637 } 1638 y[10*i] = sum1; 1639 y[10*i+1] = sum2; 1640 y[10*i+2] = sum3; 1641 y[10*i+3] = sum4; 1642 y[10*i+4] = sum5; 1643 y[10*i+5] = sum6; 1644 y[10*i+6] = sum7; 1645 y[10*i+7] = sum8; 1646 y[10*i+8] = sum9; 1647 y[10*i+9] = sum10; 1648 } 1649 1650 ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1651 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1652 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1658 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1659 { 1660 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1661 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1662 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1663 PetscErrorCode ierr; 1664 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1665 PetscInt n,i,jrow,j; 1666 1667 PetscFunctionBegin; 1668 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1669 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1670 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1671 idx = a->j; 1672 v = a->a; 1673 ii = a->i; 1674 1675 for (i=0; i<m; i++) { 1676 jrow = ii[i]; 1677 n = ii[i+1] - jrow; 1678 sum1 = 0.0; 1679 sum2 = 0.0; 1680 sum3 = 0.0; 1681 sum4 = 0.0; 1682 sum5 = 0.0; 1683 sum6 = 0.0; 1684 sum7 = 0.0; 1685 sum8 = 0.0; 1686 sum9 = 0.0; 1687 sum10 = 0.0; 1688 for (j=0; j<n; j++) { 1689 sum1 += v[jrow]*x[10*idx[jrow]]; 1690 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1691 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1692 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1693 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1694 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1695 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1696 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1697 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1698 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1699 jrow++; 1700 } 1701 y[10*i] += sum1; 1702 y[10*i+1] += sum2; 1703 y[10*i+2] += sum3; 1704 y[10*i+3] += sum4; 1705 y[10*i+4] += sum5; 1706 y[10*i+5] += sum6; 1707 y[10*i+6] += sum7; 1708 y[10*i+7] += sum8; 1709 y[10*i+8] += sum9; 1710 y[10*i+9] += sum10; 1711 } 1712 1713 ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1714 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1715 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1721 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1722 { 1723 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1725 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1726 PetscErrorCode ierr; 1727 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1728 1729 PetscFunctionBegin; 1730 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1731 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1732 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1733 1734 for (i=0; i<m; i++) { 1735 idx = a->j + a->i[i] ; 1736 v = a->a + a->i[i] ; 1737 n = a->i[i+1] - a->i[i]; 1738 alpha1 = x[10*i]; 1739 alpha2 = x[10*i+1]; 1740 alpha3 = x[10*i+2]; 1741 alpha4 = x[10*i+3]; 1742 alpha5 = x[10*i+4]; 1743 alpha6 = x[10*i+5]; 1744 alpha7 = x[10*i+6]; 1745 alpha8 = x[10*i+7]; 1746 alpha9 = x[10*i+8]; 1747 alpha10 = x[10*i+9]; 1748 while (n-->0) { 1749 y[10*(*idx)] += alpha1*(*v); 1750 y[10*(*idx)+1] += alpha2*(*v); 1751 y[10*(*idx)+2] += alpha3*(*v); 1752 y[10*(*idx)+3] += alpha4*(*v); 1753 y[10*(*idx)+4] += alpha5*(*v); 1754 y[10*(*idx)+5] += alpha6*(*v); 1755 y[10*(*idx)+6] += alpha7*(*v); 1756 y[10*(*idx)+7] += alpha8*(*v); 1757 y[10*(*idx)+8] += alpha9*(*v); 1758 y[10*(*idx)+9] += alpha10*(*v); 1759 idx++; v++; 1760 } 1761 } 1762 ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr); 1763 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1764 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1770 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1771 { 1772 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1773 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1774 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1775 PetscErrorCode ierr; 1776 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1777 1778 PetscFunctionBegin; 1779 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1780 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1781 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1782 for (i=0; i<m; i++) { 1783 idx = a->j + a->i[i] ; 1784 v = a->a + a->i[i] ; 1785 n = a->i[i+1] - a->i[i]; 1786 alpha1 = x[10*i]; 1787 alpha2 = x[10*i+1]; 1788 alpha3 = x[10*i+2]; 1789 alpha4 = x[10*i+3]; 1790 alpha5 = x[10*i+4]; 1791 alpha6 = x[10*i+5]; 1792 alpha7 = x[10*i+6]; 1793 alpha8 = x[10*i+7]; 1794 alpha9 = x[10*i+8]; 1795 alpha10 = x[10*i+9]; 1796 while (n-->0) { 1797 y[10*(*idx)] += alpha1*(*v); 1798 y[10*(*idx)+1] += alpha2*(*v); 1799 y[10*(*idx)+2] += alpha3*(*v); 1800 y[10*(*idx)+3] += alpha4*(*v); 1801 y[10*(*idx)+4] += alpha5*(*v); 1802 y[10*(*idx)+5] += alpha6*(*v); 1803 y[10*(*idx)+6] += alpha7*(*v); 1804 y[10*(*idx)+7] += alpha8*(*v); 1805 y[10*(*idx)+8] += alpha9*(*v); 1806 y[10*(*idx)+9] += alpha10*(*v); 1807 idx++; v++; 1808 } 1809 } 1810 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1812 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 1817 /*--------------------------------------------------------------------------------------------*/ 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1820 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1821 { 1822 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1823 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1824 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1825 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1826 PetscErrorCode ierr; 1827 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1828 PetscInt n,i,jrow,j; 1829 1830 PetscFunctionBegin; 1831 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1832 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1833 idx = a->j; 1834 v = a->a; 1835 ii = a->i; 1836 1837 for (i=0; i<m; i++) { 1838 jrow = ii[i]; 1839 n = ii[i+1] - jrow; 1840 sum1 = 0.0; 1841 sum2 = 0.0; 1842 sum3 = 0.0; 1843 sum4 = 0.0; 1844 sum5 = 0.0; 1845 sum6 = 0.0; 1846 sum7 = 0.0; 1847 sum8 = 0.0; 1848 sum9 = 0.0; 1849 sum10 = 0.0; 1850 sum11 = 0.0; 1851 sum12 = 0.0; 1852 sum13 = 0.0; 1853 sum14 = 0.0; 1854 sum15 = 0.0; 1855 sum16 = 0.0; 1856 for (j=0; j<n; j++) { 1857 sum1 += v[jrow]*x[16*idx[jrow]]; 1858 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1859 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1860 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1861 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1862 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1863 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1864 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1865 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1866 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1867 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1868 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1869 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1870 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1871 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1872 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1873 jrow++; 1874 } 1875 y[16*i] = sum1; 1876 y[16*i+1] = sum2; 1877 y[16*i+2] = sum3; 1878 y[16*i+3] = sum4; 1879 y[16*i+4] = sum5; 1880 y[16*i+5] = sum6; 1881 y[16*i+6] = sum7; 1882 y[16*i+7] = sum8; 1883 y[16*i+8] = sum9; 1884 y[16*i+9] = sum10; 1885 y[16*i+10] = sum11; 1886 y[16*i+11] = sum12; 1887 y[16*i+12] = sum13; 1888 y[16*i+13] = sum14; 1889 y[16*i+14] = sum15; 1890 y[16*i+15] = sum16; 1891 } 1892 1893 ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr); 1894 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1895 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1901 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1902 { 1903 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1904 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1905 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1906 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1907 PetscErrorCode ierr; 1908 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1909 1910 PetscFunctionBegin; 1911 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1912 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1913 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1914 1915 for (i=0; i<m; i++) { 1916 idx = a->j + a->i[i] ; 1917 v = a->a + a->i[i] ; 1918 n = a->i[i+1] - a->i[i]; 1919 alpha1 = x[16*i]; 1920 alpha2 = x[16*i+1]; 1921 alpha3 = x[16*i+2]; 1922 alpha4 = x[16*i+3]; 1923 alpha5 = x[16*i+4]; 1924 alpha6 = x[16*i+5]; 1925 alpha7 = x[16*i+6]; 1926 alpha8 = x[16*i+7]; 1927 alpha9 = x[16*i+8]; 1928 alpha10 = x[16*i+9]; 1929 alpha11 = x[16*i+10]; 1930 alpha12 = x[16*i+11]; 1931 alpha13 = x[16*i+12]; 1932 alpha14 = x[16*i+13]; 1933 alpha15 = x[16*i+14]; 1934 alpha16 = x[16*i+15]; 1935 while (n-->0) { 1936 y[16*(*idx)] += alpha1*(*v); 1937 y[16*(*idx)+1] += alpha2*(*v); 1938 y[16*(*idx)+2] += alpha3*(*v); 1939 y[16*(*idx)+3] += alpha4*(*v); 1940 y[16*(*idx)+4] += alpha5*(*v); 1941 y[16*(*idx)+5] += alpha6*(*v); 1942 y[16*(*idx)+6] += alpha7*(*v); 1943 y[16*(*idx)+7] += alpha8*(*v); 1944 y[16*(*idx)+8] += alpha9*(*v); 1945 y[16*(*idx)+9] += alpha10*(*v); 1946 y[16*(*idx)+10] += alpha11*(*v); 1947 y[16*(*idx)+11] += alpha12*(*v); 1948 y[16*(*idx)+12] += alpha13*(*v); 1949 y[16*(*idx)+13] += alpha14*(*v); 1950 y[16*(*idx)+14] += alpha15*(*v); 1951 y[16*(*idx)+15] += alpha16*(*v); 1952 idx++; v++; 1953 } 1954 } 1955 ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr); 1956 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1957 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1963 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1964 { 1965 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1966 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1967 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1968 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1969 PetscErrorCode ierr; 1970 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1971 PetscInt n,i,jrow,j; 1972 1973 PetscFunctionBegin; 1974 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1975 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1976 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1977 idx = a->j; 1978 v = a->a; 1979 ii = a->i; 1980 1981 for (i=0; i<m; i++) { 1982 jrow = ii[i]; 1983 n = ii[i+1] - jrow; 1984 sum1 = 0.0; 1985 sum2 = 0.0; 1986 sum3 = 0.0; 1987 sum4 = 0.0; 1988 sum5 = 0.0; 1989 sum6 = 0.0; 1990 sum7 = 0.0; 1991 sum8 = 0.0; 1992 sum9 = 0.0; 1993 sum10 = 0.0; 1994 sum11 = 0.0; 1995 sum12 = 0.0; 1996 sum13 = 0.0; 1997 sum14 = 0.0; 1998 sum15 = 0.0; 1999 sum16 = 0.0; 2000 for (j=0; j<n; j++) { 2001 sum1 += v[jrow]*x[16*idx[jrow]]; 2002 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2003 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2004 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2005 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2006 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2007 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2008 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2009 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2010 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2011 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2012 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2013 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2014 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2015 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2016 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2017 jrow++; 2018 } 2019 y[16*i] += sum1; 2020 y[16*i+1] += sum2; 2021 y[16*i+2] += sum3; 2022 y[16*i+3] += sum4; 2023 y[16*i+4] += sum5; 2024 y[16*i+5] += sum6; 2025 y[16*i+6] += sum7; 2026 y[16*i+7] += sum8; 2027 y[16*i+8] += sum9; 2028 y[16*i+9] += sum10; 2029 y[16*i+10] += sum11; 2030 y[16*i+11] += sum12; 2031 y[16*i+12] += sum13; 2032 y[16*i+13] += sum14; 2033 y[16*i+14] += sum15; 2034 y[16*i+15] += sum16; 2035 } 2036 2037 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2038 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2039 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2040 PetscFunctionReturn(0); 2041 } 2042 2043 #undef __FUNCT__ 2044 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2045 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2046 { 2047 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2048 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2049 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2050 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2051 PetscErrorCode ierr; 2052 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 2053 2054 PetscFunctionBegin; 2055 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2056 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2057 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2058 for (i=0; i<m; i++) { 2059 idx = a->j + a->i[i] ; 2060 v = a->a + a->i[i] ; 2061 n = a->i[i+1] - a->i[i]; 2062 alpha1 = x[16*i]; 2063 alpha2 = x[16*i+1]; 2064 alpha3 = x[16*i+2]; 2065 alpha4 = x[16*i+3]; 2066 alpha5 = x[16*i+4]; 2067 alpha6 = x[16*i+5]; 2068 alpha7 = x[16*i+6]; 2069 alpha8 = x[16*i+7]; 2070 alpha9 = x[16*i+8]; 2071 alpha10 = x[16*i+9]; 2072 alpha11 = x[16*i+10]; 2073 alpha12 = x[16*i+11]; 2074 alpha13 = x[16*i+12]; 2075 alpha14 = x[16*i+13]; 2076 alpha15 = x[16*i+14]; 2077 alpha16 = x[16*i+15]; 2078 while (n-->0) { 2079 y[16*(*idx)] += alpha1*(*v); 2080 y[16*(*idx)+1] += alpha2*(*v); 2081 y[16*(*idx)+2] += alpha3*(*v); 2082 y[16*(*idx)+3] += alpha4*(*v); 2083 y[16*(*idx)+4] += alpha5*(*v); 2084 y[16*(*idx)+5] += alpha6*(*v); 2085 y[16*(*idx)+6] += alpha7*(*v); 2086 y[16*(*idx)+7] += alpha8*(*v); 2087 y[16*(*idx)+8] += alpha9*(*v); 2088 y[16*(*idx)+9] += alpha10*(*v); 2089 y[16*(*idx)+10] += alpha11*(*v); 2090 y[16*(*idx)+11] += alpha12*(*v); 2091 y[16*(*idx)+12] += alpha13*(*v); 2092 y[16*(*idx)+13] += alpha14*(*v); 2093 y[16*(*idx)+14] += alpha15*(*v); 2094 y[16*(*idx)+15] += alpha16*(*v); 2095 idx++; v++; 2096 } 2097 } 2098 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2099 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2100 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2101 PetscFunctionReturn(0); 2102 } 2103 2104 /*===================================================================================*/ 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2107 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2108 { 2109 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2110 PetscErrorCode ierr; 2111 2112 PetscFunctionBegin; 2113 /* start the scatter */ 2114 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2115 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2116 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2117 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 2121 #undef __FUNCT__ 2122 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2123 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2124 { 2125 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2130 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2131 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2132 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2138 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2139 { 2140 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 /* start the scatter */ 2145 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2146 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2147 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 2148 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2154 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2155 { 2156 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2157 PetscErrorCode ierr; 2158 2159 PetscFunctionBegin; 2160 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2161 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2162 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2163 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2169 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2170 { 2171 /* This routine requires testing -- but it's getting better. */ 2172 PetscErrorCode ierr; 2173 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2174 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2175 Mat P=pp->AIJ; 2176 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2177 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 2178 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2179 PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn; 2180 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 2181 MatScalar *ca; 2182 2183 PetscFunctionBegin; 2184 /* Start timer */ 2185 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2186 2187 /* Get ij structure of P^T */ 2188 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2189 2190 cn = pn*ppdof; 2191 /* Allocate ci array, arrays for fill computation and */ 2192 /* free space for accumulating nonzero column info */ 2193 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2194 ci[0] = 0; 2195 2196 /* Work arrays for rows of P^T*A */ 2197 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 2198 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 2199 ptasparserow = ptadenserow + an; 2200 denserow = ptasparserow + an; 2201 sparserow = denserow + cn; 2202 2203 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2204 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2205 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2206 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2207 current_space = free_space; 2208 2209 /* Determine symbolic info for each row of C: */ 2210 for (i=0;i<pn;i++) { 2211 ptnzi = pti[i+1] - pti[i]; 2212 ptJ = ptj + pti[i]; 2213 for (dof=0;dof<ppdof;dof++) { 2214 ptanzi = 0; 2215 /* Determine symbolic row of PtA: */ 2216 for (j=0;j<ptnzi;j++) { 2217 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2218 arow = ptJ[j]*ppdof + dof; 2219 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2220 anzj = ai[arow+1] - ai[arow]; 2221 ajj = aj + ai[arow]; 2222 for (k=0;k<anzj;k++) { 2223 if (!ptadenserow[ajj[k]]) { 2224 ptadenserow[ajj[k]] = -1; 2225 ptasparserow[ptanzi++] = ajj[k]; 2226 } 2227 } 2228 } 2229 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2230 ptaj = ptasparserow; 2231 cnzi = 0; 2232 for (j=0;j<ptanzi;j++) { 2233 /* Get offset within block of P */ 2234 pshift = *ptaj%ppdof; 2235 /* Get block row of P */ 2236 prow = (*ptaj++)/ppdof; /* integer division */ 2237 /* P has same number of nonzeros per row as the compressed form */ 2238 pnzj = pi[prow+1] - pi[prow]; 2239 pjj = pj + pi[prow]; 2240 for (k=0;k<pnzj;k++) { 2241 /* Locations in C are shifted by the offset within the block */ 2242 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2243 if (!denserow[pjj[k]*ppdof+pshift]) { 2244 denserow[pjj[k]*ppdof+pshift] = -1; 2245 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2246 } 2247 } 2248 } 2249 2250 /* sort sparserow */ 2251 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2252 2253 /* If free space is not available, make more free space */ 2254 /* Double the amount of total space in the list */ 2255 if (current_space->local_remaining<cnzi) { 2256 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2257 } 2258 2259 /* Copy data into free space, and zero out denserows */ 2260 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2261 current_space->array += cnzi; 2262 current_space->local_used += cnzi; 2263 current_space->local_remaining -= cnzi; 2264 2265 for (j=0;j<ptanzi;j++) { 2266 ptadenserow[ptasparserow[j]] = 0; 2267 } 2268 for (j=0;j<cnzi;j++) { 2269 denserow[sparserow[j]] = 0; 2270 } 2271 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2272 /* For now, we will recompute what is needed. */ 2273 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2274 } 2275 } 2276 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2277 /* Allocate space for cj, initialize cj, and */ 2278 /* destroy list of free space and other temporary array(s) */ 2279 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2280 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2281 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2282 2283 /* Allocate space for ca */ 2284 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2285 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2286 2287 /* put together the new matrix */ 2288 ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2289 2290 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2291 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2292 c = (Mat_SeqAIJ *)((*C)->data); 2293 c->free_a = PETSC_TRUE; 2294 c->free_ij = PETSC_TRUE; 2295 c->nonew = 0; 2296 2297 /* Clean up. */ 2298 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2299 2300 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2306 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2307 { 2308 /* This routine requires testing -- first draft only */ 2309 PetscErrorCode ierr; 2310 PetscInt flops=0; 2311 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2312 Mat P=pp->AIJ; 2313 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2314 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2315 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2316 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2317 PetscInt *ci=c->i,*cj=c->j,*cjj; 2318 PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof; 2319 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2320 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2321 2322 PetscFunctionBegin; 2323 /* Allocate temporary array for storage of one row of A*P */ 2324 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2325 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2326 2327 apj = (PetscInt *)(apa + cn); 2328 apjdense = apj + cn; 2329 2330 /* Clear old values in C */ 2331 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2332 2333 for (i=0;i<am;i++) { 2334 /* Form sparse row of A*P */ 2335 anzi = ai[i+1] - ai[i]; 2336 apnzj = 0; 2337 for (j=0;j<anzi;j++) { 2338 /* Get offset within block of P */ 2339 pshift = *aj%ppdof; 2340 /* Get block row of P */ 2341 prow = *aj++/ppdof; /* integer division */ 2342 pnzj = pi[prow+1] - pi[prow]; 2343 pjj = pj + pi[prow]; 2344 paj = pa + pi[prow]; 2345 for (k=0;k<pnzj;k++) { 2346 poffset = pjj[k]*ppdof+pshift; 2347 if (!apjdense[poffset]) { 2348 apjdense[poffset] = -1; 2349 apj[apnzj++] = poffset; 2350 } 2351 apa[poffset] += (*aa)*paj[k]; 2352 } 2353 flops += 2*pnzj; 2354 aa++; 2355 } 2356 2357 /* Sort the j index array for quick sparse axpy. */ 2358 /* Note: a array does not need sorting as it is in dense storage locations. */ 2359 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2360 2361 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2362 prow = i/ppdof; /* integer division */ 2363 pshift = i%ppdof; 2364 poffset = pi[prow]; 2365 pnzi = pi[prow+1] - poffset; 2366 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2367 pJ = pj+poffset; 2368 pA = pa+poffset; 2369 for (j=0;j<pnzi;j++) { 2370 crow = (*pJ)*ppdof+pshift; 2371 cjj = cj + ci[crow]; 2372 caj = ca + ci[crow]; 2373 pJ++; 2374 /* Perform sparse axpy operation. Note cjj includes apj. */ 2375 for (k=0,nextap=0;nextap<apnzj;k++) { 2376 if (cjj[k]==apj[nextap]) { 2377 caj[k] += (*pA)*apa[apj[nextap++]]; 2378 } 2379 } 2380 flops += 2*apnzj; 2381 pA++; 2382 } 2383 2384 /* Zero the current row info for A*P */ 2385 for (j=0;j<apnzj;j++) { 2386 apa[apj[j]] = 0.; 2387 apjdense[apj[j]] = 0; 2388 } 2389 } 2390 2391 /* Assemble the final matrix and clean up */ 2392 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 ierr = PetscFree(apa);CHKERRQ(ierr); 2395 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 2396 2397 PetscFunctionReturn(0); 2398 } 2399 2400 EXTERN_C_BEGIN 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2403 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2404 { 2405 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2406 Mat a = b->AIJ,B; 2407 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 2408 PetscErrorCode ierr; 2409 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2410 PetscInt *cols; 2411 PetscScalar *vals; 2412 2413 PetscFunctionBegin; 2414 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 2415 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 2416 for (i=0; i<m; i++) { 2417 nmax = PetscMax(nmax,aij->ilen[i]); 2418 for (j=0; j<dof; j++) { 2419 ilen[dof*i+j] = aij->ilen[i]; 2420 } 2421 } 2422 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2423 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2424 ierr = PetscFree(ilen);CHKERRQ(ierr); 2425 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 2426 ii = 0; 2427 for (i=0; i<m; i++) { 2428 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2429 for (j=0; j<dof; j++) { 2430 for (k=0; k<ncols; k++) { 2431 icols[k] = dof*cols[k]+j; 2432 } 2433 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2434 ii++; 2435 } 2436 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2437 } 2438 ierr = PetscFree(icols);CHKERRQ(ierr); 2439 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2440 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2441 2442 if (reuse == MAT_REUSE_MATRIX) { 2443 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2444 } else { 2445 *newmat = B; 2446 } 2447 PetscFunctionReturn(0); 2448 } 2449 EXTERN_C_END 2450 2451 #include "src/mat/impls/aij/mpi/mpiaij.h" 2452 2453 EXTERN_C_BEGIN 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2456 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2457 { 2458 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2459 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 2460 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 2461 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 2462 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 2463 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2464 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2465 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 2466 PetscInt rstart,cstart,*garray,ii,k; 2467 PetscErrorCode ierr; 2468 PetscScalar *vals,*ovals; 2469 2470 PetscFunctionBegin; 2471 ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr); 2472 for (i=0; i<A->rmap.n/dof; i++) { 2473 nmax = PetscMax(nmax,AIJ->ilen[i]); 2474 onmax = PetscMax(onmax,OAIJ->ilen[i]); 2475 for (j=0; j<dof; j++) { 2476 dnz[dof*i+j] = AIJ->ilen[i]; 2477 onz[dof*i+j] = OAIJ->ilen[i]; 2478 } 2479 } 2480 ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2481 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2482 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 2483 2484 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2485 rstart = dof*maij->A->rmap.rstart; 2486 cstart = dof*maij->A->cmap.rstart; 2487 garray = mpiaij->garray; 2488 2489 ii = rstart; 2490 for (i=0; i<A->rmap.n/dof; i++) { 2491 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2492 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2493 for (j=0; j<dof; j++) { 2494 for (k=0; k<ncols; k++) { 2495 icols[k] = cstart + dof*cols[k]+j; 2496 } 2497 for (k=0; k<oncols; k++) { 2498 oicols[k] = dof*garray[ocols[k]]+j; 2499 } 2500 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2501 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 2502 ii++; 2503 } 2504 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2505 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2506 } 2507 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 2508 2509 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2510 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2511 2512 if (reuse == MAT_REUSE_MATRIX) { 2513 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2514 } else { 2515 *newmat = B; 2516 } 2517 PetscFunctionReturn(0); 2518 } 2519 EXTERN_C_END 2520 2521 2522 /* ---------------------------------------------------------------------------------- */ 2523 /*MC 2524 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 2525 operations for multicomponent problems. It interpolates each component the same 2526 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 2527 and MATMPIAIJ for distributed matrices. 2528 2529 Operations provided: 2530 + MatMult 2531 . MatMultTranspose 2532 . MatMultAdd 2533 . MatMultTransposeAdd 2534 - MatView 2535 2536 Level: advanced 2537 2538 M*/ 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatCreateMAIJ" 2541 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 2542 { 2543 PetscErrorCode ierr; 2544 PetscMPIInt size; 2545 PetscInt n; 2546 Mat_MPIMAIJ *b; 2547 Mat B; 2548 2549 PetscFunctionBegin; 2550 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2551 2552 if (dof == 1) { 2553 *maij = A; 2554 } else { 2555 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 2556 ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr); 2557 B->assembled = PETSC_TRUE; 2558 2559 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2560 if (size == 1) { 2561 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 2562 B->ops->destroy = MatDestroy_SeqMAIJ; 2563 B->ops->view = MatView_SeqMAIJ; 2564 b = (Mat_MPIMAIJ*)B->data; 2565 b->dof = dof; 2566 b->AIJ = A; 2567 if (dof == 2) { 2568 B->ops->mult = MatMult_SeqMAIJ_2; 2569 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2570 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2571 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2572 } else if (dof == 3) { 2573 B->ops->mult = MatMult_SeqMAIJ_3; 2574 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2575 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2576 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2577 } else if (dof == 4) { 2578 B->ops->mult = MatMult_SeqMAIJ_4; 2579 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2580 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2581 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2582 } else if (dof == 5) { 2583 B->ops->mult = MatMult_SeqMAIJ_5; 2584 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2585 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2586 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 2587 } else if (dof == 6) { 2588 B->ops->mult = MatMult_SeqMAIJ_6; 2589 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 2590 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 2591 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2592 } else if (dof == 7) { 2593 B->ops->mult = MatMult_SeqMAIJ_7; 2594 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2595 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2596 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 2597 } else if (dof == 8) { 2598 B->ops->mult = MatMult_SeqMAIJ_8; 2599 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 2600 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 2601 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 2602 } else if (dof == 9) { 2603 B->ops->mult = MatMult_SeqMAIJ_9; 2604 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 2605 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 2606 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2607 } else if (dof == 10) { 2608 B->ops->mult = MatMult_SeqMAIJ_10; 2609 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2610 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2611 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 2612 } else if (dof == 16) { 2613 B->ops->mult = MatMult_SeqMAIJ_16; 2614 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 2615 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 2616 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2617 } else { 2618 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 2619 } 2620 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 2621 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2623 } else { 2624 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2625 IS from,to; 2626 Vec gvec; 2627 PetscInt *garray,i; 2628 2629 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2630 B->ops->destroy = MatDestroy_MPIMAIJ; 2631 B->ops->view = MatView_MPIMAIJ; 2632 b = (Mat_MPIMAIJ*)B->data; 2633 b->dof = dof; 2634 b->A = A; 2635 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 2636 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 2637 2638 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2639 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 2640 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2641 2642 /* create two temporary Index sets for build scatter gather */ 2643 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2644 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2645 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2646 ierr = PetscFree(garray);CHKERRQ(ierr); 2647 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2648 2649 /* create temporary global vector to generate scatter context */ 2650 ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr); 2651 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2652 2653 /* generate the scatter context */ 2654 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2655 2656 ierr = ISDestroy(from);CHKERRQ(ierr); 2657 ierr = ISDestroy(to);CHKERRQ(ierr); 2658 ierr = VecDestroy(gvec);CHKERRQ(ierr); 2659 2660 B->ops->mult = MatMult_MPIMAIJ_dof; 2661 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 2662 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 2663 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 2664 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2665 } 2666 *maij = B; 2667 ierr = MatView_Private(B);CHKERRQ(ierr); 2668 } 2669 PetscFunctionReturn(0); 2670 } 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683