1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "../src/mat/impls/maij/maij.h" 21 #include "../src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27 { 28 PetscErrorCode ierr; 29 PetscTruth ismpimaij,isseqmaij; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34 if (ismpimaij) { 35 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36 37 *B = b->A; 38 } else if (isseqmaij) { 39 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40 41 *B = b->AIJ; 42 } else { 43 *B = A; 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMAIJRedimension" 50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51 { 52 PetscErrorCode ierr; 53 Mat Aij; 54 55 PetscFunctionBegin; 56 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqMAIJ" 63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 67 68 PetscFunctionBegin; 69 if (b->AIJ) { 70 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 71 } 72 ierr = PetscFree(b);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatView_SeqMAIJ" 78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 79 { 80 PetscErrorCode ierr; 81 Mat B; 82 83 PetscFunctionBegin; 84 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 85 ierr = MatView(B,viewer);CHKERRQ(ierr); 86 ierr = MatDestroy(B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatView_MPIMAIJ" 92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 93 { 94 PetscErrorCode ierr; 95 Mat B; 96 97 PetscFunctionBegin; 98 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 99 ierr = MatView(B,viewer);CHKERRQ(ierr); 100 ierr = MatDestroy(B);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatDestroy_MPIMAIJ" 106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 107 { 108 PetscErrorCode ierr; 109 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 110 111 PetscFunctionBegin; 112 if (b->AIJ) { 113 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 114 } 115 if (b->OAIJ) { 116 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117 } 118 if (b->A) { 119 ierr = MatDestroy(b->A);CHKERRQ(ierr); 120 } 121 if (b->ctx) { 122 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123 } 124 if (b->w) { 125 ierr = VecDestroy(b->w);CHKERRQ(ierr); 126 } 127 ierr = PetscFree(b);CHKERRQ(ierr); 128 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 /*MC 133 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 134 multicomponent problems, interpolating or restricting each component the same way independently. 135 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 136 137 Operations provided: 138 . MatMult 139 . MatMultTranspose 140 . MatMultAdd 141 . MatMultTransposeAdd 142 143 Level: advanced 144 145 .seealso: MatCreateSeqDense 146 M*/ 147 148 EXTERN_C_BEGIN 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatCreate_MAIJ" 151 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 152 { 153 PetscErrorCode ierr; 154 Mat_MPIMAIJ *b; 155 PetscMPIInt size; 156 157 PetscFunctionBegin; 158 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 159 A->data = (void*)b; 160 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 161 A->mapping = 0; 162 163 b->AIJ = 0; 164 b->dof = 0; 165 b->OAIJ = 0; 166 b->ctx = 0; 167 b->w = 0; 168 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 169 if (size == 1){ 170 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 171 } else { 172 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 EXTERN_C_END 177 178 /* --------------------------------------------------------------------------------------*/ 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatMult_SeqMAIJ_2" 181 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 182 { 183 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 185 PetscScalar *x,*y,*v,sum1, sum2; 186 PetscErrorCode ierr; 187 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 188 PetscInt n,i,jrow,j; 189 190 PetscFunctionBegin; 191 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 192 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 193 idx = a->j; 194 v = a->a; 195 ii = a->i; 196 197 for (i=0; i<m; i++) { 198 jrow = ii[i]; 199 n = ii[i+1] - jrow; 200 sum1 = 0.0; 201 sum2 = 0.0; 202 nonzerorow += (n>0); 203 for (j=0; j<n; j++) { 204 sum1 += v[jrow]*x[2*idx[jrow]]; 205 sum2 += v[jrow]*x[2*idx[jrow]+1]; 206 jrow++; 207 } 208 y[2*i] = sum1; 209 y[2*i+1] = sum2; 210 } 211 212 ierr = PetscLogFlops(4*a->nz - 2*nonzerorow);CHKERRQ(ierr); 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 220 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 221 { 222 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 223 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 224 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 225 PetscErrorCode ierr; 226 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 227 228 PetscFunctionBegin; 229 ierr = VecSet(yy,zero);CHKERRQ(ierr); 230 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 231 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 232 233 for (i=0; i<m; i++) { 234 idx = a->j + a->i[i] ; 235 v = a->a + a->i[i] ; 236 n = a->i[i+1] - a->i[i]; 237 alpha1 = x[2*i]; 238 alpha2 = x[2*i+1]; 239 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 240 } 241 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 242 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 249 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 250 { 251 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 252 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 253 PetscScalar *x,*y,*v,sum1, sum2; 254 PetscErrorCode ierr; 255 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 256 PetscInt n,i,jrow,j; 257 258 PetscFunctionBegin; 259 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 260 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 261 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 262 idx = a->j; 263 v = a->a; 264 ii = a->i; 265 266 for (i=0; i<m; i++) { 267 jrow = ii[i]; 268 n = ii[i+1] - jrow; 269 sum1 = 0.0; 270 sum2 = 0.0; 271 for (j=0; j<n; j++) { 272 sum1 += v[jrow]*x[2*idx[jrow]]; 273 sum2 += v[jrow]*x[2*idx[jrow]+1]; 274 jrow++; 275 } 276 y[2*i] += sum1; 277 y[2*i+1] += sum2; 278 } 279 280 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 281 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 282 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 287 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 288 { 289 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 290 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 291 PetscScalar *x,*y,*v,alpha1,alpha2; 292 PetscErrorCode ierr; 293 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 294 295 PetscFunctionBegin; 296 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 297 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 298 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 299 300 for (i=0; i<m; i++) { 301 idx = a->j + a->i[i] ; 302 v = a->a + a->i[i] ; 303 n = a->i[i+1] - a->i[i]; 304 alpha1 = x[2*i]; 305 alpha2 = x[2*i+1]; 306 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 307 } 308 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 309 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 310 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 /* --------------------------------------------------------------------------------------*/ 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatMult_SeqMAIJ_3" 316 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 317 { 318 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 319 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 320 PetscScalar *x,*y,*v,sum1, sum2, sum3; 321 PetscErrorCode ierr; 322 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 323 PetscInt n,i,jrow,j; 324 325 PetscFunctionBegin; 326 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 327 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 328 idx = a->j; 329 v = a->a; 330 ii = a->i; 331 332 for (i=0; i<m; i++) { 333 jrow = ii[i]; 334 n = ii[i+1] - jrow; 335 sum1 = 0.0; 336 sum2 = 0.0; 337 sum3 = 0.0; 338 nonzerorow += (n>0); 339 for (j=0; j<n; j++) { 340 sum1 += v[jrow]*x[3*idx[jrow]]; 341 sum2 += v[jrow]*x[3*idx[jrow]+1]; 342 sum3 += v[jrow]*x[3*idx[jrow]+2]; 343 jrow++; 344 } 345 y[3*i] = sum1; 346 y[3*i+1] = sum2; 347 y[3*i+2] = sum3; 348 } 349 350 ierr = PetscLogFlops(6*a->nz - 3*nonzerorow);CHKERRQ(ierr); 351 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 #undef __FUNCT__ 357 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 358 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 359 { 360 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 361 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 362 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 363 PetscErrorCode ierr; 364 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 365 366 PetscFunctionBegin; 367 ierr = VecSet(yy,zero);CHKERRQ(ierr); 368 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 369 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 370 371 for (i=0; i<m; i++) { 372 idx = a->j + a->i[i]; 373 v = a->a + a->i[i]; 374 n = a->i[i+1] - a->i[i]; 375 alpha1 = x[3*i]; 376 alpha2 = x[3*i+1]; 377 alpha3 = x[3*i+2]; 378 while (n-->0) { 379 y[3*(*idx)] += alpha1*(*v); 380 y[3*(*idx)+1] += alpha2*(*v); 381 y[3*(*idx)+2] += alpha3*(*v); 382 idx++; v++; 383 } 384 } 385 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 386 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 387 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 393 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 394 { 395 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 397 PetscScalar *x,*y,*v,sum1, sum2, sum3; 398 PetscErrorCode ierr; 399 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 400 PetscInt n,i,jrow,j; 401 402 PetscFunctionBegin; 403 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 404 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 405 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 406 idx = a->j; 407 v = a->a; 408 ii = a->i; 409 410 for (i=0; i<m; i++) { 411 jrow = ii[i]; 412 n = ii[i+1] - jrow; 413 sum1 = 0.0; 414 sum2 = 0.0; 415 sum3 = 0.0; 416 for (j=0; j<n; j++) { 417 sum1 += v[jrow]*x[3*idx[jrow]]; 418 sum2 += v[jrow]*x[3*idx[jrow]+1]; 419 sum3 += v[jrow]*x[3*idx[jrow]+2]; 420 jrow++; 421 } 422 y[3*i] += sum1; 423 y[3*i+1] += sum2; 424 y[3*i+2] += sum3; 425 } 426 427 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 434 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 435 { 436 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 437 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 438 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 439 PetscErrorCode ierr; 440 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 441 442 PetscFunctionBegin; 443 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 444 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 445 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 446 for (i=0; i<m; i++) { 447 idx = a->j + a->i[i] ; 448 v = a->a + a->i[i] ; 449 n = a->i[i+1] - a->i[i]; 450 alpha1 = x[3*i]; 451 alpha2 = x[3*i+1]; 452 alpha3 = x[3*i+2]; 453 while (n-->0) { 454 y[3*(*idx)] += alpha1*(*v); 455 y[3*(*idx)+1] += alpha2*(*v); 456 y[3*(*idx)+2] += alpha3*(*v); 457 idx++; v++; 458 } 459 } 460 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 461 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 462 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 /* ------------------------------------------------------------------------------*/ 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatMult_SeqMAIJ_4" 469 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 470 { 471 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 472 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 473 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 474 PetscErrorCode ierr; 475 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 476 PetscInt n,i,jrow,j; 477 478 PetscFunctionBegin; 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 481 idx = a->j; 482 v = a->a; 483 ii = a->i; 484 485 for (i=0; i<m; i++) { 486 jrow = ii[i]; 487 n = ii[i+1] - jrow; 488 sum1 = 0.0; 489 sum2 = 0.0; 490 sum3 = 0.0; 491 sum4 = 0.0; 492 nonzerorow += (n>0); 493 for (j=0; j<n; j++) { 494 sum1 += v[jrow]*x[4*idx[jrow]]; 495 sum2 += v[jrow]*x[4*idx[jrow]+1]; 496 sum3 += v[jrow]*x[4*idx[jrow]+2]; 497 sum4 += v[jrow]*x[4*idx[jrow]+3]; 498 jrow++; 499 } 500 y[4*i] = sum1; 501 y[4*i+1] = sum2; 502 y[4*i+2] = sum3; 503 y[4*i+3] = sum4; 504 } 505 506 ierr = PetscLogFlops(8*a->nz - 4*nonzerorow);CHKERRQ(ierr); 507 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 508 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 514 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 515 { 516 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 517 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 518 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 519 PetscErrorCode ierr; 520 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 521 522 PetscFunctionBegin; 523 ierr = VecSet(yy,zero);CHKERRQ(ierr); 524 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 525 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 526 for (i=0; i<m; i++) { 527 idx = a->j + a->i[i] ; 528 v = a->a + a->i[i] ; 529 n = a->i[i+1] - a->i[i]; 530 alpha1 = x[4*i]; 531 alpha2 = x[4*i+1]; 532 alpha3 = x[4*i+2]; 533 alpha4 = x[4*i+3]; 534 while (n-->0) { 535 y[4*(*idx)] += alpha1*(*v); 536 y[4*(*idx)+1] += alpha2*(*v); 537 y[4*(*idx)+2] += alpha3*(*v); 538 y[4*(*idx)+3] += alpha4*(*v); 539 idx++; v++; 540 } 541 } 542 ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 543 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 544 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 550 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 551 { 552 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 553 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 554 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 555 PetscErrorCode ierr; 556 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 557 PetscInt n,i,jrow,j; 558 559 PetscFunctionBegin; 560 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 561 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 563 idx = a->j; 564 v = a->a; 565 ii = a->i; 566 567 for (i=0; i<m; i++) { 568 jrow = ii[i]; 569 n = ii[i+1] - jrow; 570 sum1 = 0.0; 571 sum2 = 0.0; 572 sum3 = 0.0; 573 sum4 = 0.0; 574 for (j=0; j<n; j++) { 575 sum1 += v[jrow]*x[4*idx[jrow]]; 576 sum2 += v[jrow]*x[4*idx[jrow]+1]; 577 sum3 += v[jrow]*x[4*idx[jrow]+2]; 578 sum4 += v[jrow]*x[4*idx[jrow]+3]; 579 jrow++; 580 } 581 y[4*i] += sum1; 582 y[4*i+1] += sum2; 583 y[4*i+2] += sum3; 584 y[4*i+3] += sum4; 585 } 586 587 ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 588 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 589 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 594 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 595 { 596 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 597 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 598 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 599 PetscErrorCode ierr; 600 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 601 602 PetscFunctionBegin; 603 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 604 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 605 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 606 607 for (i=0; i<m; i++) { 608 idx = a->j + a->i[i] ; 609 v = a->a + a->i[i] ; 610 n = a->i[i+1] - a->i[i]; 611 alpha1 = x[4*i]; 612 alpha2 = x[4*i+1]; 613 alpha3 = x[4*i+2]; 614 alpha4 = x[4*i+3]; 615 while (n-->0) { 616 y[4*(*idx)] += alpha1*(*v); 617 y[4*(*idx)+1] += alpha2*(*v); 618 y[4*(*idx)+2] += alpha3*(*v); 619 y[4*(*idx)+3] += alpha4*(*v); 620 idx++; v++; 621 } 622 } 623 ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 624 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 625 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 /* ------------------------------------------------------------------------------*/ 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatMult_SeqMAIJ_5" 632 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 633 { 634 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 635 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 636 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 637 PetscErrorCode ierr; 638 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 639 PetscInt n,i,jrow,j; 640 641 PetscFunctionBegin; 642 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 643 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 644 idx = a->j; 645 v = a->a; 646 ii = a->i; 647 648 for (i=0; i<m; i++) { 649 jrow = ii[i]; 650 n = ii[i+1] - jrow; 651 sum1 = 0.0; 652 sum2 = 0.0; 653 sum3 = 0.0; 654 sum4 = 0.0; 655 sum5 = 0.0; 656 nonzerorow += (n>0); 657 for (j=0; j<n; j++) { 658 sum1 += v[jrow]*x[5*idx[jrow]]; 659 sum2 += v[jrow]*x[5*idx[jrow]+1]; 660 sum3 += v[jrow]*x[5*idx[jrow]+2]; 661 sum4 += v[jrow]*x[5*idx[jrow]+3]; 662 sum5 += v[jrow]*x[5*idx[jrow]+4]; 663 jrow++; 664 } 665 y[5*i] = sum1; 666 y[5*i+1] = sum2; 667 y[5*i+2] = sum3; 668 y[5*i+3] = sum4; 669 y[5*i+4] = sum5; 670 } 671 672 ierr = PetscLogFlops(10*a->nz - 5*nonzerorow);CHKERRQ(ierr); 673 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 674 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 680 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 681 { 682 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 683 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 684 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 685 PetscErrorCode ierr; 686 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 687 688 PetscFunctionBegin; 689 ierr = VecSet(yy,zero);CHKERRQ(ierr); 690 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 691 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 692 693 for (i=0; i<m; i++) { 694 idx = a->j + a->i[i] ; 695 v = a->a + a->i[i] ; 696 n = a->i[i+1] - a->i[i]; 697 alpha1 = x[5*i]; 698 alpha2 = x[5*i+1]; 699 alpha3 = x[5*i+2]; 700 alpha4 = x[5*i+3]; 701 alpha5 = x[5*i+4]; 702 while (n-->0) { 703 y[5*(*idx)] += alpha1*(*v); 704 y[5*(*idx)+1] += alpha2*(*v); 705 y[5*(*idx)+2] += alpha3*(*v); 706 y[5*(*idx)+3] += alpha4*(*v); 707 y[5*(*idx)+4] += alpha5*(*v); 708 idx++; v++; 709 } 710 } 711 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 712 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 713 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 719 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 720 { 721 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 722 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 723 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 724 PetscErrorCode ierr; 725 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 726 PetscInt n,i,jrow,j; 727 728 PetscFunctionBegin; 729 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 730 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 731 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 732 idx = a->j; 733 v = a->a; 734 ii = a->i; 735 736 for (i=0; i<m; i++) { 737 jrow = ii[i]; 738 n = ii[i+1] - jrow; 739 sum1 = 0.0; 740 sum2 = 0.0; 741 sum3 = 0.0; 742 sum4 = 0.0; 743 sum5 = 0.0; 744 for (j=0; j<n; j++) { 745 sum1 += v[jrow]*x[5*idx[jrow]]; 746 sum2 += v[jrow]*x[5*idx[jrow]+1]; 747 sum3 += v[jrow]*x[5*idx[jrow]+2]; 748 sum4 += v[jrow]*x[5*idx[jrow]+3]; 749 sum5 += v[jrow]*x[5*idx[jrow]+4]; 750 jrow++; 751 } 752 y[5*i] += sum1; 753 y[5*i+1] += sum2; 754 y[5*i+2] += sum3; 755 y[5*i+3] += sum4; 756 y[5*i+4] += sum5; 757 } 758 759 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 760 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 761 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 767 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 768 { 769 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 770 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 771 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 772 PetscErrorCode ierr; 773 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 774 775 PetscFunctionBegin; 776 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 777 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 778 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 779 780 for (i=0; i<m; i++) { 781 idx = a->j + a->i[i] ; 782 v = a->a + a->i[i] ; 783 n = a->i[i+1] - a->i[i]; 784 alpha1 = x[5*i]; 785 alpha2 = x[5*i+1]; 786 alpha3 = x[5*i+2]; 787 alpha4 = x[5*i+3]; 788 alpha5 = x[5*i+4]; 789 while (n-->0) { 790 y[5*(*idx)] += alpha1*(*v); 791 y[5*(*idx)+1] += alpha2*(*v); 792 y[5*(*idx)+2] += alpha3*(*v); 793 y[5*(*idx)+3] += alpha4*(*v); 794 y[5*(*idx)+4] += alpha5*(*v); 795 idx++; v++; 796 } 797 } 798 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } 803 804 /* ------------------------------------------------------------------------------*/ 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatMult_SeqMAIJ_6" 807 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 808 { 809 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 810 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 811 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 812 PetscErrorCode ierr; 813 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 814 PetscInt n,i,jrow,j; 815 816 PetscFunctionBegin; 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 819 idx = a->j; 820 v = a->a; 821 ii = a->i; 822 823 for (i=0; i<m; i++) { 824 jrow = ii[i]; 825 n = ii[i+1] - jrow; 826 sum1 = 0.0; 827 sum2 = 0.0; 828 sum3 = 0.0; 829 sum4 = 0.0; 830 sum5 = 0.0; 831 sum6 = 0.0; 832 nonzerorow += (n>0); 833 for (j=0; j<n; j++) { 834 sum1 += v[jrow]*x[6*idx[jrow]]; 835 sum2 += v[jrow]*x[6*idx[jrow]+1]; 836 sum3 += v[jrow]*x[6*idx[jrow]+2]; 837 sum4 += v[jrow]*x[6*idx[jrow]+3]; 838 sum5 += v[jrow]*x[6*idx[jrow]+4]; 839 sum6 += v[jrow]*x[6*idx[jrow]+5]; 840 jrow++; 841 } 842 y[6*i] = sum1; 843 y[6*i+1] = sum2; 844 y[6*i+2] = sum3; 845 y[6*i+3] = sum4; 846 y[6*i+4] = sum5; 847 y[6*i+5] = sum6; 848 } 849 850 ierr = PetscLogFlops(12*a->nz - 6*nonzerorow);CHKERRQ(ierr); 851 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 852 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } 855 856 #undef __FUNCT__ 857 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 858 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 859 { 860 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 861 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 862 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 863 PetscErrorCode ierr; 864 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 865 866 PetscFunctionBegin; 867 ierr = VecSet(yy,zero);CHKERRQ(ierr); 868 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 869 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 870 871 for (i=0; i<m; i++) { 872 idx = a->j + a->i[i] ; 873 v = a->a + a->i[i] ; 874 n = a->i[i+1] - a->i[i]; 875 alpha1 = x[6*i]; 876 alpha2 = x[6*i+1]; 877 alpha3 = x[6*i+2]; 878 alpha4 = x[6*i+3]; 879 alpha5 = x[6*i+4]; 880 alpha6 = x[6*i+5]; 881 while (n-->0) { 882 y[6*(*idx)] += alpha1*(*v); 883 y[6*(*idx)+1] += alpha2*(*v); 884 y[6*(*idx)+2] += alpha3*(*v); 885 y[6*(*idx)+3] += alpha4*(*v); 886 y[6*(*idx)+4] += alpha5*(*v); 887 y[6*(*idx)+5] += alpha6*(*v); 888 idx++; v++; 889 } 890 } 891 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 892 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 893 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 899 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 900 { 901 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 902 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 903 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 904 PetscErrorCode ierr; 905 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 906 PetscInt n,i,jrow,j; 907 908 PetscFunctionBegin; 909 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 910 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 911 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 912 idx = a->j; 913 v = a->a; 914 ii = a->i; 915 916 for (i=0; i<m; i++) { 917 jrow = ii[i]; 918 n = ii[i+1] - jrow; 919 sum1 = 0.0; 920 sum2 = 0.0; 921 sum3 = 0.0; 922 sum4 = 0.0; 923 sum5 = 0.0; 924 sum6 = 0.0; 925 for (j=0; j<n; j++) { 926 sum1 += v[jrow]*x[6*idx[jrow]]; 927 sum2 += v[jrow]*x[6*idx[jrow]+1]; 928 sum3 += v[jrow]*x[6*idx[jrow]+2]; 929 sum4 += v[jrow]*x[6*idx[jrow]+3]; 930 sum5 += v[jrow]*x[6*idx[jrow]+4]; 931 sum6 += v[jrow]*x[6*idx[jrow]+5]; 932 jrow++; 933 } 934 y[6*i] += sum1; 935 y[6*i+1] += sum2; 936 y[6*i+2] += sum3; 937 y[6*i+3] += sum4; 938 y[6*i+4] += sum5; 939 y[6*i+5] += sum6; 940 } 941 942 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 943 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 944 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 945 PetscFunctionReturn(0); 946 } 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 950 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 951 { 952 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 953 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 954 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 955 PetscErrorCode ierr; 956 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 957 958 PetscFunctionBegin; 959 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 960 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 961 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 962 963 for (i=0; i<m; i++) { 964 idx = a->j + a->i[i] ; 965 v = a->a + a->i[i] ; 966 n = a->i[i+1] - a->i[i]; 967 alpha1 = x[6*i]; 968 alpha2 = x[6*i+1]; 969 alpha3 = x[6*i+2]; 970 alpha4 = x[6*i+3]; 971 alpha5 = x[6*i+4]; 972 alpha6 = x[6*i+5]; 973 while (n-->0) { 974 y[6*(*idx)] += alpha1*(*v); 975 y[6*(*idx)+1] += alpha2*(*v); 976 y[6*(*idx)+2] += alpha3*(*v); 977 y[6*(*idx)+3] += alpha4*(*v); 978 y[6*(*idx)+4] += alpha5*(*v); 979 y[6*(*idx)+5] += alpha6*(*v); 980 idx++; v++; 981 } 982 } 983 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 984 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 985 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 /* ------------------------------------------------------------------------------*/ 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatMult_SeqMAIJ_7" 992 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 993 { 994 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 995 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 996 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 997 PetscErrorCode ierr; 998 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 999 PetscInt n,i,jrow,j; 1000 1001 PetscFunctionBegin; 1002 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1003 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1004 idx = a->j; 1005 v = a->a; 1006 ii = a->i; 1007 1008 for (i=0; i<m; i++) { 1009 jrow = ii[i]; 1010 n = ii[i+1] - jrow; 1011 sum1 = 0.0; 1012 sum2 = 0.0; 1013 sum3 = 0.0; 1014 sum4 = 0.0; 1015 sum5 = 0.0; 1016 sum6 = 0.0; 1017 sum7 = 0.0; 1018 nonzerorow += (n>0); 1019 for (j=0; j<n; j++) { 1020 sum1 += v[jrow]*x[7*idx[jrow]]; 1021 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1022 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1023 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1024 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1025 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1026 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1027 jrow++; 1028 } 1029 y[7*i] = sum1; 1030 y[7*i+1] = sum2; 1031 y[7*i+2] = sum3; 1032 y[7*i+3] = sum4; 1033 y[7*i+4] = sum5; 1034 y[7*i+5] = sum6; 1035 y[7*i+6] = sum7; 1036 } 1037 1038 ierr = PetscLogFlops(14*a->nz - 7*nonzerorow);CHKERRQ(ierr); 1039 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1040 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1046 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1047 { 1048 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1049 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1050 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1051 PetscErrorCode ierr; 1052 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1053 1054 PetscFunctionBegin; 1055 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1056 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1057 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1058 1059 for (i=0; i<m; i++) { 1060 idx = a->j + a->i[i] ; 1061 v = a->a + a->i[i] ; 1062 n = a->i[i+1] - a->i[i]; 1063 alpha1 = x[7*i]; 1064 alpha2 = x[7*i+1]; 1065 alpha3 = x[7*i+2]; 1066 alpha4 = x[7*i+3]; 1067 alpha5 = x[7*i+4]; 1068 alpha6 = x[7*i+5]; 1069 alpha7 = x[7*i+6]; 1070 while (n-->0) { 1071 y[7*(*idx)] += alpha1*(*v); 1072 y[7*(*idx)+1] += alpha2*(*v); 1073 y[7*(*idx)+2] += alpha3*(*v); 1074 y[7*(*idx)+3] += alpha4*(*v); 1075 y[7*(*idx)+4] += alpha5*(*v); 1076 y[7*(*idx)+5] += alpha6*(*v); 1077 y[7*(*idx)+6] += alpha7*(*v); 1078 idx++; v++; 1079 } 1080 } 1081 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1082 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1083 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1089 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1090 { 1091 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1092 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1093 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1094 PetscErrorCode ierr; 1095 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1096 PetscInt n,i,jrow,j; 1097 1098 PetscFunctionBegin; 1099 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1100 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1101 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1102 idx = a->j; 1103 v = a->a; 1104 ii = a->i; 1105 1106 for (i=0; i<m; i++) { 1107 jrow = ii[i]; 1108 n = ii[i+1] - jrow; 1109 sum1 = 0.0; 1110 sum2 = 0.0; 1111 sum3 = 0.0; 1112 sum4 = 0.0; 1113 sum5 = 0.0; 1114 sum6 = 0.0; 1115 sum7 = 0.0; 1116 for (j=0; j<n; j++) { 1117 sum1 += v[jrow]*x[7*idx[jrow]]; 1118 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1119 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1120 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1121 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1122 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1123 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1124 jrow++; 1125 } 1126 y[7*i] += sum1; 1127 y[7*i+1] += sum2; 1128 y[7*i+2] += sum3; 1129 y[7*i+3] += sum4; 1130 y[7*i+4] += sum5; 1131 y[7*i+5] += sum6; 1132 y[7*i+6] += sum7; 1133 } 1134 1135 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1136 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1137 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1143 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1144 { 1145 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1147 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1148 PetscErrorCode ierr; 1149 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1150 1151 PetscFunctionBegin; 1152 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1153 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1154 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1155 for (i=0; i<m; i++) { 1156 idx = a->j + a->i[i] ; 1157 v = a->a + a->i[i] ; 1158 n = a->i[i+1] - a->i[i]; 1159 alpha1 = x[7*i]; 1160 alpha2 = x[7*i+1]; 1161 alpha3 = x[7*i+2]; 1162 alpha4 = x[7*i+3]; 1163 alpha5 = x[7*i+4]; 1164 alpha6 = x[7*i+5]; 1165 alpha7 = x[7*i+6]; 1166 while (n-->0) { 1167 y[7*(*idx)] += alpha1*(*v); 1168 y[7*(*idx)+1] += alpha2*(*v); 1169 y[7*(*idx)+2] += alpha3*(*v); 1170 y[7*(*idx)+3] += alpha4*(*v); 1171 y[7*(*idx)+4] += alpha5*(*v); 1172 y[7*(*idx)+5] += alpha6*(*v); 1173 y[7*(*idx)+6] += alpha7*(*v); 1174 idx++; v++; 1175 } 1176 } 1177 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1178 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1179 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1185 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1186 { 1187 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1188 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1189 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1190 PetscErrorCode ierr; 1191 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1192 PetscInt n,i,jrow,j; 1193 1194 PetscFunctionBegin; 1195 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1196 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1197 idx = a->j; 1198 v = a->a; 1199 ii = a->i; 1200 1201 for (i=0; i<m; i++) { 1202 jrow = ii[i]; 1203 n = ii[i+1] - jrow; 1204 sum1 = 0.0; 1205 sum2 = 0.0; 1206 sum3 = 0.0; 1207 sum4 = 0.0; 1208 sum5 = 0.0; 1209 sum6 = 0.0; 1210 sum7 = 0.0; 1211 sum8 = 0.0; 1212 nonzerorow += (n>0); 1213 for (j=0; j<n; j++) { 1214 sum1 += v[jrow]*x[8*idx[jrow]]; 1215 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1216 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1217 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1218 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1219 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1220 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1221 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1222 jrow++; 1223 } 1224 y[8*i] = sum1; 1225 y[8*i+1] = sum2; 1226 y[8*i+2] = sum3; 1227 y[8*i+3] = sum4; 1228 y[8*i+4] = sum5; 1229 y[8*i+5] = sum6; 1230 y[8*i+6] = sum7; 1231 y[8*i+7] = sum8; 1232 } 1233 1234 ierr = PetscLogFlops(16*a->nz - 8*nonzerorow);CHKERRQ(ierr); 1235 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1236 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1242 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1243 { 1244 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1246 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1247 PetscErrorCode ierr; 1248 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1249 1250 PetscFunctionBegin; 1251 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1252 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1253 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1254 1255 for (i=0; i<m; i++) { 1256 idx = a->j + a->i[i] ; 1257 v = a->a + a->i[i] ; 1258 n = a->i[i+1] - a->i[i]; 1259 alpha1 = x[8*i]; 1260 alpha2 = x[8*i+1]; 1261 alpha3 = x[8*i+2]; 1262 alpha4 = x[8*i+3]; 1263 alpha5 = x[8*i+4]; 1264 alpha6 = x[8*i+5]; 1265 alpha7 = x[8*i+6]; 1266 alpha8 = x[8*i+7]; 1267 while (n-->0) { 1268 y[8*(*idx)] += alpha1*(*v); 1269 y[8*(*idx)+1] += alpha2*(*v); 1270 y[8*(*idx)+2] += alpha3*(*v); 1271 y[8*(*idx)+3] += alpha4*(*v); 1272 y[8*(*idx)+4] += alpha5*(*v); 1273 y[8*(*idx)+5] += alpha6*(*v); 1274 y[8*(*idx)+6] += alpha7*(*v); 1275 y[8*(*idx)+7] += alpha8*(*v); 1276 idx++; v++; 1277 } 1278 } 1279 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1280 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1281 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1287 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1288 { 1289 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1290 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1291 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1292 PetscErrorCode ierr; 1293 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1294 PetscInt n,i,jrow,j; 1295 1296 PetscFunctionBegin; 1297 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1298 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1299 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1300 idx = a->j; 1301 v = a->a; 1302 ii = a->i; 1303 1304 for (i=0; i<m; i++) { 1305 jrow = ii[i]; 1306 n = ii[i+1] - jrow; 1307 sum1 = 0.0; 1308 sum2 = 0.0; 1309 sum3 = 0.0; 1310 sum4 = 0.0; 1311 sum5 = 0.0; 1312 sum6 = 0.0; 1313 sum7 = 0.0; 1314 sum8 = 0.0; 1315 for (j=0; j<n; j++) { 1316 sum1 += v[jrow]*x[8*idx[jrow]]; 1317 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1318 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1319 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1320 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1321 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1322 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1323 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1324 jrow++; 1325 } 1326 y[8*i] += sum1; 1327 y[8*i+1] += sum2; 1328 y[8*i+2] += sum3; 1329 y[8*i+3] += sum4; 1330 y[8*i+4] += sum5; 1331 y[8*i+5] += sum6; 1332 y[8*i+6] += sum7; 1333 y[8*i+7] += sum8; 1334 } 1335 1336 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1337 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1338 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1344 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1345 { 1346 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1347 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1348 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1349 PetscErrorCode ierr; 1350 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1351 1352 PetscFunctionBegin; 1353 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1354 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1355 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1356 for (i=0; i<m; i++) { 1357 idx = a->j + a->i[i] ; 1358 v = a->a + a->i[i] ; 1359 n = a->i[i+1] - a->i[i]; 1360 alpha1 = x[8*i]; 1361 alpha2 = x[8*i+1]; 1362 alpha3 = x[8*i+2]; 1363 alpha4 = x[8*i+3]; 1364 alpha5 = x[8*i+4]; 1365 alpha6 = x[8*i+5]; 1366 alpha7 = x[8*i+6]; 1367 alpha8 = x[8*i+7]; 1368 while (n-->0) { 1369 y[8*(*idx)] += alpha1*(*v); 1370 y[8*(*idx)+1] += alpha2*(*v); 1371 y[8*(*idx)+2] += alpha3*(*v); 1372 y[8*(*idx)+3] += alpha4*(*v); 1373 y[8*(*idx)+4] += alpha5*(*v); 1374 y[8*(*idx)+5] += alpha6*(*v); 1375 y[8*(*idx)+6] += alpha7*(*v); 1376 y[8*(*idx)+7] += alpha8*(*v); 1377 idx++; v++; 1378 } 1379 } 1380 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1381 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1382 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 /* ------------------------------------------------------------------------------*/ 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1389 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1390 { 1391 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1393 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1394 PetscErrorCode ierr; 1395 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1396 PetscInt n,i,jrow,j; 1397 1398 PetscFunctionBegin; 1399 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1400 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1401 idx = a->j; 1402 v = a->a; 1403 ii = a->i; 1404 1405 for (i=0; i<m; i++) { 1406 jrow = ii[i]; 1407 n = ii[i+1] - jrow; 1408 sum1 = 0.0; 1409 sum2 = 0.0; 1410 sum3 = 0.0; 1411 sum4 = 0.0; 1412 sum5 = 0.0; 1413 sum6 = 0.0; 1414 sum7 = 0.0; 1415 sum8 = 0.0; 1416 sum9 = 0.0; 1417 nonzerorow += (n>0); 1418 for (j=0; j<n; j++) { 1419 sum1 += v[jrow]*x[9*idx[jrow]]; 1420 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1421 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1422 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1423 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1424 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1425 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1426 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1427 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1428 jrow++; 1429 } 1430 y[9*i] = sum1; 1431 y[9*i+1] = sum2; 1432 y[9*i+2] = sum3; 1433 y[9*i+3] = sum4; 1434 y[9*i+4] = sum5; 1435 y[9*i+5] = sum6; 1436 y[9*i+6] = sum7; 1437 y[9*i+7] = sum8; 1438 y[9*i+8] = sum9; 1439 } 1440 1441 ierr = PetscLogFlops(18*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1442 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1443 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 /* ------------------------------------------------------------------------------*/ 1448 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1451 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1452 { 1453 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1454 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1455 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1456 PetscErrorCode ierr; 1457 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1458 1459 PetscFunctionBegin; 1460 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1461 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1462 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1463 1464 for (i=0; i<m; i++) { 1465 idx = a->j + a->i[i] ; 1466 v = a->a + a->i[i] ; 1467 n = a->i[i+1] - a->i[i]; 1468 alpha1 = x[9*i]; 1469 alpha2 = x[9*i+1]; 1470 alpha3 = x[9*i+2]; 1471 alpha4 = x[9*i+3]; 1472 alpha5 = x[9*i+4]; 1473 alpha6 = x[9*i+5]; 1474 alpha7 = x[9*i+6]; 1475 alpha8 = x[9*i+7]; 1476 alpha9 = x[9*i+8]; 1477 while (n-->0) { 1478 y[9*(*idx)] += alpha1*(*v); 1479 y[9*(*idx)+1] += alpha2*(*v); 1480 y[9*(*idx)+2] += alpha3*(*v); 1481 y[9*(*idx)+3] += alpha4*(*v); 1482 y[9*(*idx)+4] += alpha5*(*v); 1483 y[9*(*idx)+5] += alpha6*(*v); 1484 y[9*(*idx)+6] += alpha7*(*v); 1485 y[9*(*idx)+7] += alpha8*(*v); 1486 y[9*(*idx)+8] += alpha9*(*v); 1487 idx++; v++; 1488 } 1489 } 1490 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1491 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1492 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1498 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1499 { 1500 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1501 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1502 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1503 PetscErrorCode ierr; 1504 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1505 PetscInt n,i,jrow,j; 1506 1507 PetscFunctionBegin; 1508 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1509 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1510 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1511 idx = a->j; 1512 v = a->a; 1513 ii = a->i; 1514 1515 for (i=0; i<m; i++) { 1516 jrow = ii[i]; 1517 n = ii[i+1] - jrow; 1518 sum1 = 0.0; 1519 sum2 = 0.0; 1520 sum3 = 0.0; 1521 sum4 = 0.0; 1522 sum5 = 0.0; 1523 sum6 = 0.0; 1524 sum7 = 0.0; 1525 sum8 = 0.0; 1526 sum9 = 0.0; 1527 for (j=0; j<n; j++) { 1528 sum1 += v[jrow]*x[9*idx[jrow]]; 1529 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1530 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1531 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1532 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1533 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1534 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1535 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1536 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1537 jrow++; 1538 } 1539 y[9*i] += sum1; 1540 y[9*i+1] += sum2; 1541 y[9*i+2] += sum3; 1542 y[9*i+3] += sum4; 1543 y[9*i+4] += sum5; 1544 y[9*i+5] += sum6; 1545 y[9*i+6] += sum7; 1546 y[9*i+7] += sum8; 1547 y[9*i+8] += sum9; 1548 } 1549 1550 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1551 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1552 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1558 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1559 { 1560 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1561 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1562 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1563 PetscErrorCode ierr; 1564 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1565 1566 PetscFunctionBegin; 1567 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1568 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1569 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1570 for (i=0; i<m; i++) { 1571 idx = a->j + a->i[i] ; 1572 v = a->a + a->i[i] ; 1573 n = a->i[i+1] - a->i[i]; 1574 alpha1 = x[9*i]; 1575 alpha2 = x[9*i+1]; 1576 alpha3 = x[9*i+2]; 1577 alpha4 = x[9*i+3]; 1578 alpha5 = x[9*i+4]; 1579 alpha6 = x[9*i+5]; 1580 alpha7 = x[9*i+6]; 1581 alpha8 = x[9*i+7]; 1582 alpha9 = x[9*i+8]; 1583 while (n-->0) { 1584 y[9*(*idx)] += alpha1*(*v); 1585 y[9*(*idx)+1] += alpha2*(*v); 1586 y[9*(*idx)+2] += alpha3*(*v); 1587 y[9*(*idx)+3] += alpha4*(*v); 1588 y[9*(*idx)+4] += alpha5*(*v); 1589 y[9*(*idx)+5] += alpha6*(*v); 1590 y[9*(*idx)+6] += alpha7*(*v); 1591 y[9*(*idx)+7] += alpha8*(*v); 1592 y[9*(*idx)+8] += alpha9*(*v); 1593 idx++; v++; 1594 } 1595 } 1596 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1597 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1598 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1603 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1604 { 1605 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1606 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1607 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1608 PetscErrorCode ierr; 1609 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1610 PetscInt n,i,jrow,j; 1611 1612 PetscFunctionBegin; 1613 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1614 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1615 idx = a->j; 1616 v = a->a; 1617 ii = a->i; 1618 1619 for (i=0; i<m; i++) { 1620 jrow = ii[i]; 1621 n = ii[i+1] - jrow; 1622 sum1 = 0.0; 1623 sum2 = 0.0; 1624 sum3 = 0.0; 1625 sum4 = 0.0; 1626 sum5 = 0.0; 1627 sum6 = 0.0; 1628 sum7 = 0.0; 1629 sum8 = 0.0; 1630 sum9 = 0.0; 1631 sum10 = 0.0; 1632 nonzerorow += (n>0); 1633 for (j=0; j<n; j++) { 1634 sum1 += v[jrow]*x[10*idx[jrow]]; 1635 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1636 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1637 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1638 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1639 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1640 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1641 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1642 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1643 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1644 jrow++; 1645 } 1646 y[10*i] = sum1; 1647 y[10*i+1] = sum2; 1648 y[10*i+2] = sum3; 1649 y[10*i+3] = sum4; 1650 y[10*i+4] = sum5; 1651 y[10*i+5] = sum6; 1652 y[10*i+6] = sum7; 1653 y[10*i+7] = sum8; 1654 y[10*i+8] = sum9; 1655 y[10*i+9] = sum10; 1656 } 1657 1658 ierr = PetscLogFlops(20*a->nz - 10*nonzerorow);CHKERRQ(ierr); 1659 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1660 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1666 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1667 { 1668 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1669 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1670 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1671 PetscErrorCode ierr; 1672 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1673 PetscInt n,i,jrow,j; 1674 1675 PetscFunctionBegin; 1676 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1677 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1678 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1679 idx = a->j; 1680 v = a->a; 1681 ii = a->i; 1682 1683 for (i=0; i<m; i++) { 1684 jrow = ii[i]; 1685 n = ii[i+1] - jrow; 1686 sum1 = 0.0; 1687 sum2 = 0.0; 1688 sum3 = 0.0; 1689 sum4 = 0.0; 1690 sum5 = 0.0; 1691 sum6 = 0.0; 1692 sum7 = 0.0; 1693 sum8 = 0.0; 1694 sum9 = 0.0; 1695 sum10 = 0.0; 1696 for (j=0; j<n; j++) { 1697 sum1 += v[jrow]*x[10*idx[jrow]]; 1698 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1699 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1700 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1701 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1702 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1703 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1704 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1705 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1706 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1707 jrow++; 1708 } 1709 y[10*i] += sum1; 1710 y[10*i+1] += sum2; 1711 y[10*i+2] += sum3; 1712 y[10*i+3] += sum4; 1713 y[10*i+4] += sum5; 1714 y[10*i+5] += sum6; 1715 y[10*i+6] += sum7; 1716 y[10*i+7] += sum8; 1717 y[10*i+8] += sum9; 1718 y[10*i+9] += sum10; 1719 } 1720 1721 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1722 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1723 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1729 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1730 { 1731 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1732 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1733 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1734 PetscErrorCode ierr; 1735 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1736 1737 PetscFunctionBegin; 1738 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1739 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1740 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1741 1742 for (i=0; i<m; i++) { 1743 idx = a->j + a->i[i] ; 1744 v = a->a + a->i[i] ; 1745 n = a->i[i+1] - a->i[i]; 1746 alpha1 = x[10*i]; 1747 alpha2 = x[10*i+1]; 1748 alpha3 = x[10*i+2]; 1749 alpha4 = x[10*i+3]; 1750 alpha5 = x[10*i+4]; 1751 alpha6 = x[10*i+5]; 1752 alpha7 = x[10*i+6]; 1753 alpha8 = x[10*i+7]; 1754 alpha9 = x[10*i+8]; 1755 alpha10 = x[10*i+9]; 1756 while (n-->0) { 1757 y[10*(*idx)] += alpha1*(*v); 1758 y[10*(*idx)+1] += alpha2*(*v); 1759 y[10*(*idx)+2] += alpha3*(*v); 1760 y[10*(*idx)+3] += alpha4*(*v); 1761 y[10*(*idx)+4] += alpha5*(*v); 1762 y[10*(*idx)+5] += alpha6*(*v); 1763 y[10*(*idx)+6] += alpha7*(*v); 1764 y[10*(*idx)+7] += alpha8*(*v); 1765 y[10*(*idx)+8] += alpha9*(*v); 1766 y[10*(*idx)+9] += alpha10*(*v); 1767 idx++; v++; 1768 } 1769 } 1770 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1771 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1772 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1778 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1779 { 1780 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1781 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1782 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1783 PetscErrorCode ierr; 1784 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1785 1786 PetscFunctionBegin; 1787 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1788 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1789 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1790 for (i=0; i<m; i++) { 1791 idx = a->j + a->i[i] ; 1792 v = a->a + a->i[i] ; 1793 n = a->i[i+1] - a->i[i]; 1794 alpha1 = x[10*i]; 1795 alpha2 = x[10*i+1]; 1796 alpha3 = x[10*i+2]; 1797 alpha4 = x[10*i+3]; 1798 alpha5 = x[10*i+4]; 1799 alpha6 = x[10*i+5]; 1800 alpha7 = x[10*i+6]; 1801 alpha8 = x[10*i+7]; 1802 alpha9 = x[10*i+8]; 1803 alpha10 = x[10*i+9]; 1804 while (n-->0) { 1805 y[10*(*idx)] += alpha1*(*v); 1806 y[10*(*idx)+1] += alpha2*(*v); 1807 y[10*(*idx)+2] += alpha3*(*v); 1808 y[10*(*idx)+3] += alpha4*(*v); 1809 y[10*(*idx)+4] += alpha5*(*v); 1810 y[10*(*idx)+5] += alpha6*(*v); 1811 y[10*(*idx)+6] += alpha7*(*v); 1812 y[10*(*idx)+7] += alpha8*(*v); 1813 y[10*(*idx)+8] += alpha9*(*v); 1814 y[10*(*idx)+9] += alpha10*(*v); 1815 idx++; v++; 1816 } 1817 } 1818 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1819 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1820 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 1825 /*--------------------------------------------------------------------------------------------*/ 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1828 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1829 { 1830 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1831 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1832 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1833 PetscErrorCode ierr; 1834 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 1835 PetscInt n,i,jrow,j; 1836 1837 PetscFunctionBegin; 1838 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1839 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1840 idx = a->j; 1841 v = a->a; 1842 ii = a->i; 1843 1844 for (i=0; i<m; i++) { 1845 jrow = ii[i]; 1846 n = ii[i+1] - jrow; 1847 sum1 = 0.0; 1848 sum2 = 0.0; 1849 sum3 = 0.0; 1850 sum4 = 0.0; 1851 sum5 = 0.0; 1852 sum6 = 0.0; 1853 sum7 = 0.0; 1854 sum8 = 0.0; 1855 sum9 = 0.0; 1856 sum10 = 0.0; 1857 sum11 = 0.0; 1858 nonzerorow += (n>0); 1859 for (j=0; j<n; j++) { 1860 sum1 += v[jrow]*x[11*idx[jrow]]; 1861 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1862 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1863 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1864 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1865 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1866 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1867 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1868 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1869 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1870 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1871 jrow++; 1872 } 1873 y[11*i] = sum1; 1874 y[11*i+1] = sum2; 1875 y[11*i+2] = sum3; 1876 y[11*i+3] = sum4; 1877 y[11*i+4] = sum5; 1878 y[11*i+5] = sum6; 1879 y[11*i+6] = sum7; 1880 y[11*i+7] = sum8; 1881 y[11*i+8] = sum9; 1882 y[11*i+9] = sum10; 1883 y[11*i+10] = sum11; 1884 } 1885 1886 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1888 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1894 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1895 { 1896 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1897 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1898 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1899 PetscErrorCode ierr; 1900 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1901 PetscInt n,i,jrow,j; 1902 1903 PetscFunctionBegin; 1904 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1905 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1906 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1907 idx = a->j; 1908 v = a->a; 1909 ii = a->i; 1910 1911 for (i=0; i<m; i++) { 1912 jrow = ii[i]; 1913 n = ii[i+1] - jrow; 1914 sum1 = 0.0; 1915 sum2 = 0.0; 1916 sum3 = 0.0; 1917 sum4 = 0.0; 1918 sum5 = 0.0; 1919 sum6 = 0.0; 1920 sum7 = 0.0; 1921 sum8 = 0.0; 1922 sum9 = 0.0; 1923 sum10 = 0.0; 1924 sum11 = 0.0; 1925 for (j=0; j<n; j++) { 1926 sum1 += v[jrow]*x[11*idx[jrow]]; 1927 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1928 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1929 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1930 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1931 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1932 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1933 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1934 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1935 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1936 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1937 jrow++; 1938 } 1939 y[11*i] += sum1; 1940 y[11*i+1] += sum2; 1941 y[11*i+2] += sum3; 1942 y[11*i+3] += sum4; 1943 y[11*i+4] += sum5; 1944 y[11*i+5] += sum6; 1945 y[11*i+6] += sum7; 1946 y[11*i+7] += sum8; 1947 y[11*i+8] += sum9; 1948 y[11*i+9] += sum10; 1949 y[11*i+10] += sum11; 1950 } 1951 1952 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 1953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1954 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 1960 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1961 { 1962 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1963 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1964 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11,zero = 0.0; 1965 PetscErrorCode ierr; 1966 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 1967 1968 PetscFunctionBegin; 1969 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1970 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1971 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1972 1973 for (i=0; i<m; i++) { 1974 idx = a->j + a->i[i] ; 1975 v = a->a + a->i[i] ; 1976 n = a->i[i+1] - a->i[i]; 1977 alpha1 = x[11*i]; 1978 alpha2 = x[11*i+1]; 1979 alpha3 = x[11*i+2]; 1980 alpha4 = x[11*i+3]; 1981 alpha5 = x[11*i+4]; 1982 alpha6 = x[11*i+5]; 1983 alpha7 = x[11*i+6]; 1984 alpha8 = x[11*i+7]; 1985 alpha9 = x[11*i+8]; 1986 alpha10 = x[11*i+9]; 1987 alpha11 = x[11*i+10]; 1988 while (n-->0) { 1989 y[11*(*idx)] += alpha1*(*v); 1990 y[11*(*idx)+1] += alpha2*(*v); 1991 y[11*(*idx)+2] += alpha3*(*v); 1992 y[11*(*idx)+3] += alpha4*(*v); 1993 y[11*(*idx)+4] += alpha5*(*v); 1994 y[11*(*idx)+5] += alpha6*(*v); 1995 y[11*(*idx)+6] += alpha7*(*v); 1996 y[11*(*idx)+7] += alpha8*(*v); 1997 y[11*(*idx)+8] += alpha9*(*v); 1998 y[11*(*idx)+9] += alpha10*(*v); 1999 y[11*(*idx)+10] += alpha11*(*v); 2000 idx++; v++; 2001 } 2002 } 2003 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2004 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2005 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2011 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2012 { 2013 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2014 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2015 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2016 PetscErrorCode ierr; 2017 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2018 2019 PetscFunctionBegin; 2020 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2021 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2022 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2023 for (i=0; i<m; i++) { 2024 idx = a->j + a->i[i] ; 2025 v = a->a + a->i[i] ; 2026 n = a->i[i+1] - a->i[i]; 2027 alpha1 = x[11*i]; 2028 alpha2 = x[11*i+1]; 2029 alpha3 = x[11*i+2]; 2030 alpha4 = x[11*i+3]; 2031 alpha5 = x[11*i+4]; 2032 alpha6 = x[11*i+5]; 2033 alpha7 = x[11*i+6]; 2034 alpha8 = x[11*i+7]; 2035 alpha9 = x[11*i+8]; 2036 alpha10 = x[11*i+9]; 2037 alpha11 = x[11*i+10]; 2038 while (n-->0) { 2039 y[11*(*idx)] += alpha1*(*v); 2040 y[11*(*idx)+1] += alpha2*(*v); 2041 y[11*(*idx)+2] += alpha3*(*v); 2042 y[11*(*idx)+3] += alpha4*(*v); 2043 y[11*(*idx)+4] += alpha5*(*v); 2044 y[11*(*idx)+5] += alpha6*(*v); 2045 y[11*(*idx)+6] += alpha7*(*v); 2046 y[11*(*idx)+7] += alpha8*(*v); 2047 y[11*(*idx)+8] += alpha9*(*v); 2048 y[11*(*idx)+9] += alpha10*(*v); 2049 y[11*(*idx)+10] += alpha11*(*v); 2050 idx++; v++; 2051 } 2052 } 2053 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2054 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2055 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 2060 /*--------------------------------------------------------------------------------------------*/ 2061 #undef __FUNCT__ 2062 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2063 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2064 { 2065 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2066 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2067 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2068 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2069 PetscErrorCode ierr; 2070 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 2071 PetscInt n,i,jrow,j; 2072 2073 PetscFunctionBegin; 2074 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2075 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2076 idx = a->j; 2077 v = a->a; 2078 ii = a->i; 2079 2080 for (i=0; i<m; i++) { 2081 jrow = ii[i]; 2082 n = ii[i+1] - jrow; 2083 sum1 = 0.0; 2084 sum2 = 0.0; 2085 sum3 = 0.0; 2086 sum4 = 0.0; 2087 sum5 = 0.0; 2088 sum6 = 0.0; 2089 sum7 = 0.0; 2090 sum8 = 0.0; 2091 sum9 = 0.0; 2092 sum10 = 0.0; 2093 sum11 = 0.0; 2094 sum12 = 0.0; 2095 sum13 = 0.0; 2096 sum14 = 0.0; 2097 sum15 = 0.0; 2098 sum16 = 0.0; 2099 nonzerorow += (n>0); 2100 for (j=0; j<n; j++) { 2101 sum1 += v[jrow]*x[16*idx[jrow]]; 2102 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2103 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2104 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2105 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2106 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2107 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2108 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2109 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2110 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2111 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2112 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2113 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2114 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2115 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2116 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2117 jrow++; 2118 } 2119 y[16*i] = sum1; 2120 y[16*i+1] = sum2; 2121 y[16*i+2] = sum3; 2122 y[16*i+3] = sum4; 2123 y[16*i+4] = sum5; 2124 y[16*i+5] = sum6; 2125 y[16*i+6] = sum7; 2126 y[16*i+7] = sum8; 2127 y[16*i+8] = sum9; 2128 y[16*i+9] = sum10; 2129 y[16*i+10] = sum11; 2130 y[16*i+11] = sum12; 2131 y[16*i+12] = sum13; 2132 y[16*i+13] = sum14; 2133 y[16*i+14] = sum15; 2134 y[16*i+15] = sum16; 2135 } 2136 2137 ierr = PetscLogFlops(32*a->nz - 16*nonzerorow);CHKERRQ(ierr); 2138 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2139 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 #undef __FUNCT__ 2144 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2145 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2146 { 2147 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2148 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2149 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 2150 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2151 PetscErrorCode ierr; 2152 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2153 2154 PetscFunctionBegin; 2155 ierr = VecSet(yy,zero);CHKERRQ(ierr); 2156 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2157 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2158 2159 for (i=0; i<m; i++) { 2160 idx = a->j + a->i[i] ; 2161 v = a->a + a->i[i] ; 2162 n = a->i[i+1] - a->i[i]; 2163 alpha1 = x[16*i]; 2164 alpha2 = x[16*i+1]; 2165 alpha3 = x[16*i+2]; 2166 alpha4 = x[16*i+3]; 2167 alpha5 = x[16*i+4]; 2168 alpha6 = x[16*i+5]; 2169 alpha7 = x[16*i+6]; 2170 alpha8 = x[16*i+7]; 2171 alpha9 = x[16*i+8]; 2172 alpha10 = x[16*i+9]; 2173 alpha11 = x[16*i+10]; 2174 alpha12 = x[16*i+11]; 2175 alpha13 = x[16*i+12]; 2176 alpha14 = x[16*i+13]; 2177 alpha15 = x[16*i+14]; 2178 alpha16 = x[16*i+15]; 2179 while (n-->0) { 2180 y[16*(*idx)] += alpha1*(*v); 2181 y[16*(*idx)+1] += alpha2*(*v); 2182 y[16*(*idx)+2] += alpha3*(*v); 2183 y[16*(*idx)+3] += alpha4*(*v); 2184 y[16*(*idx)+4] += alpha5*(*v); 2185 y[16*(*idx)+5] += alpha6*(*v); 2186 y[16*(*idx)+6] += alpha7*(*v); 2187 y[16*(*idx)+7] += alpha8*(*v); 2188 y[16*(*idx)+8] += alpha9*(*v); 2189 y[16*(*idx)+9] += alpha10*(*v); 2190 y[16*(*idx)+10] += alpha11*(*v); 2191 y[16*(*idx)+11] += alpha12*(*v); 2192 y[16*(*idx)+12] += alpha13*(*v); 2193 y[16*(*idx)+13] += alpha14*(*v); 2194 y[16*(*idx)+14] += alpha15*(*v); 2195 y[16*(*idx)+15] += alpha16*(*v); 2196 idx++; v++; 2197 } 2198 } 2199 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2200 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2201 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2207 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2208 { 2209 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2211 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2212 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2213 PetscErrorCode ierr; 2214 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2215 PetscInt n,i,jrow,j; 2216 2217 PetscFunctionBegin; 2218 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2219 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2220 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2221 idx = a->j; 2222 v = a->a; 2223 ii = a->i; 2224 2225 for (i=0; i<m; i++) { 2226 jrow = ii[i]; 2227 n = ii[i+1] - jrow; 2228 sum1 = 0.0; 2229 sum2 = 0.0; 2230 sum3 = 0.0; 2231 sum4 = 0.0; 2232 sum5 = 0.0; 2233 sum6 = 0.0; 2234 sum7 = 0.0; 2235 sum8 = 0.0; 2236 sum9 = 0.0; 2237 sum10 = 0.0; 2238 sum11 = 0.0; 2239 sum12 = 0.0; 2240 sum13 = 0.0; 2241 sum14 = 0.0; 2242 sum15 = 0.0; 2243 sum16 = 0.0; 2244 for (j=0; j<n; j++) { 2245 sum1 += v[jrow]*x[16*idx[jrow]]; 2246 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2247 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2248 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2249 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2250 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2251 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2252 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2253 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2254 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2255 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2256 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2257 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2258 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2259 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2260 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2261 jrow++; 2262 } 2263 y[16*i] += sum1; 2264 y[16*i+1] += sum2; 2265 y[16*i+2] += sum3; 2266 y[16*i+3] += sum4; 2267 y[16*i+4] += sum5; 2268 y[16*i+5] += sum6; 2269 y[16*i+6] += sum7; 2270 y[16*i+7] += sum8; 2271 y[16*i+8] += sum9; 2272 y[16*i+9] += sum10; 2273 y[16*i+10] += sum11; 2274 y[16*i+11] += sum12; 2275 y[16*i+12] += sum13; 2276 y[16*i+13] += sum14; 2277 y[16*i+14] += sum15; 2278 y[16*i+15] += sum16; 2279 } 2280 2281 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2282 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2283 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2289 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2290 { 2291 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2292 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2293 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2294 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2295 PetscErrorCode ierr; 2296 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2297 2298 PetscFunctionBegin; 2299 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2300 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2301 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2302 for (i=0; i<m; i++) { 2303 idx = a->j + a->i[i] ; 2304 v = a->a + a->i[i] ; 2305 n = a->i[i+1] - a->i[i]; 2306 alpha1 = x[16*i]; 2307 alpha2 = x[16*i+1]; 2308 alpha3 = x[16*i+2]; 2309 alpha4 = x[16*i+3]; 2310 alpha5 = x[16*i+4]; 2311 alpha6 = x[16*i+5]; 2312 alpha7 = x[16*i+6]; 2313 alpha8 = x[16*i+7]; 2314 alpha9 = x[16*i+8]; 2315 alpha10 = x[16*i+9]; 2316 alpha11 = x[16*i+10]; 2317 alpha12 = x[16*i+11]; 2318 alpha13 = x[16*i+12]; 2319 alpha14 = x[16*i+13]; 2320 alpha15 = x[16*i+14]; 2321 alpha16 = x[16*i+15]; 2322 while (n-->0) { 2323 y[16*(*idx)] += alpha1*(*v); 2324 y[16*(*idx)+1] += alpha2*(*v); 2325 y[16*(*idx)+2] += alpha3*(*v); 2326 y[16*(*idx)+3] += alpha4*(*v); 2327 y[16*(*idx)+4] += alpha5*(*v); 2328 y[16*(*idx)+5] += alpha6*(*v); 2329 y[16*(*idx)+6] += alpha7*(*v); 2330 y[16*(*idx)+7] += alpha8*(*v); 2331 y[16*(*idx)+8] += alpha9*(*v); 2332 y[16*(*idx)+9] += alpha10*(*v); 2333 y[16*(*idx)+10] += alpha11*(*v); 2334 y[16*(*idx)+11] += alpha12*(*v); 2335 y[16*(*idx)+12] += alpha13*(*v); 2336 y[16*(*idx)+13] += alpha14*(*v); 2337 y[16*(*idx)+14] += alpha15*(*v); 2338 y[16*(*idx)+15] += alpha16*(*v); 2339 idx++; v++; 2340 } 2341 } 2342 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2343 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2344 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2345 PetscFunctionReturn(0); 2346 } 2347 2348 /*--------------------------------------------------------------------------------------------*/ 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2351 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2352 { 2353 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2354 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2355 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2356 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2357 PetscErrorCode ierr; 2358 PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii; 2359 PetscInt n,i,jrow,j; 2360 2361 PetscFunctionBegin; 2362 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2363 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2364 idx = a->j; 2365 v = a->a; 2366 ii = a->i; 2367 2368 for (i=0; i<m; i++) { 2369 jrow = ii[i]; 2370 n = ii[i+1] - jrow; 2371 sum1 = 0.0; 2372 sum2 = 0.0; 2373 sum3 = 0.0; 2374 sum4 = 0.0; 2375 sum5 = 0.0; 2376 sum6 = 0.0; 2377 sum7 = 0.0; 2378 sum8 = 0.0; 2379 sum9 = 0.0; 2380 sum10 = 0.0; 2381 sum11 = 0.0; 2382 sum12 = 0.0; 2383 sum13 = 0.0; 2384 sum14 = 0.0; 2385 sum15 = 0.0; 2386 sum16 = 0.0; 2387 sum17 = 0.0; 2388 sum18 = 0.0; 2389 nonzerorow += (n>0); 2390 for (j=0; j<n; j++) { 2391 sum1 += v[jrow]*x[18*idx[jrow]]; 2392 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2393 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2394 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2395 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2396 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2397 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2398 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2399 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2400 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2401 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2402 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2403 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2404 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2405 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2406 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2407 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2408 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2409 jrow++; 2410 } 2411 y[18*i] = sum1; 2412 y[18*i+1] = sum2; 2413 y[18*i+2] = sum3; 2414 y[18*i+3] = sum4; 2415 y[18*i+4] = sum5; 2416 y[18*i+5] = sum6; 2417 y[18*i+6] = sum7; 2418 y[18*i+7] = sum8; 2419 y[18*i+8] = sum9; 2420 y[18*i+9] = sum10; 2421 y[18*i+10] = sum11; 2422 y[18*i+11] = sum12; 2423 y[18*i+12] = sum13; 2424 y[18*i+13] = sum14; 2425 y[18*i+14] = sum15; 2426 y[18*i+15] = sum16; 2427 y[18*i+16] = sum17; 2428 y[18*i+17] = sum18; 2429 } 2430 2431 ierr = PetscLogFlops(36*a->nz - 18*nonzerorow);CHKERRQ(ierr); 2432 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2433 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2439 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2440 { 2441 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2442 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2443 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 2444 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2445 PetscErrorCode ierr; 2446 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2447 2448 PetscFunctionBegin; 2449 ierr = VecSet(yy,zero);CHKERRQ(ierr); 2450 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2451 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2452 2453 for (i=0; i<m; i++) { 2454 idx = a->j + a->i[i] ; 2455 v = a->a + a->i[i] ; 2456 n = a->i[i+1] - a->i[i]; 2457 alpha1 = x[18*i]; 2458 alpha2 = x[18*i+1]; 2459 alpha3 = x[18*i+2]; 2460 alpha4 = x[18*i+3]; 2461 alpha5 = x[18*i+4]; 2462 alpha6 = x[18*i+5]; 2463 alpha7 = x[18*i+6]; 2464 alpha8 = x[18*i+7]; 2465 alpha9 = x[18*i+8]; 2466 alpha10 = x[18*i+9]; 2467 alpha11 = x[18*i+10]; 2468 alpha12 = x[18*i+11]; 2469 alpha13 = x[18*i+12]; 2470 alpha14 = x[18*i+13]; 2471 alpha15 = x[18*i+14]; 2472 alpha16 = x[18*i+15]; 2473 alpha17 = x[18*i+16]; 2474 alpha18 = x[18*i+17]; 2475 while (n-->0) { 2476 y[18*(*idx)] += alpha1*(*v); 2477 y[18*(*idx)+1] += alpha2*(*v); 2478 y[18*(*idx)+2] += alpha3*(*v); 2479 y[18*(*idx)+3] += alpha4*(*v); 2480 y[18*(*idx)+4] += alpha5*(*v); 2481 y[18*(*idx)+5] += alpha6*(*v); 2482 y[18*(*idx)+6] += alpha7*(*v); 2483 y[18*(*idx)+7] += alpha8*(*v); 2484 y[18*(*idx)+8] += alpha9*(*v); 2485 y[18*(*idx)+9] += alpha10*(*v); 2486 y[18*(*idx)+10] += alpha11*(*v); 2487 y[18*(*idx)+11] += alpha12*(*v); 2488 y[18*(*idx)+12] += alpha13*(*v); 2489 y[18*(*idx)+13] += alpha14*(*v); 2490 y[18*(*idx)+14] += alpha15*(*v); 2491 y[18*(*idx)+15] += alpha16*(*v); 2492 y[18*(*idx)+16] += alpha17*(*v); 2493 y[18*(*idx)+17] += alpha18*(*v); 2494 idx++; v++; 2495 } 2496 } 2497 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2498 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2499 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2505 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2506 { 2507 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2508 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2509 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2510 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2511 PetscErrorCode ierr; 2512 PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2513 PetscInt n,i,jrow,j; 2514 2515 PetscFunctionBegin; 2516 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2517 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2518 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2519 idx = a->j; 2520 v = a->a; 2521 ii = a->i; 2522 2523 for (i=0; i<m; i++) { 2524 jrow = ii[i]; 2525 n = ii[i+1] - jrow; 2526 sum1 = 0.0; 2527 sum2 = 0.0; 2528 sum3 = 0.0; 2529 sum4 = 0.0; 2530 sum5 = 0.0; 2531 sum6 = 0.0; 2532 sum7 = 0.0; 2533 sum8 = 0.0; 2534 sum9 = 0.0; 2535 sum10 = 0.0; 2536 sum11 = 0.0; 2537 sum12 = 0.0; 2538 sum13 = 0.0; 2539 sum14 = 0.0; 2540 sum15 = 0.0; 2541 sum16 = 0.0; 2542 sum17 = 0.0; 2543 sum18 = 0.0; 2544 for (j=0; j<n; j++) { 2545 sum1 += v[jrow]*x[18*idx[jrow]]; 2546 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2547 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2548 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2549 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2550 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2551 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2552 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2553 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2554 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2555 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2556 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2557 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2558 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2559 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2560 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2561 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2562 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2563 jrow++; 2564 } 2565 y[18*i] += sum1; 2566 y[18*i+1] += sum2; 2567 y[18*i+2] += sum3; 2568 y[18*i+3] += sum4; 2569 y[18*i+4] += sum5; 2570 y[18*i+5] += sum6; 2571 y[18*i+6] += sum7; 2572 y[18*i+7] += sum8; 2573 y[18*i+8] += sum9; 2574 y[18*i+9] += sum10; 2575 y[18*i+10] += sum11; 2576 y[18*i+11] += sum12; 2577 y[18*i+12] += sum13; 2578 y[18*i+13] += sum14; 2579 y[18*i+14] += sum15; 2580 y[18*i+15] += sum16; 2581 y[18*i+16] += sum17; 2582 y[18*i+17] += sum18; 2583 } 2584 2585 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2586 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2587 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2593 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2594 { 2595 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2596 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2597 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2598 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2599 PetscErrorCode ierr; 2600 PetscInt m = b->AIJ->rmap->n,n,i,*idx; 2601 2602 PetscFunctionBegin; 2603 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2604 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2605 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2606 for (i=0; i<m; i++) { 2607 idx = a->j + a->i[i] ; 2608 v = a->a + a->i[i] ; 2609 n = a->i[i+1] - a->i[i]; 2610 alpha1 = x[18*i]; 2611 alpha2 = x[18*i+1]; 2612 alpha3 = x[18*i+2]; 2613 alpha4 = x[18*i+3]; 2614 alpha5 = x[18*i+4]; 2615 alpha6 = x[18*i+5]; 2616 alpha7 = x[18*i+6]; 2617 alpha8 = x[18*i+7]; 2618 alpha9 = x[18*i+8]; 2619 alpha10 = x[18*i+9]; 2620 alpha11 = x[18*i+10]; 2621 alpha12 = x[18*i+11]; 2622 alpha13 = x[18*i+12]; 2623 alpha14 = x[18*i+13]; 2624 alpha15 = x[18*i+14]; 2625 alpha16 = x[18*i+15]; 2626 alpha17 = x[18*i+16]; 2627 alpha18 = x[18*i+17]; 2628 while (n-->0) { 2629 y[18*(*idx)] += alpha1*(*v); 2630 y[18*(*idx)+1] += alpha2*(*v); 2631 y[18*(*idx)+2] += alpha3*(*v); 2632 y[18*(*idx)+3] += alpha4*(*v); 2633 y[18*(*idx)+4] += alpha5*(*v); 2634 y[18*(*idx)+5] += alpha6*(*v); 2635 y[18*(*idx)+6] += alpha7*(*v); 2636 y[18*(*idx)+7] += alpha8*(*v); 2637 y[18*(*idx)+8] += alpha9*(*v); 2638 y[18*(*idx)+9] += alpha10*(*v); 2639 y[18*(*idx)+10] += alpha11*(*v); 2640 y[18*(*idx)+11] += alpha12*(*v); 2641 y[18*(*idx)+12] += alpha13*(*v); 2642 y[18*(*idx)+13] += alpha14*(*v); 2643 y[18*(*idx)+14] += alpha15*(*v); 2644 y[18*(*idx)+15] += alpha16*(*v); 2645 y[18*(*idx)+16] += alpha17*(*v); 2646 y[18*(*idx)+17] += alpha18*(*v); 2647 idx++; v++; 2648 } 2649 } 2650 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2651 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2652 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2653 PetscFunctionReturn(0); 2654 } 2655 2656 /*===================================================================================*/ 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2659 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2660 { 2661 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2662 PetscErrorCode ierr; 2663 2664 PetscFunctionBegin; 2665 /* start the scatter */ 2666 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2667 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2668 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2669 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 2673 #undef __FUNCT__ 2674 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2675 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2676 { 2677 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2678 PetscErrorCode ierr; 2679 2680 PetscFunctionBegin; 2681 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2682 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2683 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2684 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2685 PetscFunctionReturn(0); 2686 } 2687 2688 #undef __FUNCT__ 2689 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2690 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2691 { 2692 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2693 PetscErrorCode ierr; 2694 2695 PetscFunctionBegin; 2696 /* start the scatter */ 2697 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2698 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2699 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2700 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2701 PetscFunctionReturn(0); 2702 } 2703 2704 #undef __FUNCT__ 2705 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2706 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2707 { 2708 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2713 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2714 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2715 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2716 PetscFunctionReturn(0); 2717 } 2718 2719 /* ----------------------------------------------------------------*/ 2720 #undef __FUNCT__ 2721 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2722 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2723 { 2724 /* This routine requires testing -- but it's getting better. */ 2725 PetscErrorCode ierr; 2726 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2727 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2728 Mat P=pp->AIJ; 2729 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2730 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 2731 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2732 PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn; 2733 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 2734 MatScalar *ca; 2735 2736 PetscFunctionBegin; 2737 /* Start timer */ 2738 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2739 2740 /* Get ij structure of P^T */ 2741 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2742 2743 cn = pn*ppdof; 2744 /* Allocate ci array, arrays for fill computation and */ 2745 /* free space for accumulating nonzero column info */ 2746 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2747 ci[0] = 0; 2748 2749 /* Work arrays for rows of P^T*A */ 2750 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 2751 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 2752 ptasparserow = ptadenserow + an; 2753 denserow = ptasparserow + an; 2754 sparserow = denserow + cn; 2755 2756 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2757 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2758 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2759 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2760 current_space = free_space; 2761 2762 /* Determine symbolic info for each row of C: */ 2763 for (i=0;i<pn;i++) { 2764 ptnzi = pti[i+1] - pti[i]; 2765 ptJ = ptj + pti[i]; 2766 for (dof=0;dof<ppdof;dof++) { 2767 ptanzi = 0; 2768 /* Determine symbolic row of PtA: */ 2769 for (j=0;j<ptnzi;j++) { 2770 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2771 arow = ptJ[j]*ppdof + dof; 2772 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2773 anzj = ai[arow+1] - ai[arow]; 2774 ajj = aj + ai[arow]; 2775 for (k=0;k<anzj;k++) { 2776 if (!ptadenserow[ajj[k]]) { 2777 ptadenserow[ajj[k]] = -1; 2778 ptasparserow[ptanzi++] = ajj[k]; 2779 } 2780 } 2781 } 2782 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2783 ptaj = ptasparserow; 2784 cnzi = 0; 2785 for (j=0;j<ptanzi;j++) { 2786 /* Get offset within block of P */ 2787 pshift = *ptaj%ppdof; 2788 /* Get block row of P */ 2789 prow = (*ptaj++)/ppdof; /* integer division */ 2790 /* P has same number of nonzeros per row as the compressed form */ 2791 pnzj = pi[prow+1] - pi[prow]; 2792 pjj = pj + pi[prow]; 2793 for (k=0;k<pnzj;k++) { 2794 /* Locations in C are shifted by the offset within the block */ 2795 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2796 if (!denserow[pjj[k]*ppdof+pshift]) { 2797 denserow[pjj[k]*ppdof+pshift] = -1; 2798 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2799 } 2800 } 2801 } 2802 2803 /* sort sparserow */ 2804 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2805 2806 /* If free space is not available, make more free space */ 2807 /* Double the amount of total space in the list */ 2808 if (current_space->local_remaining<cnzi) { 2809 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2810 } 2811 2812 /* Copy data into free space, and zero out denserows */ 2813 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2814 current_space->array += cnzi; 2815 current_space->local_used += cnzi; 2816 current_space->local_remaining -= cnzi; 2817 2818 for (j=0;j<ptanzi;j++) { 2819 ptadenserow[ptasparserow[j]] = 0; 2820 } 2821 for (j=0;j<cnzi;j++) { 2822 denserow[sparserow[j]] = 0; 2823 } 2824 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2825 /* For now, we will recompute what is needed. */ 2826 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2827 } 2828 } 2829 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2830 /* Allocate space for cj, initialize cj, and */ 2831 /* destroy list of free space and other temporary array(s) */ 2832 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2833 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2834 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2835 2836 /* Allocate space for ca */ 2837 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2838 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2839 2840 /* put together the new matrix */ 2841 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2842 2843 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2844 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2845 c = (Mat_SeqAIJ *)((*C)->data); 2846 c->free_a = PETSC_TRUE; 2847 c->free_ij = PETSC_TRUE; 2848 c->nonew = 0; 2849 2850 /* Clean up. */ 2851 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2852 2853 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2859 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2860 { 2861 /* This routine requires testing -- first draft only */ 2862 PetscErrorCode ierr; 2863 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2864 Mat P=pp->AIJ; 2865 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2866 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2867 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2868 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2869 PetscInt *ci=c->i,*cj=c->j,*cjj; 2870 PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 2871 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2872 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2873 2874 PetscFunctionBegin; 2875 /* Allocate temporary array for storage of one row of A*P */ 2876 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2877 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2878 2879 apj = (PetscInt *)(apa + cn); 2880 apjdense = apj + cn; 2881 2882 /* Clear old values in C */ 2883 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2884 2885 for (i=0;i<am;i++) { 2886 /* Form sparse row of A*P */ 2887 anzi = ai[i+1] - ai[i]; 2888 apnzj = 0; 2889 for (j=0;j<anzi;j++) { 2890 /* Get offset within block of P */ 2891 pshift = *aj%ppdof; 2892 /* Get block row of P */ 2893 prow = *aj++/ppdof; /* integer division */ 2894 pnzj = pi[prow+1] - pi[prow]; 2895 pjj = pj + pi[prow]; 2896 paj = pa + pi[prow]; 2897 for (k=0;k<pnzj;k++) { 2898 poffset = pjj[k]*ppdof+pshift; 2899 if (!apjdense[poffset]) { 2900 apjdense[poffset] = -1; 2901 apj[apnzj++] = poffset; 2902 } 2903 apa[poffset] += (*aa)*paj[k]; 2904 } 2905 ierr = PetscLogFlops(2*pnzj);CHKERRQ(ierr); 2906 aa++; 2907 } 2908 2909 /* Sort the j index array for quick sparse axpy. */ 2910 /* Note: a array does not need sorting as it is in dense storage locations. */ 2911 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2912 2913 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2914 prow = i/ppdof; /* integer division */ 2915 pshift = i%ppdof; 2916 poffset = pi[prow]; 2917 pnzi = pi[prow+1] - poffset; 2918 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2919 pJ = pj+poffset; 2920 pA = pa+poffset; 2921 for (j=0;j<pnzi;j++) { 2922 crow = (*pJ)*ppdof+pshift; 2923 cjj = cj + ci[crow]; 2924 caj = ca + ci[crow]; 2925 pJ++; 2926 /* Perform sparse axpy operation. Note cjj includes apj. */ 2927 for (k=0,nextap=0;nextap<apnzj;k++) { 2928 if (cjj[k]==apj[nextap]) { 2929 caj[k] += (*pA)*apa[apj[nextap++]]; 2930 } 2931 } 2932 ierr = PetscLogFlops(2*apnzj);CHKERRQ(ierr); 2933 pA++; 2934 } 2935 2936 /* Zero the current row info for A*P */ 2937 for (j=0;j<apnzj;j++) { 2938 apa[apj[j]] = 0.; 2939 apjdense[apj[j]] = 0; 2940 } 2941 } 2942 2943 /* Assemble the final matrix and clean up */ 2944 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2945 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2946 ierr = PetscFree(apa);CHKERRQ(ierr); 2947 PetscFunctionReturn(0); 2948 } 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 2952 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2953 { 2954 PetscErrorCode ierr; 2955 2956 PetscFunctionBegin; 2957 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 2958 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 2959 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 2960 PetscFunctionReturn(0); 2961 } 2962 2963 #undef __FUNCT__ 2964 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 2965 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 2966 { 2967 PetscFunctionBegin; 2968 SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 2969 PetscFunctionReturn(0); 2970 } 2971 2972 EXTERN_C_BEGIN 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2975 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2976 { 2977 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2978 Mat a = b->AIJ,B; 2979 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 2980 PetscErrorCode ierr; 2981 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2982 PetscInt *cols; 2983 PetscScalar *vals; 2984 2985 PetscFunctionBegin; 2986 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 2987 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 2988 for (i=0; i<m; i++) { 2989 nmax = PetscMax(nmax,aij->ilen[i]); 2990 for (j=0; j<dof; j++) { 2991 ilen[dof*i+j] = aij->ilen[i]; 2992 } 2993 } 2994 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2995 ierr = PetscFree(ilen);CHKERRQ(ierr); 2996 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 2997 ii = 0; 2998 for (i=0; i<m; i++) { 2999 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3000 for (j=0; j<dof; j++) { 3001 for (k=0; k<ncols; k++) { 3002 icols[k] = dof*cols[k]+j; 3003 } 3004 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3005 ii++; 3006 } 3007 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3008 } 3009 ierr = PetscFree(icols);CHKERRQ(ierr); 3010 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3011 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3012 3013 if (reuse == MAT_REUSE_MATRIX) { 3014 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3015 } else { 3016 *newmat = B; 3017 } 3018 PetscFunctionReturn(0); 3019 } 3020 EXTERN_C_END 3021 3022 #include "../src/mat/impls/aij/mpi/mpiaij.h" 3023 3024 EXTERN_C_BEGIN 3025 #undef __FUNCT__ 3026 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3027 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3028 { 3029 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3030 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3031 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3032 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3033 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3034 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3035 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3036 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 3037 PetscInt rstart,cstart,*garray,ii,k; 3038 PetscErrorCode ierr; 3039 PetscScalar *vals,*ovals; 3040 3041 PetscFunctionBegin; 3042 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3043 for (i=0; i<A->rmap->n/dof; i++) { 3044 nmax = PetscMax(nmax,AIJ->ilen[i]); 3045 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3046 for (j=0; j<dof; j++) { 3047 dnz[dof*i+j] = AIJ->ilen[i]; 3048 onz[dof*i+j] = OAIJ->ilen[i]; 3049 } 3050 } 3051 ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3052 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3053 3054 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3055 rstart = dof*maij->A->rmap->rstart; 3056 cstart = dof*maij->A->cmap->rstart; 3057 garray = mpiaij->garray; 3058 3059 ii = rstart; 3060 for (i=0; i<A->rmap->n/dof; i++) { 3061 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3062 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3063 for (j=0; j<dof; j++) { 3064 for (k=0; k<ncols; k++) { 3065 icols[k] = cstart + dof*cols[k]+j; 3066 } 3067 for (k=0; k<oncols; k++) { 3068 oicols[k] = dof*garray[ocols[k]]+j; 3069 } 3070 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3071 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3072 ii++; 3073 } 3074 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3075 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3076 } 3077 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3078 3079 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3080 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3081 3082 if (reuse == MAT_REUSE_MATRIX) { 3083 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3084 ((PetscObject)A)->refct = 1; 3085 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3086 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3087 } else { 3088 *newmat = B; 3089 } 3090 PetscFunctionReturn(0); 3091 } 3092 EXTERN_C_END 3093 3094 3095 /* ---------------------------------------------------------------------------------- */ 3096 /*MC 3097 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3098 operations for multicomponent problems. It interpolates each component the same 3099 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3100 and MATMPIAIJ for distributed matrices. 3101 3102 Operations provided: 3103 + MatMult 3104 . MatMultTranspose 3105 . MatMultAdd 3106 . MatMultTransposeAdd 3107 - MatView 3108 3109 Level: advanced 3110 3111 M*/ 3112 #undef __FUNCT__ 3113 #define __FUNCT__ "MatCreateMAIJ" 3114 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3115 { 3116 PetscErrorCode ierr; 3117 PetscMPIInt size; 3118 PetscInt n; 3119 Mat_MPIMAIJ *b; 3120 Mat B; 3121 3122 PetscFunctionBegin; 3123 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3124 3125 if (dof == 1) { 3126 *maij = A; 3127 } else { 3128 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3129 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3130 B->assembled = PETSC_TRUE; 3131 3132 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3133 if (size == 1) { 3134 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3135 B->ops->destroy = MatDestroy_SeqMAIJ; 3136 B->ops->view = MatView_SeqMAIJ; 3137 b = (Mat_MPIMAIJ*)B->data; 3138 b->dof = dof; 3139 b->AIJ = A; 3140 if (dof == 2) { 3141 B->ops->mult = MatMult_SeqMAIJ_2; 3142 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3143 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3144 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3145 } else if (dof == 3) { 3146 B->ops->mult = MatMult_SeqMAIJ_3; 3147 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3148 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3149 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3150 } else if (dof == 4) { 3151 B->ops->mult = MatMult_SeqMAIJ_4; 3152 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3153 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3154 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3155 } else if (dof == 5) { 3156 B->ops->mult = MatMult_SeqMAIJ_5; 3157 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3158 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3159 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3160 } else if (dof == 6) { 3161 B->ops->mult = MatMult_SeqMAIJ_6; 3162 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3163 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3164 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3165 } else if (dof == 7) { 3166 B->ops->mult = MatMult_SeqMAIJ_7; 3167 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3168 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3169 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3170 } else if (dof == 8) { 3171 B->ops->mult = MatMult_SeqMAIJ_8; 3172 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3173 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3174 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3175 } else if (dof == 9) { 3176 B->ops->mult = MatMult_SeqMAIJ_9; 3177 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3178 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3179 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3180 } else if (dof == 10) { 3181 B->ops->mult = MatMult_SeqMAIJ_10; 3182 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3183 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3184 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3185 } else if (dof == 11) { 3186 B->ops->mult = MatMult_SeqMAIJ_11; 3187 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3188 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3189 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3190 } else if (dof == 16) { 3191 B->ops->mult = MatMult_SeqMAIJ_16; 3192 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3193 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3194 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3195 } else if (dof == 18) { 3196 B->ops->mult = MatMult_SeqMAIJ_18; 3197 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3198 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3199 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3200 } else { 3201 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 3202 } 3203 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 3204 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3205 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3206 } else { 3207 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3208 IS from,to; 3209 Vec gvec; 3210 PetscInt *garray,i; 3211 3212 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3213 B->ops->destroy = MatDestroy_MPIMAIJ; 3214 B->ops->view = MatView_MPIMAIJ; 3215 b = (Mat_MPIMAIJ*)B->data; 3216 b->dof = dof; 3217 b->A = A; 3218 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3219 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3220 3221 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3222 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 3223 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3224 3225 /* create two temporary Index sets for build scatter gather */ 3226 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 3227 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 3228 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 3229 ierr = PetscFree(garray);CHKERRQ(ierr); 3230 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3231 3232 /* create temporary global vector to generate scatter context */ 3233 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3234 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 3235 3236 /* generate the scatter context */ 3237 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3238 3239 ierr = ISDestroy(from);CHKERRQ(ierr); 3240 ierr = ISDestroy(to);CHKERRQ(ierr); 3241 ierr = VecDestroy(gvec);CHKERRQ(ierr); 3242 3243 B->ops->mult = MatMult_MPIMAIJ_dof; 3244 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3245 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3246 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3247 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3248 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3249 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3250 } 3251 *maij = B; 3252 ierr = MatView_Private(B);CHKERRQ(ierr); 3253 } 3254 PetscFunctionReturn(0); 3255 } 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268