1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/seq/baij.h" 4 #include "../src/mat/blockinvert.h" 5 #include "petscbt.h" 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ" 9 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 10 { 11 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 12 PetscErrorCode ierr; 13 PetscInt row,i,j,k,l,m,n,*nidx,isz,val,ival; 14 const PetscInt *idx; 15 PetscInt start,end,*ai,*aj,bs,*nidx2; 16 PetscBT table; 17 18 PetscFunctionBegin; 19 m = a->mbs; 20 ai = a->i; 21 aj = a->j; 22 bs = A->rmap->bs; 23 24 if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 25 26 ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 27 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 28 ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 29 30 for (i=0; i<is_max; i++) { 31 /* Initialise the two local arrays */ 32 isz = 0; 33 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 34 35 /* Extract the indices, assume there can be duplicate entries */ 36 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 37 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 38 39 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 40 for (j=0; j<n ; ++j){ 41 ival = idx[j]/bs; /* convert the indices into block indices */ 42 if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 43 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 44 } 45 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 46 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 47 48 k = 0; 49 for (j=0; j<ov; j++){ /* for each overlap*/ 50 n = isz; 51 for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 52 row = nidx[k]; 53 start = ai[row]; 54 end = ai[row+1]; 55 for (l = start; l<end ; l++){ 56 val = aj[l]; 57 if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 58 } 59 } 60 } 61 /* expand the Index Set */ 62 for (j=0; j<isz; j++) { 63 for (k=0; k<bs; k++) 64 nidx2[j*bs+k] = nidx[j]*bs+k; 65 } 66 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 67 } 68 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 69 ierr = PetscFree(nidx);CHKERRQ(ierr); 70 ierr = PetscFree(nidx2);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private" 76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; 79 PetscErrorCode ierr; 80 PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; 81 PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 82 const PetscInt *irow,*icol; 83 PetscInt nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2; 84 PetscInt *aj = a->j,*ai = a->i; 85 MatScalar *mat_a; 86 Mat C; 87 PetscTruth flag,sorted; 88 89 PetscFunctionBegin; 90 ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr); 91 if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 92 93 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 94 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 95 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 96 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 97 98 ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 99 ssmap = smap; 100 ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 101 ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<ncols; i++) smap[icol[i]] = i+1; 103 /* determine lens of each row */ 104 for (i=0; i<nrows; i++) { 105 kstart = ai[irow[i]]; 106 kend = kstart + a->ilen[irow[i]]; 107 lens[i] = 0; 108 for (k=kstart; k<kend; k++) { 109 if (ssmap[aj[k]]) { 110 lens[i]++; 111 } 112 } 113 } 114 /* Create and fill new matrix */ 115 if (scall == MAT_REUSE_MATRIX) { 116 c = (Mat_SeqBAIJ *)((*B)->data); 117 118 if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 119 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 120 if (!flag) { 121 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 122 } 123 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 124 C = *B; 125 } else { 126 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 127 ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 128 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 129 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr); 130 } 131 c = (Mat_SeqBAIJ *)(C->data); 132 for (i=0; i<nrows; i++) { 133 row = irow[i]; 134 kstart = ai[row]; 135 kend = kstart + a->ilen[row]; 136 mat_i = c->i[i]; 137 mat_j = c->j + mat_i; 138 mat_a = c->a + mat_i*bs2; 139 mat_ilen = c->ilen + i; 140 for (k=kstart; k<kend; k++) { 141 if ((tcol=ssmap[a->j[k]])) { 142 *mat_j++ = tcol - 1; 143 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 144 mat_a += bs2; 145 (*mat_ilen)++; 146 } 147 } 148 } 149 150 /* Free work space */ 151 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 152 ierr = PetscFree(smap);CHKERRQ(ierr); 153 ierr = PetscFree(lens);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 157 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 158 *B = C; 159 PetscFunctionReturn(0); 160 } 161 162 #undef __FUNCT__ 163 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ" 164 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 165 { 166 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 167 IS is1,is2; 168 PetscErrorCode ierr; 169 PetscInt *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count; 170 const PetscInt *irow,*icol; 171 172 PetscFunctionBegin; 173 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 174 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 175 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 176 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 177 178 /* Verify if the indices corespond to each element in a block 179 and form the IS with compressed IS */ 180 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 181 iary = vary + a->mbs; 182 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 183 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 184 count = 0; 185 for (i=0; i<a->mbs; i++) { 186 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); 187 if (vary[i]==bs) iary[count++] = i; 188 } 189 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 190 191 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 192 for (i=0; i<ncols; i++) vary[icol[i]/bs]++; 193 count = 0; 194 for (i=0; i<a->mbs; i++) { 195 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc"); 196 if (vary[i]==bs) iary[count++] = i; 197 } 198 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr); 199 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 200 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 201 ierr = PetscFree(vary);CHKERRQ(ierr); 202 203 ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,scall,B);CHKERRQ(ierr); 204 ISDestroy(is1); 205 ISDestroy(is2); 206 PetscFunctionReturn(0); 207 } 208 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ" 211 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 212 { 213 PetscErrorCode ierr; 214 PetscInt i; 215 216 PetscFunctionBegin; 217 if (scall == MAT_INITIAL_MATRIX) { 218 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 219 } 220 221 for (i=0; i<n; i++) { 222 ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr); 223 } 224 PetscFunctionReturn(0); 225 } 226 227 228 /* -------------------------------------------------------*/ 229 /* Should check that shapes of vectors and matrices match */ 230 /* -------------------------------------------------------*/ 231 #include "petscblaslapack.h" 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatMult_SeqBAIJ_1" 235 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 236 { 237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 238 PetscScalar *z,sum; 239 const PetscScalar *x; 240 const MatScalar *v; 241 PetscErrorCode ierr; 242 PetscInt mbs,i,n,nonzerorow=0; 243 const PetscInt *idx,*ii,*ridx=PETSC_NULL; 244 PetscTruth usecprow=a->compressedrow.use; 245 246 PetscFunctionBegin; 247 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 248 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 249 250 if (usecprow){ 251 mbs = a->compressedrow.nrows; 252 ii = a->compressedrow.i; 253 ridx = a->compressedrow.rindex; 254 ierr = PetscMemzero(z,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 255 } else { 256 mbs = a->mbs; 257 ii = a->i; 258 } 259 260 for (i=0; i<mbs; i++) { 261 n = ii[1] - ii[0]; 262 v = a->a + ii[0]; 263 idx = a->j + ii[0]; 264 ii++; 265 sum = 0.0; 266 nonzerorow += (n>0); 267 PetscSparseDensePlusDot(sum,x,v,idx,n); 268 if (usecprow){ 269 z[ridx[i]] = sum; 270 } else { 271 z[i] = sum; 272 } 273 } 274 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 275 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 276 ierr = PetscLogFlops(2.0*a->nz - nonzerorow);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatMult_SeqBAIJ_2" 282 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 283 { 284 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 285 PetscScalar *z = 0,sum1,sum2,*zarray; 286 const PetscScalar *x,*xb; 287 PetscScalar x1,x2; 288 const MatScalar *v; 289 PetscErrorCode ierr; 290 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 291 PetscTruth usecprow=a->compressedrow.use; 292 293 PetscFunctionBegin; 294 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 295 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 296 297 idx = a->j; 298 v = a->a; 299 if (usecprow){ 300 mbs = a->compressedrow.nrows; 301 ii = a->compressedrow.i; 302 ridx = a->compressedrow.rindex; 303 } else { 304 mbs = a->mbs; 305 ii = a->i; 306 z = zarray; 307 } 308 309 for (i=0; i<mbs; i++) { 310 n = ii[1] - ii[0]; ii++; 311 sum1 = 0.0; sum2 = 0.0; 312 nonzerorow += (n>0); 313 for (j=0; j<n; j++) { 314 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 315 sum1 += v[0]*x1 + v[2]*x2; 316 sum2 += v[1]*x1 + v[3]*x2; 317 v += 4; 318 } 319 if (usecprow) z = zarray + 2*ridx[i]; 320 z[0] = sum1; z[1] = sum2; 321 if (!usecprow) z += 2; 322 } 323 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 325 ierr = PetscLogFlops(8.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatMult_SeqBAIJ_3" 331 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 332 { 333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 334 PetscScalar *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray; 335 const PetscScalar *x,*xb; 336 const MatScalar *v; 337 PetscErrorCode ierr; 338 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 339 PetscTruth usecprow=a->compressedrow.use; 340 341 342 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 343 #pragma disjoint(*v,*z,*xb) 344 #endif 345 346 PetscFunctionBegin; 347 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 348 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 349 350 idx = a->j; 351 v = a->a; 352 if (usecprow){ 353 mbs = a->compressedrow.nrows; 354 ii = a->compressedrow.i; 355 ridx = a->compressedrow.rindex; 356 } else { 357 mbs = a->mbs; 358 ii = a->i; 359 z = zarray; 360 } 361 362 for (i=0; i<mbs; i++) { 363 n = ii[1] - ii[0]; ii++; 364 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 365 nonzerorow += (n>0); 366 for (j=0; j<n; j++) { 367 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 368 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 369 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 370 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 371 v += 9; 372 } 373 if (usecprow) z = zarray + 3*ridx[i]; 374 z[0] = sum1; z[1] = sum2; z[2] = sum3; 375 if (!usecprow) z += 3; 376 } 377 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 378 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 379 ierr = PetscLogFlops(18.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 380 PetscFunctionReturn(0); 381 } 382 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatMult_SeqBAIJ_4" 385 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 386 { 387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 388 PetscScalar *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; 389 const PetscScalar *x,*xb; 390 const MatScalar *v; 391 PetscErrorCode ierr; 392 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 393 PetscTruth usecprow=a->compressedrow.use; 394 395 PetscFunctionBegin; 396 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 397 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 398 399 idx = a->j; 400 v = a->a; 401 if (usecprow){ 402 mbs = a->compressedrow.nrows; 403 ii = a->compressedrow.i; 404 ridx = a->compressedrow.rindex; 405 } else { 406 mbs = a->mbs; 407 ii = a->i; 408 z = zarray; 409 } 410 411 for (i=0; i<mbs; i++) { 412 n = ii[1] - ii[0]; ii++; 413 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 414 nonzerorow += (n>0); 415 for (j=0; j<n; j++) { 416 xb = x + 4*(*idx++); 417 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 418 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 419 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 420 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 421 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 422 v += 16; 423 } 424 if (usecprow) z = zarray + 4*ridx[i]; 425 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 426 if (!usecprow) z += 4; 427 } 428 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 429 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 430 ierr = PetscLogFlops(32.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatMult_SeqBAIJ_5" 436 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 437 { 438 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 439 PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray; 440 const PetscScalar *xb,*x; 441 const MatScalar *v; 442 PetscErrorCode ierr; 443 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 444 PetscTruth usecprow=a->compressedrow.use; 445 446 PetscFunctionBegin; 447 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 448 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 449 450 idx = a->j; 451 v = a->a; 452 if (usecprow){ 453 mbs = a->compressedrow.nrows; 454 ii = a->compressedrow.i; 455 ridx = a->compressedrow.rindex; 456 } else { 457 mbs = a->mbs; 458 ii = a->i; 459 z = zarray; 460 } 461 462 for (i=0; i<mbs; i++) { 463 n = ii[1] - ii[0]; ii++; 464 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 465 nonzerorow += (n>0); 466 for (j=0; j<n; j++) { 467 xb = x + 5*(*idx++); 468 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 469 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 470 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 471 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 472 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 473 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 474 v += 25; 475 } 476 if (usecprow) z = zarray + 5*ridx[i]; 477 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 478 if (!usecprow) z += 5; 479 } 480 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 481 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 482 ierr = PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 483 PetscFunctionReturn(0); 484 } 485 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatMult_SeqBAIJ_6" 489 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) 490 { 491 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 492 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6; 493 const PetscScalar *x,*xb; 494 PetscScalar x1,x2,x3,x4,x5,x6,*zarray; 495 const MatScalar *v; 496 PetscErrorCode ierr; 497 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 498 PetscTruth usecprow=a->compressedrow.use; 499 500 PetscFunctionBegin; 501 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 502 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 503 504 idx = a->j; 505 v = a->a; 506 if (usecprow){ 507 mbs = a->compressedrow.nrows; 508 ii = a->compressedrow.i; 509 ridx = a->compressedrow.rindex; 510 } else { 511 mbs = a->mbs; 512 ii = a->i; 513 z = zarray; 514 } 515 516 for (i=0; i<mbs; i++) { 517 n = ii[1] - ii[0]; ii++; 518 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; 519 nonzerorow += (n>0); 520 for (j=0; j<n; j++) { 521 xb = x + 6*(*idx++); 522 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 523 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 524 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 525 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 526 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 527 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 528 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 529 v += 36; 530 } 531 if (usecprow) z = zarray + 6*ridx[i]; 532 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 533 if (!usecprow) z += 6; 534 } 535 536 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 537 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 538 ierr = PetscLogFlops(72.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_SeqBAIJ_7" 543 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 544 { 545 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 546 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 547 const PetscScalar *x,*xb; 548 PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; 549 const MatScalar *v; 550 PetscErrorCode ierr; 551 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 552 PetscTruth usecprow=a->compressedrow.use; 553 554 PetscFunctionBegin; 555 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 556 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 557 558 idx = a->j; 559 v = a->a; 560 if (usecprow){ 561 mbs = a->compressedrow.nrows; 562 ii = a->compressedrow.i; 563 ridx = a->compressedrow.rindex; 564 } else { 565 mbs = a->mbs; 566 ii = a->i; 567 z = zarray; 568 } 569 570 for (i=0; i<mbs; i++) { 571 n = ii[1] - ii[0]; ii++; 572 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 573 nonzerorow += (n>0); 574 for (j=0; j<n; j++) { 575 xb = x + 7*(*idx++); 576 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 577 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 578 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 579 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 580 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 581 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 582 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 583 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 584 v += 49; 585 } 586 if (usecprow) z = zarray + 7*ridx[i]; 587 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 588 if (!usecprow) z += 7; 589 } 590 591 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 592 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 593 ierr = PetscLogFlops(98.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 /* 598 This will not work with MatScalar == float because it calls the BLAS 599 */ 600 #undef __FUNCT__ 601 #define __FUNCT__ "MatMult_SeqBAIJ_N" 602 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 603 { 604 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 605 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 606 MatScalar *v; 607 PetscErrorCode ierr; 608 PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 609 PetscInt ncols,k,*ridx=PETSC_NULL,nonzerorow=0; 610 PetscTruth usecprow=a->compressedrow.use; 611 612 PetscFunctionBegin; 613 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 614 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 615 616 idx = a->j; 617 v = a->a; 618 if (usecprow){ 619 mbs = a->compressedrow.nrows; 620 ii = a->compressedrow.i; 621 ridx = a->compressedrow.rindex; 622 } else { 623 mbs = a->mbs; 624 ii = a->i; 625 z = zarray; 626 } 627 628 if (!a->mult_work) { 629 k = PetscMax(A->rmap->n,A->cmap->n); 630 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 631 } 632 work = a->mult_work; 633 for (i=0; i<mbs; i++) { 634 n = ii[1] - ii[0]; ii++; 635 ncols = n*bs; 636 workt = work; 637 nonzerorow += (n>0); 638 for (j=0; j<n; j++) { 639 xb = x + bs*(*idx++); 640 for (k=0; k<bs; k++) workt[k] = xb[k]; 641 workt += bs; 642 } 643 if (usecprow) z = zarray + bs*ridx[i]; 644 Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z); 645 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ 646 v += n*bs2; 647 if (!usecprow) z += bs; 648 } 649 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 650 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 651 ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 extern PetscErrorCode VecCopy_Seq(Vec,Vec); 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1" 658 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 659 { 660 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 661 PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray; 662 MatScalar *v; 663 PetscErrorCode ierr; 664 PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 665 PetscTruth usecprow=a->compressedrow.use; 666 667 PetscFunctionBegin; 668 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 669 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 670 if (zz != yy) { 671 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 672 } else { 673 zarray = yarray; 674 } 675 676 idx = a->j; 677 v = a->a; 678 if (usecprow){ 679 if (zz != yy){ 680 ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 681 } 682 mbs = a->compressedrow.nrows; 683 ii = a->compressedrow.i; 684 ridx = a->compressedrow.rindex; 685 } else { 686 ii = a->i; 687 y = yarray; 688 z = zarray; 689 } 690 691 for (i=0; i<mbs; i++) { 692 n = ii[1] - ii[0]; ii++; 693 if (usecprow){ 694 z = zarray + ridx[i]; 695 y = yarray + ridx[i]; 696 } 697 sum = y[0]; 698 while (n--) sum += *v++ * x[*idx++]; 699 z[0] = sum; 700 if (!usecprow){ 701 z++; y++; 702 } 703 } 704 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 705 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 706 if (zz != yy) { 707 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 708 } 709 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2" 715 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 716 { 717 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 718 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2; 719 PetscScalar x1,x2,*yarray,*zarray; 720 MatScalar *v; 721 PetscErrorCode ierr; 722 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 723 PetscTruth usecprow=a->compressedrow.use; 724 725 PetscFunctionBegin; 726 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 727 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 728 if (zz != yy) { 729 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 730 } else { 731 zarray = yarray; 732 } 733 734 idx = a->j; 735 v = a->a; 736 if (usecprow){ 737 if (zz != yy){ 738 ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 739 } 740 mbs = a->compressedrow.nrows; 741 ii = a->compressedrow.i; 742 ridx = a->compressedrow.rindex; 743 if (zz != yy){ 744 ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr); 745 } 746 } else { 747 ii = a->i; 748 y = yarray; 749 z = zarray; 750 } 751 752 for (i=0; i<mbs; i++) { 753 n = ii[1] - ii[0]; ii++; 754 if (usecprow){ 755 z = zarray + 2*ridx[i]; 756 y = yarray + 2*ridx[i]; 757 } 758 sum1 = y[0]; sum2 = y[1]; 759 for (j=0; j<n; j++) { 760 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 761 sum1 += v[0]*x1 + v[2]*x2; 762 sum2 += v[1]*x1 + v[3]*x2; 763 v += 4; 764 } 765 z[0] = sum1; z[1] = sum2; 766 if (!usecprow){ 767 z += 2; y += 2; 768 } 769 } 770 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 771 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 772 if (zz != yy) { 773 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 774 } 775 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 776 PetscFunctionReturn(0); 777 } 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3" 781 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 782 { 783 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 784 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; 785 MatScalar *v; 786 PetscErrorCode ierr; 787 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 788 PetscTruth usecprow=a->compressedrow.use; 789 790 PetscFunctionBegin; 791 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 792 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 793 if (zz != yy) { 794 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 795 } else { 796 zarray = yarray; 797 } 798 799 idx = a->j; 800 v = a->a; 801 if (usecprow){ 802 if (zz != yy){ 803 ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 804 } 805 mbs = a->compressedrow.nrows; 806 ii = a->compressedrow.i; 807 ridx = a->compressedrow.rindex; 808 } else { 809 ii = a->i; 810 y = yarray; 811 z = zarray; 812 } 813 814 for (i=0; i<mbs; i++) { 815 n = ii[1] - ii[0]; ii++; 816 if (usecprow){ 817 z = zarray + 3*ridx[i]; 818 y = yarray + 3*ridx[i]; 819 } 820 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 821 for (j=0; j<n; j++) { 822 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 823 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 824 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 825 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 826 v += 9; 827 } 828 z[0] = sum1; z[1] = sum2; z[2] = sum3; 829 if (!usecprow){ 830 z += 3; y += 3; 831 } 832 } 833 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 835 if (zz != yy) { 836 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 837 } 838 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 839 PetscFunctionReturn(0); 840 } 841 842 #undef __FUNCT__ 843 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4" 844 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 845 { 846 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 847 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; 848 MatScalar *v; 849 PetscErrorCode ierr; 850 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 851 PetscTruth usecprow=a->compressedrow.use; 852 853 PetscFunctionBegin; 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 856 if (zz != yy) { 857 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 858 } else { 859 zarray = yarray; 860 } 861 862 idx = a->j; 863 v = a->a; 864 if (usecprow){ 865 if (zz != yy){ 866 ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 867 } 868 mbs = a->compressedrow.nrows; 869 ii = a->compressedrow.i; 870 ridx = a->compressedrow.rindex; 871 } else { 872 ii = a->i; 873 y = yarray; 874 z = zarray; 875 } 876 877 for (i=0; i<mbs; i++) { 878 n = ii[1] - ii[0]; ii++; 879 if (usecprow){ 880 z = zarray + 4*ridx[i]; 881 y = yarray + 4*ridx[i]; 882 } 883 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 884 for (j=0; j<n; j++) { 885 xb = x + 4*(*idx++); 886 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 887 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 888 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 889 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 890 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 891 v += 16; 892 } 893 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 894 if (!usecprow){ 895 z += 4; y += 4; 896 } 897 } 898 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 899 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 900 if (zz != yy) { 901 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 902 } 903 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 #undef __FUNCT__ 908 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5" 909 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 910 { 911 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 912 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 913 PetscScalar *yarray,*zarray; 914 MatScalar *v; 915 PetscErrorCode ierr; 916 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 917 PetscTruth usecprow=a->compressedrow.use; 918 919 PetscFunctionBegin; 920 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 921 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 922 if (zz != yy) { 923 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 924 } else { 925 zarray = yarray; 926 } 927 928 idx = a->j; 929 v = a->a; 930 if (usecprow){ 931 if (zz != yy){ 932 ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 933 } 934 mbs = a->compressedrow.nrows; 935 ii = a->compressedrow.i; 936 ridx = a->compressedrow.rindex; 937 } else { 938 ii = a->i; 939 y = yarray; 940 z = zarray; 941 } 942 943 for (i=0; i<mbs; i++) { 944 n = ii[1] - ii[0]; ii++; 945 if (usecprow){ 946 z = zarray + 5*ridx[i]; 947 y = yarray + 5*ridx[i]; 948 } 949 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 950 for (j=0; j<n; j++) { 951 xb = x + 5*(*idx++); 952 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 953 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 954 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 955 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 956 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 957 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 958 v += 25; 959 } 960 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 961 if (!usecprow){ 962 z += 5; y += 5; 963 } 964 } 965 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 966 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 967 if (zz != yy) { 968 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 969 } 970 ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } 973 #undef __FUNCT__ 974 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6" 975 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 976 { 977 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 978 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 979 PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; 980 MatScalar *v; 981 PetscErrorCode ierr; 982 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 983 PetscTruth usecprow=a->compressedrow.use; 984 985 PetscFunctionBegin; 986 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 987 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 988 if (zz != yy) { 989 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 990 } else { 991 zarray = yarray; 992 } 993 994 idx = a->j; 995 v = a->a; 996 if (usecprow){ 997 if (zz != yy){ 998 ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 999 } 1000 mbs = a->compressedrow.nrows; 1001 ii = a->compressedrow.i; 1002 ridx = a->compressedrow.rindex; 1003 } else { 1004 ii = a->i; 1005 y = yarray; 1006 z = zarray; 1007 } 1008 1009 for (i=0; i<mbs; i++) { 1010 n = ii[1] - ii[0]; ii++; 1011 if (usecprow){ 1012 z = zarray + 6*ridx[i]; 1013 y = yarray + 6*ridx[i]; 1014 } 1015 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; 1016 for (j=0; j<n; j++) { 1017 xb = x + 6*(*idx++); 1018 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 1019 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1020 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1021 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1022 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1023 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1024 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 1025 v += 36; 1026 } 1027 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 1028 if (!usecprow){ 1029 z += 6; y += 6; 1030 } 1031 } 1032 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1033 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1034 if (zz != yy) { 1035 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1036 } 1037 ierr = PetscLogFlops(72.0*a->nz);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7" 1043 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1044 { 1045 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1046 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1047 PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; 1048 MatScalar *v; 1049 PetscErrorCode ierr; 1050 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 1051 PetscTruth usecprow=a->compressedrow.use; 1052 1053 PetscFunctionBegin; 1054 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1055 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 1056 if (zz != yy) { 1057 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1058 } else { 1059 zarray = yarray; 1060 } 1061 1062 idx = a->j; 1063 v = a->a; 1064 if (usecprow){ 1065 if (zz != yy){ 1066 ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1067 } 1068 mbs = a->compressedrow.nrows; 1069 ii = a->compressedrow.i; 1070 ridx = a->compressedrow.rindex; 1071 } else { 1072 ii = a->i; 1073 y = yarray; 1074 z = zarray; 1075 } 1076 1077 for (i=0; i<mbs; i++) { 1078 n = ii[1] - ii[0]; ii++; 1079 if (usecprow){ 1080 z = zarray + 7*ridx[i]; 1081 y = yarray + 7*ridx[i]; 1082 } 1083 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1084 for (j=0; j<n; j++) { 1085 xb = x + 7*(*idx++); 1086 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1087 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1088 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1089 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1090 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1091 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1092 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1093 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1094 v += 49; 1095 } 1096 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1097 if (!usecprow){ 1098 z += 7; y += 7; 1099 } 1100 } 1101 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1102 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1103 if (zz != yy) { 1104 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1105 } 1106 ierr = PetscLogFlops(98.0*a->nz);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N" 1112 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1113 { 1114 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1115 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 1116 MatScalar *v; 1117 PetscErrorCode ierr; 1118 PetscInt mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 1119 PetscInt ncols,k,*ridx=PETSC_NULL; 1120 PetscTruth usecprow=a->compressedrow.use; 1121 1122 PetscFunctionBegin; 1123 ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 1124 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1125 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1126 1127 idx = a->j; 1128 v = a->a; 1129 if (usecprow){ 1130 mbs = a->compressedrow.nrows; 1131 ii = a->compressedrow.i; 1132 ridx = a->compressedrow.rindex; 1133 } else { 1134 mbs = a->mbs; 1135 ii = a->i; 1136 z = zarray; 1137 } 1138 1139 if (!a->mult_work) { 1140 k = PetscMax(A->rmap->n,A->cmap->n); 1141 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1142 } 1143 work = a->mult_work; 1144 for (i=0; i<mbs; i++) { 1145 n = ii[1] - ii[0]; ii++; 1146 ncols = n*bs; 1147 workt = work; 1148 for (j=0; j<n; j++) { 1149 xb = x + bs*(*idx++); 1150 for (k=0; k<bs; k++) workt[k] = xb[k]; 1151 workt += bs; 1152 } 1153 if (usecprow) z = zarray + bs*ridx[i]; 1154 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1155 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ 1156 v += n*bs2; 1157 if (!usecprow){ 1158 z += bs; 1159 } 1160 } 1161 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1162 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1163 ierr = PetscLogFlops(2.0*a->nz*bs2);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatMultTranspose_SeqBAIJ" 1169 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) 1170 { 1171 PetscScalar zero = 0.0; 1172 PetscErrorCode ierr; 1173 1174 PetscFunctionBegin; 1175 ierr = VecSet(zz,zero);CHKERRQ(ierr); 1176 ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ" 1182 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1183 1184 { 1185 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1186 PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5; 1187 MatScalar *v; 1188 PetscErrorCode ierr; 1189 PetscInt mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL; 1190 Mat_CompressedRow cprow = a->compressedrow; 1191 PetscTruth usecprow=cprow.use; 1192 1193 PetscFunctionBegin; 1194 if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); } 1195 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1196 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 1197 1198 idx = a->j; 1199 v = a->a; 1200 if (usecprow){ 1201 mbs = cprow.nrows; 1202 ii = cprow.i; 1203 ridx = cprow.rindex; 1204 } else { 1205 mbs=a->mbs; 1206 ii = a->i; 1207 xb = x; 1208 } 1209 1210 switch (bs) { 1211 case 1: 1212 for (i=0; i<mbs; i++) { 1213 if (usecprow) xb = x + ridx[i]; 1214 x1 = xb[0]; 1215 ib = idx + ii[0]; 1216 n = ii[1] - ii[0]; ii++; 1217 for (j=0; j<n; j++) { 1218 rval = ib[j]; 1219 z[rval] += *v * x1; 1220 v++; 1221 } 1222 if (!usecprow) xb++; 1223 } 1224 break; 1225 case 2: 1226 for (i=0; i<mbs; i++) { 1227 if (usecprow) xb = x + 2*ridx[i]; 1228 x1 = xb[0]; x2 = xb[1]; 1229 ib = idx + ii[0]; 1230 n = ii[1] - ii[0]; ii++; 1231 for (j=0; j<n; j++) { 1232 rval = ib[j]*2; 1233 z[rval++] += v[0]*x1 + v[1]*x2; 1234 z[rval++] += v[2]*x1 + v[3]*x2; 1235 v += 4; 1236 } 1237 if (!usecprow) xb += 2; 1238 } 1239 break; 1240 case 3: 1241 for (i=0; i<mbs; i++) { 1242 if (usecprow) xb = x + 3*ridx[i]; 1243 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1244 ib = idx + ii[0]; 1245 n = ii[1] - ii[0]; ii++; 1246 for (j=0; j<n; j++) { 1247 rval = ib[j]*3; 1248 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1249 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1250 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1251 v += 9; 1252 } 1253 if (!usecprow) xb += 3; 1254 } 1255 break; 1256 case 4: 1257 for (i=0; i<mbs; i++) { 1258 if (usecprow) xb = x + 4*ridx[i]; 1259 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1260 ib = idx + ii[0]; 1261 n = ii[1] - ii[0]; ii++; 1262 for (j=0; j<n; j++) { 1263 rval = ib[j]*4; 1264 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1265 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1266 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1267 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1268 v += 16; 1269 } 1270 if (!usecprow) xb += 4; 1271 } 1272 break; 1273 case 5: 1274 for (i=0; i<mbs; i++) { 1275 if (usecprow) xb = x + 5*ridx[i]; 1276 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1277 x4 = xb[3]; x5 = xb[4]; 1278 ib = idx + ii[0]; 1279 n = ii[1] - ii[0]; ii++; 1280 for (j=0; j<n; j++) { 1281 rval = ib[j]*5; 1282 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1283 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1284 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1285 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1286 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1287 v += 25; 1288 } 1289 if (!usecprow) xb += 5; 1290 } 1291 break; 1292 default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1293 PetscInt ncols,k; 1294 PetscScalar *work,*workt,*xtmp; 1295 1296 if (!a->mult_work) { 1297 k = PetscMax(A->rmap->n,A->cmap->n); 1298 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1299 } 1300 work = a->mult_work; 1301 xtmp = x; 1302 for (i=0; i<mbs; i++) { 1303 n = ii[1] - ii[0]; ii++; 1304 ncols = n*bs; 1305 ierr = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1306 if (usecprow) { 1307 xtmp = x + bs*ridx[i]; 1308 } 1309 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); 1310 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ 1311 v += n*bs2; 1312 if (!usecprow) xtmp += bs; 1313 workt = work; 1314 for (j=0; j<n; j++) { 1315 zb = z + bs*(*idx++); 1316 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1317 workt += bs; 1318 } 1319 } 1320 } 1321 } 1322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1323 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1324 ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatScale_SeqBAIJ" 1330 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) 1331 { 1332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1333 PetscInt totalnz = a->bs2*a->nz; 1334 PetscScalar oalpha = alpha; 1335 PetscErrorCode ierr; 1336 PetscBLASInt one = 1,tnz = PetscBLASIntCast(totalnz); 1337 1338 PetscFunctionBegin; 1339 BLASscal_(&tnz,&oalpha,a->a,&one); 1340 ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatNorm_SeqBAIJ" 1346 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 1347 { 1348 PetscErrorCode ierr; 1349 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1350 MatScalar *v = a->a; 1351 PetscReal sum = 0.0; 1352 PetscInt i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1; 1353 1354 PetscFunctionBegin; 1355 if (type == NORM_FROBENIUS) { 1356 for (i=0; i< bs2*nz; i++) { 1357 #if defined(PETSC_USE_COMPLEX) 1358 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1359 #else 1360 sum += (*v)*(*v); v++; 1361 #endif 1362 } 1363 *norm = sqrt(sum); 1364 } else if (type == NORM_1) { /* maximum column sum */ 1365 PetscReal *tmp; 1366 PetscInt *bcol = a->j; 1367 ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1368 ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr); 1369 for (i=0; i<nz; i++){ 1370 for (j=0; j<bs; j++){ 1371 k1 = bs*(*bcol) + j; /* column index */ 1372 for (k=0; k<bs; k++){ 1373 tmp[k1] += PetscAbsScalar(*v); v++; 1374 } 1375 } 1376 bcol++; 1377 } 1378 *norm = 0.0; 1379 for (j=0; j<A->cmap->n; j++) { 1380 if (tmp[j] > *norm) *norm = tmp[j]; 1381 } 1382 ierr = PetscFree(tmp);CHKERRQ(ierr); 1383 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1384 *norm = 0.0; 1385 for (k=0; k<bs; k++) { 1386 for (j=0; j<a->mbs; j++) { 1387 v = a->a + bs2*a->i[j] + k; 1388 sum = 0.0; 1389 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 1390 for (k1=0; k1<bs; k1++){ 1391 sum += PetscAbsScalar(*v); 1392 v += bs; 1393 } 1394 } 1395 if (sum > *norm) *norm = sum; 1396 } 1397 } 1398 } else { 1399 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatEqual_SeqBAIJ" 1407 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg) 1408 { 1409 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data; 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1414 if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { 1415 *flg = PETSC_FALSE; 1416 PetscFunctionReturn(0); 1417 } 1418 1419 /* if the a->i are the same */ 1420 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1421 if (!*flg) { 1422 PetscFunctionReturn(0); 1423 } 1424 1425 /* if a->j are the same */ 1426 ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1427 if (!*flg) { 1428 PetscFunctionReturn(0); 1429 } 1430 /* if a->a are the same */ 1431 ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ" 1438 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1439 { 1440 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1441 PetscErrorCode ierr; 1442 PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1443 PetscScalar *x,zero = 0.0; 1444 MatScalar *aa,*aa_j; 1445 1446 PetscFunctionBegin; 1447 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1448 bs = A->rmap->bs; 1449 aa = a->a; 1450 ai = a->i; 1451 aj = a->j; 1452 ambs = a->mbs; 1453 bs2 = a->bs2; 1454 1455 ierr = VecSet(v,zero);CHKERRQ(ierr); 1456 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1457 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1458 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1459 for (i=0; i<ambs; i++) { 1460 for (j=ai[i]; j<ai[i+1]; j++) { 1461 if (aj[j] == i) { 1462 row = i*bs; 1463 aa_j = aa+j*bs2; 1464 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1465 break; 1466 } 1467 } 1468 } 1469 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ" 1475 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1476 { 1477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1478 PetscScalar *l,*r,x,*li,*ri; 1479 MatScalar *aa,*v; 1480 PetscErrorCode ierr; 1481 PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1482 1483 PetscFunctionBegin; 1484 ai = a->i; 1485 aj = a->j; 1486 aa = a->a; 1487 m = A->rmap->n; 1488 n = A->cmap->n; 1489 bs = A->rmap->bs; 1490 mbs = a->mbs; 1491 bs2 = a->bs2; 1492 if (ll) { 1493 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1494 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1495 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1496 for (i=0; i<mbs; i++) { /* for each block row */ 1497 M = ai[i+1] - ai[i]; 1498 li = l + i*bs; 1499 v = aa + bs2*ai[i]; 1500 for (j=0; j<M; j++) { /* for each block */ 1501 for (k=0; k<bs2; k++) { 1502 (*v++) *= li[k%bs]; 1503 } 1504 } 1505 } 1506 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1507 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1508 } 1509 1510 if (rr) { 1511 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1512 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1513 if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1514 for (i=0; i<mbs; i++) { /* for each block row */ 1515 M = ai[i+1] - ai[i]; 1516 v = aa + bs2*ai[i]; 1517 for (j=0; j<M; j++) { /* for each block */ 1518 ri = r + bs*aj[ai[i]+j]; 1519 for (k=0; k<bs; k++) { 1520 x = ri[k]; 1521 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1522 } 1523 } 1524 } 1525 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1526 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1527 } 1528 PetscFunctionReturn(0); 1529 } 1530 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatGetInfo_SeqBAIJ" 1534 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1535 { 1536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1537 1538 PetscFunctionBegin; 1539 info->block_size = a->bs2; 1540 info->nz_allocated = a->maxnz; 1541 info->nz_used = a->bs2*a->nz; 1542 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1543 info->assemblies = A->num_ass; 1544 info->mallocs = a->reallocs; 1545 info->memory = ((PetscObject)A)->mem; 1546 if (A->factor) { 1547 info->fill_ratio_given = A->info.fill_ratio_given; 1548 info->fill_ratio_needed = A->info.fill_ratio_needed; 1549 info->factor_mallocs = A->info.factor_mallocs; 1550 } else { 1551 info->fill_ratio_given = 0; 1552 info->fill_ratio_needed = 0; 1553 info->factor_mallocs = 0; 1554 } 1555 PetscFunctionReturn(0); 1556 } 1557 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatZeroEntries_SeqBAIJ" 1561 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) 1562 { 1563 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1564 PetscErrorCode ierr; 1565 1566 PetscFunctionBegin; 1567 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570