1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/seq/baij.h" 4 #include "../src/mat/blockinvert.h" 5 #include "petscbt.h" 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ" 9 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 10 { 11 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 12 PetscErrorCode ierr; 13 PetscInt row,i,j,k,l,m,n,*nidx,isz,val,ival; 14 const PetscInt *idx; 15 PetscInt start,end,*ai,*aj,bs,*nidx2; 16 PetscBT table; 17 18 PetscFunctionBegin; 19 m = a->mbs; 20 ai = a->i; 21 aj = a->j; 22 bs = A->rmap->bs; 23 24 if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 25 26 ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 27 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 28 ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 29 30 for (i=0; i<is_max; i++) { 31 /* Initialise the two local arrays */ 32 isz = 0; 33 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 34 35 /* Extract the indices, assume there can be duplicate entries */ 36 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 37 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 38 39 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 40 for (j=0; j<n ; ++j){ 41 ival = idx[j]/bs; /* convert the indices into block indices */ 42 if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 43 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 44 } 45 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 46 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 47 48 k = 0; 49 for (j=0; j<ov; j++){ /* for each overlap*/ 50 n = isz; 51 for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 52 row = nidx[k]; 53 start = ai[row]; 54 end = ai[row+1]; 55 for (l = start; l<end ; l++){ 56 val = aj[l]; 57 if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 58 } 59 } 60 } 61 /* expand the Index Set */ 62 for (j=0; j<isz; j++) { 63 for (k=0; k<bs; k++) 64 nidx2[j*bs+k] = nidx[j]*bs+k; 65 } 66 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 67 } 68 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 69 ierr = PetscFree(nidx);CHKERRQ(ierr); 70 ierr = PetscFree(nidx2);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private" 76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; 79 PetscErrorCode ierr; 80 PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; 81 PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 82 const PetscInt *irow,*icol; 83 PetscInt nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2; 84 PetscInt *aj = a->j,*ai = a->i; 85 MatScalar *mat_a; 86 Mat C; 87 PetscTruth flag,sorted; 88 89 PetscFunctionBegin; 90 ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr); 91 if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 92 93 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 94 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 95 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 96 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 97 98 ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 99 ssmap = smap; 100 ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 101 ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<ncols; i++) smap[icol[i]] = i+1; 103 /* determine lens of each row */ 104 for (i=0; i<nrows; i++) { 105 kstart = ai[irow[i]]; 106 kend = kstart + a->ilen[irow[i]]; 107 lens[i] = 0; 108 for (k=kstart; k<kend; k++) { 109 if (ssmap[aj[k]]) { 110 lens[i]++; 111 } 112 } 113 } 114 /* Create and fill new matrix */ 115 if (scall == MAT_REUSE_MATRIX) { 116 c = (Mat_SeqBAIJ *)((*B)->data); 117 118 if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 119 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 120 if (!flag) { 121 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 122 } 123 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 124 C = *B; 125 } else { 126 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 127 ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 128 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 129 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr); 130 } 131 c = (Mat_SeqBAIJ *)(C->data); 132 for (i=0; i<nrows; i++) { 133 row = irow[i]; 134 kstart = ai[row]; 135 kend = kstart + a->ilen[row]; 136 mat_i = c->i[i]; 137 mat_j = c->j + mat_i; 138 mat_a = c->a + mat_i*bs2; 139 mat_ilen = c->ilen + i; 140 for (k=kstart; k<kend; k++) { 141 if ((tcol=ssmap[a->j[k]])) { 142 *mat_j++ = tcol - 1; 143 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 144 mat_a += bs2; 145 (*mat_ilen)++; 146 } 147 } 148 } 149 150 /* Free work space */ 151 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 152 ierr = PetscFree(smap);CHKERRQ(ierr); 153 ierr = PetscFree(lens);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 157 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 158 *B = C; 159 PetscFunctionReturn(0); 160 } 161 162 #undef __FUNCT__ 163 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ" 164 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 165 { 166 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 167 IS is1,is2; 168 PetscErrorCode ierr; 169 PetscInt *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count; 170 const PetscInt *irow,*icol; 171 172 PetscFunctionBegin; 173 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 174 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 175 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 176 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 177 178 /* Verify if the indices corespond to each element in a block 179 and form the IS with compressed IS */ 180 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 181 iary = vary + a->mbs; 182 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 183 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 184 count = 0; 185 for (i=0; i<a->mbs; i++) { 186 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); 187 if (vary[i]==bs) iary[count++] = i; 188 } 189 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 190 191 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 192 for (i=0; i<ncols; i++) vary[icol[i]/bs]++; 193 count = 0; 194 for (i=0; i<a->mbs; i++) { 195 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc"); 196 if (vary[i]==bs) iary[count++] = i; 197 } 198 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr); 199 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 200 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 201 ierr = PetscFree(vary);CHKERRQ(ierr); 202 203 ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,scall,B);CHKERRQ(ierr); 204 ISDestroy(is1); 205 ISDestroy(is2); 206 PetscFunctionReturn(0); 207 } 208 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ" 211 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 212 { 213 PetscErrorCode ierr; 214 PetscInt i; 215 216 PetscFunctionBegin; 217 if (scall == MAT_INITIAL_MATRIX) { 218 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 219 } 220 221 for (i=0; i<n; i++) { 222 ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr); 223 } 224 PetscFunctionReturn(0); 225 } 226 227 228 /* -------------------------------------------------------*/ 229 /* Should check that shapes of vectors and matrices match */ 230 /* -------------------------------------------------------*/ 231 #include "petscblaslapack.h" 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatMult_SeqBAIJ_1" 235 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 236 { 237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 238 PetscScalar *z,sum; 239 const PetscScalar *x; 240 const MatScalar *v; 241 PetscErrorCode ierr; 242 PetscInt mbs,i,n,nonzerorow=0; 243 const PetscInt *idx,*ii,*ridx=PETSC_NULL; 244 PetscTruth usecprow=a->compressedrow.use; 245 246 PetscFunctionBegin; 247 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 248 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 249 250 if (usecprow){ 251 mbs = a->compressedrow.nrows; 252 ii = a->compressedrow.i; 253 ridx = a->compressedrow.rindex; 254 } else { 255 mbs = a->mbs; 256 ii = a->i; 257 } 258 259 for (i=0; i<mbs; i++) { 260 n = ii[1] - ii[0]; 261 v = a->a + ii[0]; 262 idx = a->j + ii[0]; 263 ii++; 264 sum = 0.0; 265 nonzerorow += (n>0); 266 PetscSparseDensePlusDot(sum,x,v,idx,n); 267 if (usecprow){ 268 z[ridx[i]] = sum; 269 } else { 270 z[i] = sum; 271 } 272 } 273 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 274 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 275 ierr = PetscLogFlops(2.0*a->nz - nonzerorow);CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatMult_SeqBAIJ_2" 281 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 282 { 283 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 284 PetscScalar *z = 0,sum1,sum2,*zarray; 285 const PetscScalar *x,*xb; 286 PetscScalar x1,x2; 287 const MatScalar *v; 288 PetscErrorCode ierr; 289 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 290 PetscTruth usecprow=a->compressedrow.use; 291 292 PetscFunctionBegin; 293 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 294 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 295 296 idx = a->j; 297 v = a->a; 298 if (usecprow){ 299 mbs = a->compressedrow.nrows; 300 ii = a->compressedrow.i; 301 ridx = a->compressedrow.rindex; 302 } else { 303 mbs = a->mbs; 304 ii = a->i; 305 z = zarray; 306 } 307 308 for (i=0; i<mbs; i++) { 309 n = ii[1] - ii[0]; ii++; 310 sum1 = 0.0; sum2 = 0.0; 311 nonzerorow += (n>0); 312 for (j=0; j<n; j++) { 313 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 314 sum1 += v[0]*x1 + v[2]*x2; 315 sum2 += v[1]*x1 + v[3]*x2; 316 v += 4; 317 } 318 if (usecprow) z = zarray + 2*ridx[i]; 319 z[0] = sum1; z[1] = sum2; 320 if (!usecprow) z += 2; 321 } 322 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 324 ierr = PetscLogFlops(8.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatMult_SeqBAIJ_3" 330 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray; 334 const PetscScalar *x,*xb; 335 const MatScalar *v; 336 PetscErrorCode ierr; 337 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 338 PetscTruth usecprow=a->compressedrow.use; 339 340 341 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 342 #pragma disjoint(*v,*z,*xb) 343 #endif 344 345 PetscFunctionBegin; 346 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 347 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 348 349 idx = a->j; 350 v = a->a; 351 if (usecprow){ 352 mbs = a->compressedrow.nrows; 353 ii = a->compressedrow.i; 354 ridx = a->compressedrow.rindex; 355 } else { 356 mbs = a->mbs; 357 ii = a->i; 358 z = zarray; 359 } 360 361 for (i=0; i<mbs; i++) { 362 n = ii[1] - ii[0]; ii++; 363 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 364 nonzerorow += (n>0); 365 for (j=0; j<n; j++) { 366 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 367 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 368 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 369 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 370 v += 9; 371 } 372 if (usecprow) z = zarray + 3*ridx[i]; 373 z[0] = sum1; z[1] = sum2; z[2] = sum3; 374 if (!usecprow) z += 3; 375 } 376 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 377 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 378 ierr = PetscLogFlops(18.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatMult_SeqBAIJ_4" 384 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 385 { 386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 387 PetscScalar *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; 388 const PetscScalar *x,*xb; 389 const MatScalar *v; 390 PetscErrorCode ierr; 391 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 392 PetscTruth usecprow=a->compressedrow.use; 393 394 PetscFunctionBegin; 395 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 396 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 397 398 idx = a->j; 399 v = a->a; 400 if (usecprow){ 401 mbs = a->compressedrow.nrows; 402 ii = a->compressedrow.i; 403 ridx = a->compressedrow.rindex; 404 } else { 405 mbs = a->mbs; 406 ii = a->i; 407 z = zarray; 408 } 409 410 for (i=0; i<mbs; i++) { 411 n = ii[1] - ii[0]; ii++; 412 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 413 nonzerorow += (n>0); 414 for (j=0; j<n; j++) { 415 xb = x + 4*(*idx++); 416 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 417 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 418 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 419 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 420 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 421 v += 16; 422 } 423 if (usecprow) z = zarray + 4*ridx[i]; 424 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 425 if (!usecprow) z += 4; 426 } 427 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 428 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 429 ierr = PetscLogFlops(32.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatMult_SeqBAIJ_5" 435 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 438 PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray; 439 const PetscScalar *xb,*x; 440 const MatScalar *v; 441 PetscErrorCode ierr; 442 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 443 PetscTruth usecprow=a->compressedrow.use; 444 445 PetscFunctionBegin; 446 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 447 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 448 449 idx = a->j; 450 v = a->a; 451 if (usecprow){ 452 mbs = a->compressedrow.nrows; 453 ii = a->compressedrow.i; 454 ridx = a->compressedrow.rindex; 455 } else { 456 mbs = a->mbs; 457 ii = a->i; 458 z = zarray; 459 } 460 461 for (i=0; i<mbs; i++) { 462 n = ii[1] - ii[0]; ii++; 463 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 464 nonzerorow += (n>0); 465 for (j=0; j<n; j++) { 466 xb = x + 5*(*idx++); 467 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 468 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 469 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 470 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 471 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 472 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 473 v += 25; 474 } 475 if (usecprow) z = zarray + 5*ridx[i]; 476 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 477 if (!usecprow) z += 5; 478 } 479 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 480 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 481 ierr = PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 482 PetscFunctionReturn(0); 483 } 484 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatMult_SeqBAIJ_6" 488 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) 489 { 490 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 491 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6; 492 const PetscScalar *x,*xb; 493 PetscScalar x1,x2,x3,x4,x5,x6,*zarray; 494 const MatScalar *v; 495 PetscErrorCode ierr; 496 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 497 PetscTruth usecprow=a->compressedrow.use; 498 499 PetscFunctionBegin; 500 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 501 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 502 503 idx = a->j; 504 v = a->a; 505 if (usecprow){ 506 mbs = a->compressedrow.nrows; 507 ii = a->compressedrow.i; 508 ridx = a->compressedrow.rindex; 509 } else { 510 mbs = a->mbs; 511 ii = a->i; 512 z = zarray; 513 } 514 515 for (i=0; i<mbs; i++) { 516 n = ii[1] - ii[0]; ii++; 517 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; 518 nonzerorow += (n>0); 519 for (j=0; j<n; j++) { 520 xb = x + 6*(*idx++); 521 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 522 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 523 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 524 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 525 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 526 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 527 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 528 v += 36; 529 } 530 if (usecprow) z = zarray + 6*ridx[i]; 531 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 532 if (!usecprow) z += 6; 533 } 534 535 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 536 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 537 ierr = PetscLogFlops(72.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatMult_SeqBAIJ_7" 542 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 543 { 544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 545 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 546 const PetscScalar *x,*xb; 547 PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; 548 const MatScalar *v; 549 PetscErrorCode ierr; 550 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 551 PetscTruth usecprow=a->compressedrow.use; 552 553 PetscFunctionBegin; 554 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 555 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 556 557 idx = a->j; 558 v = a->a; 559 if (usecprow){ 560 mbs = a->compressedrow.nrows; 561 ii = a->compressedrow.i; 562 ridx = a->compressedrow.rindex; 563 } else { 564 mbs = a->mbs; 565 ii = a->i; 566 z = zarray; 567 } 568 569 for (i=0; i<mbs; i++) { 570 n = ii[1] - ii[0]; ii++; 571 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 572 nonzerorow += (n>0); 573 for (j=0; j<n; j++) { 574 xb = x + 7*(*idx++); 575 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 576 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 577 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 578 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 579 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 580 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 581 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 582 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 583 v += 49; 584 } 585 if (usecprow) z = zarray + 7*ridx[i]; 586 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 587 if (!usecprow) z += 7; 588 } 589 590 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 591 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 592 ierr = PetscLogFlops(98.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 596 /* 597 This will not work with MatScalar == float because it calls the BLAS 598 */ 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatMult_SeqBAIJ_N" 601 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 602 { 603 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 604 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 605 MatScalar *v; 606 PetscErrorCode ierr; 607 PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 608 PetscInt ncols,k,*ridx=PETSC_NULL,nonzerorow=0; 609 PetscTruth usecprow=a->compressedrow.use; 610 611 PetscFunctionBegin; 612 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 613 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 614 615 idx = a->j; 616 v = a->a; 617 if (usecprow){ 618 mbs = a->compressedrow.nrows; 619 ii = a->compressedrow.i; 620 ridx = a->compressedrow.rindex; 621 } else { 622 mbs = a->mbs; 623 ii = a->i; 624 z = zarray; 625 } 626 627 if (!a->mult_work) { 628 k = PetscMax(A->rmap->n,A->cmap->n); 629 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 630 } 631 work = a->mult_work; 632 for (i=0; i<mbs; i++) { 633 n = ii[1] - ii[0]; ii++; 634 ncols = n*bs; 635 workt = work; 636 nonzerorow += (n>0); 637 for (j=0; j<n; j++) { 638 xb = x + bs*(*idx++); 639 for (k=0; k<bs; k++) workt[k] = xb[k]; 640 workt += bs; 641 } 642 if (usecprow) z = zarray + bs*ridx[i]; 643 Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z); 644 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ 645 v += n*bs2; 646 if (!usecprow) z += bs; 647 } 648 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 649 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 650 ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 extern PetscErrorCode VecCopy_Seq(Vec,Vec); 655 #undef __FUNCT__ 656 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1" 657 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 658 { 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray; 661 MatScalar *v; 662 PetscErrorCode ierr; 663 PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 664 PetscTruth usecprow=a->compressedrow.use; 665 666 PetscFunctionBegin; 667 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 668 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 669 if (zz != yy) { 670 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 671 } else { 672 zarray = yarray; 673 } 674 675 idx = a->j; 676 v = a->a; 677 if (usecprow){ 678 if (zz != yy){ 679 ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 680 } 681 mbs = a->compressedrow.nrows; 682 ii = a->compressedrow.i; 683 ridx = a->compressedrow.rindex; 684 } else { 685 ii = a->i; 686 y = yarray; 687 z = zarray; 688 } 689 690 for (i=0; i<mbs; i++) { 691 n = ii[1] - ii[0]; ii++; 692 if (usecprow){ 693 z = zarray + ridx[i]; 694 y = yarray + ridx[i]; 695 } 696 sum = y[0]; 697 while (n--) sum += *v++ * x[*idx++]; 698 z[0] = sum; 699 if (!usecprow){ 700 z++; y++; 701 } 702 } 703 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 704 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 705 if (zz != yy) { 706 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 707 } 708 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2" 714 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 715 { 716 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 717 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2; 718 PetscScalar x1,x2,*yarray,*zarray; 719 MatScalar *v; 720 PetscErrorCode ierr; 721 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 722 PetscTruth usecprow=a->compressedrow.use; 723 724 PetscFunctionBegin; 725 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 726 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 727 if (zz != yy) { 728 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 729 } else { 730 zarray = yarray; 731 } 732 733 idx = a->j; 734 v = a->a; 735 if (usecprow){ 736 if (zz != yy){ 737 ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 738 } 739 mbs = a->compressedrow.nrows; 740 ii = a->compressedrow.i; 741 ridx = a->compressedrow.rindex; 742 if (zz != yy){ 743 ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr); 744 } 745 } else { 746 ii = a->i; 747 y = yarray; 748 z = zarray; 749 } 750 751 for (i=0; i<mbs; i++) { 752 n = ii[1] - ii[0]; ii++; 753 if (usecprow){ 754 z = zarray + 2*ridx[i]; 755 y = yarray + 2*ridx[i]; 756 } 757 sum1 = y[0]; sum2 = y[1]; 758 for (j=0; j<n; j++) { 759 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 760 sum1 += v[0]*x1 + v[2]*x2; 761 sum2 += v[1]*x1 + v[3]*x2; 762 v += 4; 763 } 764 z[0] = sum1; z[1] = sum2; 765 if (!usecprow){ 766 z += 2; y += 2; 767 } 768 } 769 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 770 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 771 if (zz != yy) { 772 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 773 } 774 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 775 PetscFunctionReturn(0); 776 } 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3" 780 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; 784 MatScalar *v; 785 PetscErrorCode ierr; 786 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 787 PetscTruth usecprow=a->compressedrow.use; 788 789 PetscFunctionBegin; 790 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 791 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 792 if (zz != yy) { 793 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 794 } else { 795 zarray = yarray; 796 } 797 798 idx = a->j; 799 v = a->a; 800 if (usecprow){ 801 if (zz != yy){ 802 ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 803 } 804 mbs = a->compressedrow.nrows; 805 ii = a->compressedrow.i; 806 ridx = a->compressedrow.rindex; 807 } else { 808 ii = a->i; 809 y = yarray; 810 z = zarray; 811 } 812 813 for (i=0; i<mbs; i++) { 814 n = ii[1] - ii[0]; ii++; 815 if (usecprow){ 816 z = zarray + 3*ridx[i]; 817 y = yarray + 3*ridx[i]; 818 } 819 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 820 for (j=0; j<n; j++) { 821 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 822 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 823 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 824 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 825 v += 9; 826 } 827 z[0] = sum1; z[1] = sum2; z[2] = sum3; 828 if (!usecprow){ 829 z += 3; y += 3; 830 } 831 } 832 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 833 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 834 if (zz != yy) { 835 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 836 } 837 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4" 843 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 844 { 845 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 846 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; 847 MatScalar *v; 848 PetscErrorCode ierr; 849 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 850 PetscTruth usecprow=a->compressedrow.use; 851 852 PetscFunctionBegin; 853 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 854 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 855 if (zz != yy) { 856 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 857 } else { 858 zarray = yarray; 859 } 860 861 idx = a->j; 862 v = a->a; 863 if (usecprow){ 864 if (zz != yy){ 865 ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 866 } 867 mbs = a->compressedrow.nrows; 868 ii = a->compressedrow.i; 869 ridx = a->compressedrow.rindex; 870 } else { 871 ii = a->i; 872 y = yarray; 873 z = zarray; 874 } 875 876 for (i=0; i<mbs; i++) { 877 n = ii[1] - ii[0]; ii++; 878 if (usecprow){ 879 z = zarray + 4*ridx[i]; 880 y = yarray + 4*ridx[i]; 881 } 882 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 883 for (j=0; j<n; j++) { 884 xb = x + 4*(*idx++); 885 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 886 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 887 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 888 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 889 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 890 v += 16; 891 } 892 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 893 if (!usecprow){ 894 z += 4; y += 4; 895 } 896 } 897 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 898 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 899 if (zz != yy) { 900 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 901 } 902 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 #undef __FUNCT__ 907 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5" 908 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 909 { 910 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 911 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 912 PetscScalar *yarray,*zarray; 913 MatScalar *v; 914 PetscErrorCode ierr; 915 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 916 PetscTruth usecprow=a->compressedrow.use; 917 918 PetscFunctionBegin; 919 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 920 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 921 if (zz != yy) { 922 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 923 } else { 924 zarray = yarray; 925 } 926 927 idx = a->j; 928 v = a->a; 929 if (usecprow){ 930 if (zz != yy){ 931 ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 932 } 933 mbs = a->compressedrow.nrows; 934 ii = a->compressedrow.i; 935 ridx = a->compressedrow.rindex; 936 } else { 937 ii = a->i; 938 y = yarray; 939 z = zarray; 940 } 941 942 for (i=0; i<mbs; i++) { 943 n = ii[1] - ii[0]; ii++; 944 if (usecprow){ 945 z = zarray + 5*ridx[i]; 946 y = yarray + 5*ridx[i]; 947 } 948 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 949 for (j=0; j<n; j++) { 950 xb = x + 5*(*idx++); 951 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 952 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 953 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 954 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 955 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 956 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 957 v += 25; 958 } 959 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 960 if (!usecprow){ 961 z += 5; y += 5; 962 } 963 } 964 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 965 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 966 if (zz != yy) { 967 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 968 } 969 ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6" 974 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 975 { 976 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 977 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 978 PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; 979 MatScalar *v; 980 PetscErrorCode ierr; 981 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 982 PetscTruth usecprow=a->compressedrow.use; 983 984 PetscFunctionBegin; 985 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 986 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 987 if (zz != yy) { 988 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 989 } else { 990 zarray = yarray; 991 } 992 993 idx = a->j; 994 v = a->a; 995 if (usecprow){ 996 if (zz != yy){ 997 ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 998 } 999 mbs = a->compressedrow.nrows; 1000 ii = a->compressedrow.i; 1001 ridx = a->compressedrow.rindex; 1002 } else { 1003 ii = a->i; 1004 y = yarray; 1005 z = zarray; 1006 } 1007 1008 for (i=0; i<mbs; i++) { 1009 n = ii[1] - ii[0]; ii++; 1010 if (usecprow){ 1011 z = zarray + 6*ridx[i]; 1012 y = yarray + 6*ridx[i]; 1013 } 1014 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; 1015 for (j=0; j<n; j++) { 1016 xb = x + 6*(*idx++); 1017 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 1018 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1019 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1020 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1021 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1022 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1023 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 1024 v += 36; 1025 } 1026 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 1027 if (!usecprow){ 1028 z += 6; y += 6; 1029 } 1030 } 1031 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1032 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1033 if (zz != yy) { 1034 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1035 } 1036 ierr = PetscLogFlops(72.0*a->nz);CHKERRQ(ierr); 1037 PetscFunctionReturn(0); 1038 } 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7" 1042 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1043 { 1044 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1045 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1046 PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; 1047 MatScalar *v; 1048 PetscErrorCode ierr; 1049 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 1050 PetscTruth usecprow=a->compressedrow.use; 1051 1052 PetscFunctionBegin; 1053 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1054 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 1055 if (zz != yy) { 1056 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1057 } else { 1058 zarray = yarray; 1059 } 1060 1061 idx = a->j; 1062 v = a->a; 1063 if (usecprow){ 1064 if (zz != yy){ 1065 ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1066 } 1067 mbs = a->compressedrow.nrows; 1068 ii = a->compressedrow.i; 1069 ridx = a->compressedrow.rindex; 1070 } else { 1071 ii = a->i; 1072 y = yarray; 1073 z = zarray; 1074 } 1075 1076 for (i=0; i<mbs; i++) { 1077 n = ii[1] - ii[0]; ii++; 1078 if (usecprow){ 1079 z = zarray + 7*ridx[i]; 1080 y = yarray + 7*ridx[i]; 1081 } 1082 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1083 for (j=0; j<n; j++) { 1084 xb = x + 7*(*idx++); 1085 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1086 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1087 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1088 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1089 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1090 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1091 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1092 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1093 v += 49; 1094 } 1095 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1096 if (!usecprow){ 1097 z += 7; y += 7; 1098 } 1099 } 1100 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1101 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1102 if (zz != yy) { 1103 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1104 } 1105 ierr = PetscLogFlops(98.0*a->nz);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N" 1111 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1112 { 1113 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1114 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 1115 MatScalar *v; 1116 PetscErrorCode ierr; 1117 PetscInt mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 1118 PetscInt ncols,k,*ridx=PETSC_NULL; 1119 PetscTruth usecprow=a->compressedrow.use; 1120 1121 PetscFunctionBegin; 1122 ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 1123 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1124 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1125 1126 idx = a->j; 1127 v = a->a; 1128 if (usecprow){ 1129 mbs = a->compressedrow.nrows; 1130 ii = a->compressedrow.i; 1131 ridx = a->compressedrow.rindex; 1132 } else { 1133 mbs = a->mbs; 1134 ii = a->i; 1135 z = zarray; 1136 } 1137 1138 if (!a->mult_work) { 1139 k = PetscMax(A->rmap->n,A->cmap->n); 1140 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1141 } 1142 work = a->mult_work; 1143 for (i=0; i<mbs; i++) { 1144 n = ii[1] - ii[0]; ii++; 1145 ncols = n*bs; 1146 workt = work; 1147 for (j=0; j<n; j++) { 1148 xb = x + bs*(*idx++); 1149 for (k=0; k<bs; k++) workt[k] = xb[k]; 1150 workt += bs; 1151 } 1152 if (usecprow) z = zarray + bs*ridx[i]; 1153 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1154 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ 1155 v += n*bs2; 1156 if (!usecprow){ 1157 z += bs; 1158 } 1159 } 1160 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1161 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1162 ierr = PetscLogFlops(2.0*a->nz*bs2);CHKERRQ(ierr); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 #undef __FUNCT__ 1167 #define __FUNCT__ "MatMultTranspose_SeqBAIJ" 1168 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) 1169 { 1170 PetscScalar zero = 0.0; 1171 PetscErrorCode ierr; 1172 1173 PetscFunctionBegin; 1174 ierr = VecSet(zz,zero);CHKERRQ(ierr); 1175 ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ" 1181 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1182 1183 { 1184 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1185 PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5; 1186 MatScalar *v; 1187 PetscErrorCode ierr; 1188 PetscInt mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL; 1189 Mat_CompressedRow cprow = a->compressedrow; 1190 PetscTruth usecprow=cprow.use; 1191 1192 PetscFunctionBegin; 1193 if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); } 1194 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1195 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 1196 1197 idx = a->j; 1198 v = a->a; 1199 if (usecprow){ 1200 mbs = cprow.nrows; 1201 ii = cprow.i; 1202 ridx = cprow.rindex; 1203 } else { 1204 mbs=a->mbs; 1205 ii = a->i; 1206 xb = x; 1207 } 1208 1209 switch (bs) { 1210 case 1: 1211 for (i=0; i<mbs; i++) { 1212 if (usecprow) xb = x + ridx[i]; 1213 x1 = xb[0]; 1214 ib = idx + ii[0]; 1215 n = ii[1] - ii[0]; ii++; 1216 for (j=0; j<n; j++) { 1217 rval = ib[j]; 1218 z[rval] += *v * x1; 1219 v++; 1220 } 1221 if (!usecprow) xb++; 1222 } 1223 break; 1224 case 2: 1225 for (i=0; i<mbs; i++) { 1226 if (usecprow) xb = x + 2*ridx[i]; 1227 x1 = xb[0]; x2 = xb[1]; 1228 ib = idx + ii[0]; 1229 n = ii[1] - ii[0]; ii++; 1230 for (j=0; j<n; j++) { 1231 rval = ib[j]*2; 1232 z[rval++] += v[0]*x1 + v[1]*x2; 1233 z[rval++] += v[2]*x1 + v[3]*x2; 1234 v += 4; 1235 } 1236 if (!usecprow) xb += 2; 1237 } 1238 break; 1239 case 3: 1240 for (i=0; i<mbs; i++) { 1241 if (usecprow) xb = x + 3*ridx[i]; 1242 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1243 ib = idx + ii[0]; 1244 n = ii[1] - ii[0]; ii++; 1245 for (j=0; j<n; j++) { 1246 rval = ib[j]*3; 1247 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1248 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1249 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1250 v += 9; 1251 } 1252 if (!usecprow) xb += 3; 1253 } 1254 break; 1255 case 4: 1256 for (i=0; i<mbs; i++) { 1257 if (usecprow) xb = x + 4*ridx[i]; 1258 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1259 ib = idx + ii[0]; 1260 n = ii[1] - ii[0]; ii++; 1261 for (j=0; j<n; j++) { 1262 rval = ib[j]*4; 1263 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1264 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1265 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1266 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1267 v += 16; 1268 } 1269 if (!usecprow) xb += 4; 1270 } 1271 break; 1272 case 5: 1273 for (i=0; i<mbs; i++) { 1274 if (usecprow) xb = x + 5*ridx[i]; 1275 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1276 x4 = xb[3]; x5 = xb[4]; 1277 ib = idx + ii[0]; 1278 n = ii[1] - ii[0]; ii++; 1279 for (j=0; j<n; j++) { 1280 rval = ib[j]*5; 1281 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1282 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1283 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1284 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1285 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1286 v += 25; 1287 } 1288 if (!usecprow) xb += 5; 1289 } 1290 break; 1291 default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1292 PetscInt ncols,k; 1293 PetscScalar *work,*workt,*xtmp; 1294 1295 if (!a->mult_work) { 1296 k = PetscMax(A->rmap->n,A->cmap->n); 1297 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1298 } 1299 work = a->mult_work; 1300 xtmp = x; 1301 for (i=0; i<mbs; i++) { 1302 n = ii[1] - ii[0]; ii++; 1303 ncols = n*bs; 1304 ierr = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1305 if (usecprow) { 1306 xtmp = x + bs*ridx[i]; 1307 } 1308 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); 1309 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ 1310 v += n*bs2; 1311 if (!usecprow) xtmp += bs; 1312 workt = work; 1313 for (j=0; j<n; j++) { 1314 zb = z + bs*(*idx++); 1315 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1316 workt += bs; 1317 } 1318 } 1319 } 1320 } 1321 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1322 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1323 ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatScale_SeqBAIJ" 1329 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) 1330 { 1331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1332 PetscInt totalnz = a->bs2*a->nz; 1333 PetscScalar oalpha = alpha; 1334 PetscErrorCode ierr; 1335 PetscBLASInt one = 1,tnz = PetscBLASIntCast(totalnz); 1336 1337 PetscFunctionBegin; 1338 BLASscal_(&tnz,&oalpha,a->a,&one); 1339 ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatNorm_SeqBAIJ" 1345 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 1346 { 1347 PetscErrorCode ierr; 1348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1349 MatScalar *v = a->a; 1350 PetscReal sum = 0.0; 1351 PetscInt i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1; 1352 1353 PetscFunctionBegin; 1354 if (type == NORM_FROBENIUS) { 1355 for (i=0; i< bs2*nz; i++) { 1356 #if defined(PETSC_USE_COMPLEX) 1357 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1358 #else 1359 sum += (*v)*(*v); v++; 1360 #endif 1361 } 1362 *norm = sqrt(sum); 1363 } else if (type == NORM_1) { /* maximum column sum */ 1364 PetscReal *tmp; 1365 PetscInt *bcol = a->j; 1366 ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1367 ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr); 1368 for (i=0; i<nz; i++){ 1369 for (j=0; j<bs; j++){ 1370 k1 = bs*(*bcol) + j; /* column index */ 1371 for (k=0; k<bs; k++){ 1372 tmp[k1] += PetscAbsScalar(*v); v++; 1373 } 1374 } 1375 bcol++; 1376 } 1377 *norm = 0.0; 1378 for (j=0; j<A->cmap->n; j++) { 1379 if (tmp[j] > *norm) *norm = tmp[j]; 1380 } 1381 ierr = PetscFree(tmp);CHKERRQ(ierr); 1382 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1383 *norm = 0.0; 1384 for (k=0; k<bs; k++) { 1385 for (j=0; j<a->mbs; j++) { 1386 v = a->a + bs2*a->i[j] + k; 1387 sum = 0.0; 1388 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 1389 for (k1=0; k1<bs; k1++){ 1390 sum += PetscAbsScalar(*v); 1391 v += bs; 1392 } 1393 } 1394 if (sum > *norm) *norm = sum; 1395 } 1396 } 1397 } else { 1398 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatEqual_SeqBAIJ" 1406 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg) 1407 { 1408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data; 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1413 if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { 1414 *flg = PETSC_FALSE; 1415 PetscFunctionReturn(0); 1416 } 1417 1418 /* if the a->i are the same */ 1419 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1420 if (!*flg) { 1421 PetscFunctionReturn(0); 1422 } 1423 1424 /* if a->j are the same */ 1425 ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1426 if (!*flg) { 1427 PetscFunctionReturn(0); 1428 } 1429 /* if a->a are the same */ 1430 ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ" 1437 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1438 { 1439 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1440 PetscErrorCode ierr; 1441 PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1442 PetscScalar *x,zero = 0.0; 1443 MatScalar *aa,*aa_j; 1444 1445 PetscFunctionBegin; 1446 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1447 bs = A->rmap->bs; 1448 aa = a->a; 1449 ai = a->i; 1450 aj = a->j; 1451 ambs = a->mbs; 1452 bs2 = a->bs2; 1453 1454 ierr = VecSet(v,zero);CHKERRQ(ierr); 1455 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1456 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1457 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1458 for (i=0; i<ambs; i++) { 1459 for (j=ai[i]; j<ai[i+1]; j++) { 1460 if (aj[j] == i) { 1461 row = i*bs; 1462 aa_j = aa+j*bs2; 1463 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1464 break; 1465 } 1466 } 1467 } 1468 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ" 1474 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1475 { 1476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1477 PetscScalar *l,*r,x,*li,*ri; 1478 MatScalar *aa,*v; 1479 PetscErrorCode ierr; 1480 PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1481 1482 PetscFunctionBegin; 1483 ai = a->i; 1484 aj = a->j; 1485 aa = a->a; 1486 m = A->rmap->n; 1487 n = A->cmap->n; 1488 bs = A->rmap->bs; 1489 mbs = a->mbs; 1490 bs2 = a->bs2; 1491 if (ll) { 1492 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1493 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1494 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1495 for (i=0; i<mbs; i++) { /* for each block row */ 1496 M = ai[i+1] - ai[i]; 1497 li = l + i*bs; 1498 v = aa + bs2*ai[i]; 1499 for (j=0; j<M; j++) { /* for each block */ 1500 for (k=0; k<bs2; k++) { 1501 (*v++) *= li[k%bs]; 1502 } 1503 } 1504 } 1505 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1506 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1507 } 1508 1509 if (rr) { 1510 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1511 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1512 if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1513 for (i=0; i<mbs; i++) { /* for each block row */ 1514 M = ai[i+1] - ai[i]; 1515 v = aa + bs2*ai[i]; 1516 for (j=0; j<M; j++) { /* for each block */ 1517 ri = r + bs*aj[ai[i]+j]; 1518 for (k=0; k<bs; k++) { 1519 x = ri[k]; 1520 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1521 } 1522 } 1523 } 1524 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1525 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatGetInfo_SeqBAIJ" 1533 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1534 { 1535 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1536 1537 PetscFunctionBegin; 1538 info->block_size = a->bs2; 1539 info->nz_allocated = a->maxnz; 1540 info->nz_used = a->bs2*a->nz; 1541 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1542 info->assemblies = A->num_ass; 1543 info->mallocs = a->reallocs; 1544 info->memory = ((PetscObject)A)->mem; 1545 if (A->factor) { 1546 info->fill_ratio_given = A->info.fill_ratio_given; 1547 info->fill_ratio_needed = A->info.fill_ratio_needed; 1548 info->factor_mallocs = A->info.factor_mallocs; 1549 } else { 1550 info->fill_ratio_given = 0; 1551 info->fill_ratio_needed = 0; 1552 info->factor_mallocs = 0; 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatZeroEntries_SeqBAIJ" 1560 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) 1561 { 1562 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1563 PetscErrorCode ierr; 1564 1565 PetscFunctionBegin; 1566 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569