1 /*$Id: sbaij2.c,v 1.3 2000/06/25 17:01:23 balay Exp balay $*/ 2 3 #include "petscsys.h" 4 #include "src/mat/impls/baij/seq/baij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/spops.h" 7 #include "src/inline/ilu.h" 8 #include "petscbt.h" 9 #include "sbaij.h" 10 11 #undef __FUNC__ 12 #define __FUNC__ "MatIncreaseOverlap_SeqSBAIJ" 13 int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 16 int row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival; 17 int start,end,*ai,*aj,bs,*nidx2; 18 PetscBT table; 19 20 PetscFunctionBegin; 21 m = a->mbs; 22 ai = a->i; 23 aj = a->j; 24 bs = a->bs; 25 26 if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative overlap specified"); 27 28 ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 29 nidx = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(nidx); 30 nidx2 = (int *)PetscMalloc((a->m+1)*sizeof(int));CHKPTRQ(nidx2); 31 32 for (i=0; i<is_max; i++) { 33 /* Initialise the two local arrays */ 34 isz = 0; 35 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 36 37 /* Extract the indices, assume there can be duplicate entries */ 38 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 39 ierr = ISGetSize(is[i],&n);CHKERRQ(ierr); 40 41 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 42 for (j=0; j<n ; ++j){ 43 ival = idx[j]/bs; /* convert the indices into block indices */ 44 if (ival>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"index greater than mat-dim"); 45 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 46 } 47 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 48 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 49 k = 0; 50 for (j=0; j<ov; j++){ /* for each overlap*/ 51 n = isz; 52 for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 53 row = nidx[k]; 54 start = ai[row]; 55 end = ai[row+1]; 56 for (l = start; l<end ; l++){ 57 val = aj[l]; 58 if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 59 } 60 } 61 } 62 /* expand the Index Set */ 63 for (j=0; j<isz; j++) { 64 for (k=0; k<bs; k++) 65 nidx2[j*bs+k] = nidx[j]*bs+k; 66 } 67 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 68 } 69 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 70 ierr = PetscFree(nidx);CHKERRQ(ierr); 71 ierr = PetscFree(nidx2);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNC__ 76 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ_Private" 77 int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 78 { 79 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 80 int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 81 int row,mat_i,*mat_j,tcol,*mat_ilen; 82 int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 83 int *aj = a->j,*ai = a->i; 84 MatScalar *mat_a; 85 Mat C; 86 PetscTruth flag; 87 88 PetscFunctionBegin; 89 90 if ( isrow != iscol ) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 91 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 92 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"IS is not sorted"); 93 94 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 95 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 96 97 smap = (int*)PetscMalloc((1+oldcols)*sizeof(int));CHKPTRQ(smap); 98 ssmap = smap; 99 lens = (int*)PetscMalloc((1+nrows)*sizeof(int));CHKPTRQ(lens); 100 ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 101 for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 102 /* determine lens of each row */ 103 for (i=0; i<nrows; i++) { 104 kstart = ai[irow[i]]; 105 kend = kstart + a->ilen[irow[i]]; 106 lens[i] = 0; 107 for (k=kstart; k<kend; k++) { 108 if (ssmap[aj[k]]) { 109 lens[i]++; 110 } 111 } 112 } 113 /* Create and fill new matrix */ 114 if (scall == MAT_REUSE_MATRIX) { 115 c = (Mat_SeqSBAIJ *)((*B)->data); 116 117 if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Submatrix wrong size"); 118 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 119 if (flag == PETSC_FALSE) { 120 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong no of nonzeros"); 121 } 122 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 123 C = *B; 124 } else { 125 ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr); 126 } 127 c = (Mat_SeqSBAIJ *)(C->data); 128 for (i=0; i<nrows; i++) { 129 row = irow[i]; 130 kstart = ai[row]; 131 kend = kstart + a->ilen[row]; 132 mat_i = c->i[i]; 133 mat_j = c->j + mat_i; 134 mat_a = c->a + mat_i*bs2; 135 mat_ilen = c->ilen + i; 136 for (k=kstart; k<kend; k++) { 137 if ((tcol=ssmap[a->j[k]])) { 138 *mat_j++ = tcol - 1; 139 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 140 mat_a += bs2; 141 (*mat_ilen)++; 142 } 143 } 144 } 145 146 /* Free work space */ 147 ierr = PetscFree(smap);CHKERRQ(ierr); 148 ierr = PetscFree(lens);CHKERRQ(ierr); 149 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 150 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 151 152 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 153 *B = C; 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNC__ 158 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ" 159 int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 160 { 161 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 162 IS is1; 163 int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 164 165 PetscFunctionBegin; 166 if ( isrow != iscol ) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 167 168 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 169 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 170 171 /* Verify if the indices corespond to each element in a block 172 and form the IS with compressed IS */ 173 vary = (int*)PetscMalloc(2*(a->mbs+1)*sizeof(int));CHKPTRQ(vary); 174 iary = vary + a->mbs; 175 ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 176 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 177 178 count = 0; 179 for (i=0; i<a->mbs; i++) { 180 if (vary[i]!=0 && vary[i]!=bs) SETERRA(1,0,"Index set does not match blocks"); 181 if (vary[i]==bs) iary[count++] = i; 182 } 183 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 184 185 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 186 ierr = PetscFree(vary);CHKERRQ(ierr); 187 188 ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 189 ISDestroy(is1); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNC__ 194 #define __FUNC__ "MatGetSubMatrices_SeqSBAIJ" 195 int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B) 196 { 197 int ierr,i; 198 199 PetscFunctionBegin; 200 if (scall == MAT_INITIAL_MATRIX) { 201 *B = (Mat*)PetscMalloc((n+1)*sizeof(Mat));CHKPTRQ(*B); 202 } 203 204 for (i=0; i<n; i++) { 205 ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 206 } 207 PetscFunctionReturn(0); 208 } 209 210 /* -------------------------------------------------------*/ 211 /* Should check that shapes of vectors and matrices match */ 212 /* -------------------------------------------------------*/ 213 #include "pinclude/blaslapack.h" 214 215 #undef __FUNC__ 216 #define __FUNC__ "MatMult_SeqSBAIJ_1" 217 int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 218 { 219 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 220 Scalar *x,*z,*xb,x1,zero=0.0; 221 MatScalar *v; 222 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 223 224 PetscFunctionBegin; 225 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 226 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 227 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 228 229 v = a->a; 230 xb = x; 231 232 for (i=0; i<mbs; i++) { 233 n = ai[1] - ai[0]; /* length of i_th row of A */ 234 x1 = xb[0]; 235 ib = aj + *ai; 236 z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 237 for (j=1; j<n; j++) { 238 cval = *ib; 239 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 240 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 241 } 242 xb++; ai++; 243 } 244 245 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 246 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 247 PLogFlops(2*(a->s_nz*2 - a->m) - a->m); /* s_nz = (nz+m)/2 */ 248 PetscFunctionReturn(0); 249 } 250 251 #undef __FUNC__ 252 #define __FUNC__ "MatMult_SeqSBAIJ_2" 253 int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 254 { 255 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 256 Scalar *x,*z,*xb,x1,x2,zero=0.0; 257 MatScalar *v; 258 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 259 260 261 PetscFunctionBegin; 262 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 263 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 264 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 265 266 v = a->a; 267 xb = x; 268 269 for (i=0; i<mbs; i++) { 270 n = ai[1] - ai[0]; /* length of i_th block row of A */ 271 x1 = xb[0]; x2 = xb[1]; 272 ib = aj + *ai; 273 /* (diag of A)*x */ 274 z[2*i] += v[0]*x1 + v[2]*x2; 275 z[2*i+1] += v[2]*x1 + v[3]*x2; 276 v += 4; 277 for (j=1; j<n; j++) { 278 /* (strict lower triangular part of A)*x */ 279 cval = ib[j]*2; 280 z[cval] += v[0]*x1 + v[1]*x2; 281 z[cval+1] += v[2]*x1 + v[3]*x2; 282 /* (strict upper triangular part of A)*x */ 283 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 284 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 285 v += 4; 286 } 287 xb +=2; ai++; 288 } 289 290 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 291 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 292 PLogFlops(8*(a->s_nz*2 - a->m) - a->m); 293 PetscFunctionReturn(0); 294 } 295 296 #undef __FUNC__ 297 #define __FUNC__ "MatMult_SeqSBAIJ_3" 298 int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 299 { 300 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 301 Scalar *x,*z,*xb,x1,x2,x3,zero=0.0; 302 MatScalar *v; 303 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 304 305 306 PetscFunctionBegin; 307 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 308 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 309 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 310 311 v = a->a; 312 xb = x; 313 314 for (i=0; i<mbs; i++) { 315 n = ai[1] - ai[0]; /* length of i_th block row of A */ 316 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 317 ib = aj + *ai; 318 /* (diag of A)*x */ 319 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 320 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 321 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 322 v += 9; 323 for (j=1; j<n; j++) { 324 /* (strict lower triangular part of A)*x */ 325 cval = ib[j]*3; 326 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 327 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 328 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 329 /* (strict upper triangular part of A)*x */ 330 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 331 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 332 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 333 v += 9; 334 } 335 xb +=3; ai++; 336 } 337 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 340 PLogFlops(18*(a->s_nz*2 - a->m) - a->m); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatMult_SeqSBAIJ_4" 346 int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 347 { 348 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 349 Scalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 350 MatScalar *v; 351 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 352 353 PetscFunctionBegin; 354 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 355 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 356 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 357 358 v = a->a; 359 xb = x; 360 361 for (i=0; i<mbs; i++) { 362 n = ai[1] - ai[0]; /* length of i_th block row of A */ 363 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 364 ib = aj + *ai; 365 /* (diag of A)*x */ 366 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 367 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 368 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 369 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 370 v += 16; 371 for (j=1; j<n; j++) { 372 /* (strict lower triangular part of A)*x */ 373 cval = ib[j]*4; 374 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 375 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 376 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 377 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 378 /* (strict upper triangular part of A)*x */ 379 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 380 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 381 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 382 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 383 v += 16; 384 } 385 xb +=4; ai++; 386 } 387 388 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 389 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 390 PLogFlops(32*(a->s_nz*2 - a->m) - a->m); 391 PetscFunctionReturn(0); 392 } 393 394 #undef __FUNC__ 395 #define __FUNC__ "MatMult_SeqSBAIJ_5" 396 int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 397 { 398 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 399 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 400 MatScalar *v; 401 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 402 403 PetscFunctionBegin; 404 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 405 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 406 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 407 408 v = a->a; 409 xb = x; 410 411 for (i=0; i<mbs; i++) { 412 n = ai[1] - ai[0]; /* length of i_th block row of A */ 413 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 414 ib = aj + *ai; 415 /* (diag of A)*x */ 416 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 417 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 418 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 419 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 420 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 421 v += 25; 422 for (j=1; j<n; j++) { 423 /* (strict lower triangular part of A)*x */ 424 cval = ib[j]*5; 425 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 426 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 427 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 428 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 429 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 430 /* (strict upper triangular part of A)*x */ 431 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 432 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 433 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 434 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 435 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 436 v += 25; 437 } 438 xb +=5; ai++; 439 } 440 441 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 442 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 443 PLogFlops(50*(a->s_nz*2 - a->m) - a->m); 444 PetscFunctionReturn(0); 445 } 446 447 448 #undef __FUNC__ 449 #define __FUNC__ "MatMult_SeqSBAIJ_6" 450 int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 451 { 452 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 453 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 454 MatScalar *v; 455 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 456 457 PetscFunctionBegin; 458 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 459 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 460 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 461 462 v = a->a; 463 xb = x; 464 465 for (i=0; i<mbs; i++) { 466 n = ai[1] - ai[0]; /* length of i_th block row of A */ 467 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 468 ib = aj + *ai; 469 /* (diag of A)*x */ 470 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 471 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 472 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 473 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 474 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 475 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 476 v += 36; 477 for (j=1; j<n; j++) { 478 /* (strict lower triangular part of A)*x */ 479 cval = ib[j]*6; 480 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 481 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 482 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 483 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 484 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 485 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 486 /* (strict upper triangular part of A)*x */ 487 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 488 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 489 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 490 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 491 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 492 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 493 v += 36; 494 } 495 xb +=6; ai++; 496 } 497 498 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 499 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 500 PLogFlops(72*(a->s_nz*2 - a->m) - a->m); 501 PetscFunctionReturn(0); 502 } 503 #undef __FUNC__ 504 #define __FUNC__ "MatMult_SeqSBAIJ_7" 505 int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 506 { 507 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 508 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 509 MatScalar *v; 510 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 511 512 PetscFunctionBegin; 513 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 514 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 515 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 516 517 v = a->a; 518 xb = x; 519 520 for (i=0; i<mbs; i++) { 521 n = ai[1] - ai[0]; /* length of i_th block row of A */ 522 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 523 ib = aj + *ai; 524 /* (diag of A)*x */ 525 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 526 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 527 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 528 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 529 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 530 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 531 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 532 v += 49; 533 for (j=1; j<n; j++) { 534 /* (strict lower triangular part of A)*x */ 535 cval = ib[j]*7; 536 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 537 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 538 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 539 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 540 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 541 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 542 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 543 /* (strict upper triangular part of A)*x */ 544 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 545 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 546 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 547 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 548 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 549 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 550 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 551 v += 49; 552 } 553 xb +=7; ai++; 554 } 555 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 556 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 557 PLogFlops(98*(a->s_nz*2 - a->m) - a->m); 558 PetscFunctionReturn(0); 559 } 560 561 /* 562 This will not work with MatScalar == float because it calls the BLAS 563 */ 564 #undef __FUNC__ 565 #define __FUNC__ "MatMult_SeqSBAIJ_N" 566 int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 567 { 568 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 569 Scalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 570 MatScalar *v; 571 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 572 int ncols,k; 573 574 PetscFunctionBegin; 575 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 576 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 577 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 578 579 aj = a->j; 580 v = a->a; 581 ii = a->i; 582 583 if (!a->mult_work) { 584 k = a->m; 585 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 586 } 587 work = a->mult_work; 588 589 for (i=0; i<mbs; i++) { 590 n = ii[1] - ii[0]; ncols = n*bs; 591 workt = work; idx=aj+ii[0]; 592 593 /* upper triangular part */ 594 for (j=0; j<n; j++) { 595 /* col = bs*(*idx); */ 596 xb = x_ptr + bs*(*idx++); 597 for (k=0; k<bs; k++) workt[k] = xb[k]; 598 workt += bs; 599 } 600 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 601 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 602 603 /* strict lower triangular part */ 604 ncols -= bs; 605 if (ncols > 0){ 606 workt = work; 607 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 608 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 609 610 idx=aj+ii[0]+1; 611 for (j=1; j<n; j++) { 612 zb = z_ptr + bs*(*idx); 613 idx++; 614 for (k=0; k<bs; k++) zb[k] += workt[k] ; 615 workt += bs; 616 } 617 } 618 619 x += bs; v += n*bs2; z += bs; ii++; 620 } 621 622 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 623 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 624 PLogFlops(2*(a->s_nz*2 - a->m)*bs2 - a->m); 625 PetscFunctionReturn(0); 626 } 627 628 #undef __FUNC__ 629 #define __FUNC__ "MatMultAdd_SeqSBAIJ_1" 630 int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 631 { 632 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 633 Scalar *x,*y,*z,*xb,x1; 634 MatScalar *v; 635 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 636 637 PetscFunctionBegin; 638 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 639 if (yy != xx) { 640 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 641 } else { 642 y = x; 643 } 644 if (zz != yy) { 645 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 646 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 647 } else { 648 z = y; 649 } 650 651 v = a->a; 652 xb = x; 653 654 for (i=0; i<mbs; i++) { 655 n = ai[1] - ai[0]; /* length of i_th row of A */ 656 x1 = xb[0]; 657 ib = aj + *ai; 658 z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 659 for (j=1; j<n; j++) { 660 cval = *ib; 661 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 662 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 663 } 664 xb++; ai++; 665 } 666 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 669 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 670 671 PLogFlops(2*(a->s_nz*2 - a->m)); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNC__ 676 #define __FUNC__ "MatMultAdd_SeqSBAIJ_2" 677 int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 678 { 679 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 680 Scalar *x,*y,*z,*xb,x1,x2; 681 MatScalar *v; 682 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 683 684 PetscFunctionBegin; 685 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 686 if (yy != xx) { 687 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 688 } else { 689 y = x; 690 } 691 if (zz != yy) { 692 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 693 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 694 } else { 695 z = y; 696 } 697 698 v = a->a; 699 xb = x; 700 701 for (i=0; i<mbs; i++) { 702 n = ai[1] - ai[0]; /* length of i_th block row of A */ 703 x1 = xb[0]; x2 = xb[1]; 704 ib = aj + *ai; 705 /* (diag of A)*x */ 706 z[2*i] += v[0]*x1 + v[2]*x2; 707 z[2*i+1] += v[2]*x1 + v[3]*x2; 708 v += 4; 709 for (j=1; j<n; j++) { 710 /* (strict lower triangular part of A)*x */ 711 cval = ib[j]*2; 712 z[cval] += v[0]*x1 + v[1]*x2; 713 z[cval+1] += v[2]*x1 + v[3]*x2; 714 /* (strict upper triangular part of A)*x */ 715 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 716 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 717 v += 4; 718 } 719 xb +=2; ai++; 720 } 721 722 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 723 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 724 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 725 726 PLogFlops(4*(a->s_nz*2 - a->m)); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNC__ 731 #define __FUNC__ "MatMultAdd_SeqSBAIJ_3" 732 int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 733 { 734 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 735 Scalar *x,*y,*z,*xb,x1,x2,x3; 736 MatScalar *v; 737 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 738 739 PetscFunctionBegin; 740 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 741 if (yy != xx) { 742 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 743 } else { 744 y = x; 745 } 746 if (zz != yy) { 747 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 748 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 749 } else { 750 z = y; 751 } 752 753 v = a->a; 754 xb = x; 755 756 for (i=0; i<mbs; i++) { 757 n = ai[1] - ai[0]; /* length of i_th block row of A */ 758 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 759 ib = aj + *ai; 760 /* (diag of A)*x */ 761 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 762 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 763 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 764 v += 9; 765 for (j=1; j<n; j++) { 766 /* (strict lower triangular part of A)*x */ 767 cval = ib[j]*3; 768 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 769 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 770 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 771 /* (strict upper triangular part of A)*x */ 772 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 773 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 774 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 775 v += 9; 776 } 777 xb +=3; ai++; 778 } 779 780 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 781 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 782 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 783 784 PLogFlops(18*(a->s_nz*2 - a->m)); 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNC__ 789 #define __FUNC__ "MatMultAdd_SeqSBAIJ_4" 790 int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 791 { 792 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 793 Scalar *x,*y,*z,*xb,x1,x2,x3,x4; 794 MatScalar *v; 795 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 796 797 PetscFunctionBegin; 798 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 799 if (yy != xx) { 800 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 801 } else { 802 y = x; 803 } 804 if (zz != yy) { 805 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 806 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 807 } else { 808 z = y; 809 } 810 811 v = a->a; 812 xb = x; 813 814 for (i=0; i<mbs; i++) { 815 n = ai[1] - ai[0]; /* length of i_th block row of A */ 816 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 817 ib = aj + *ai; 818 /* (diag of A)*x */ 819 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 820 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 821 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 822 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 823 v += 16; 824 for (j=1; j<n; j++) { 825 /* (strict lower triangular part of A)*x */ 826 cval = ib[j]*4; 827 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 828 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 829 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 830 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 831 /* (strict upper triangular part of A)*x */ 832 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 833 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 834 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 835 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 836 v += 16; 837 } 838 xb +=4; ai++; 839 } 840 841 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 842 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 843 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 844 845 PLogFlops(32*(a->s_nz*2 - a->m)); 846 PetscFunctionReturn(0); 847 } 848 849 #undef __FUNC__ 850 #define __FUNC__ "MatMultAdd_SeqSBAIJ_5" 851 int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 852 { 853 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 854 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 855 MatScalar *v; 856 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 857 858 PetscFunctionBegin; 859 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 860 if (yy != xx) { 861 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 862 } else { 863 y = x; 864 } 865 if (zz != yy) { 866 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 867 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 868 } else { 869 z = y; 870 } 871 872 v = a->a; 873 xb = x; 874 875 for (i=0; i<mbs; i++) { 876 n = ai[1] - ai[0]; /* length of i_th block row of A */ 877 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 878 ib = aj + *ai; 879 /* (diag of A)*x */ 880 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 881 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 882 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 883 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 884 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 885 v += 25; 886 for (j=1; j<n; j++) { 887 /* (strict lower triangular part of A)*x */ 888 cval = ib[j]*5; 889 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 890 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 891 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 892 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 893 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 894 /* (strict upper triangular part of A)*x */ 895 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 896 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 897 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 898 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 899 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 900 v += 25; 901 } 902 xb +=5; ai++; 903 } 904 905 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 906 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 907 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 908 909 PLogFlops(50*(a->s_nz*2 - a->m)); 910 PetscFunctionReturn(0); 911 } 912 #undef __FUNC__ 913 #define __FUNC__ "MatMultAdd_SeqSBAIJ_6" 914 int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 915 { 916 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 917 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 918 MatScalar *v; 919 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 920 921 PetscFunctionBegin; 922 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 923 if (yy != xx) { 924 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 925 } else { 926 y = x; 927 } 928 if (zz != yy) { 929 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 930 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 931 } else { 932 z = y; 933 } 934 935 v = a->a; 936 xb = x; 937 938 for (i=0; i<mbs; i++) { 939 n = ai[1] - ai[0]; /* length of i_th block row of A */ 940 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 941 ib = aj + *ai; 942 /* (diag of A)*x */ 943 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 944 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 945 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 946 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 947 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 948 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 949 v += 36; 950 for (j=1; j<n; j++) { 951 /* (strict lower triangular part of A)*x */ 952 cval = ib[j]*6; 953 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 954 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 955 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 956 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 957 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 958 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 959 /* (strict upper triangular part of A)*x */ 960 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 961 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 962 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 963 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 964 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 965 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 966 v += 36; 967 } 968 xb +=6; ai++; 969 } 970 971 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 972 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 973 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 974 975 PLogFlops(72*(a->s_nz*2 - a->m)); 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNC__ 980 #define __FUNC__ "MatMultAdd_SeqSBAIJ_7" 981 int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 982 { 983 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 984 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 985 MatScalar *v; 986 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 987 988 PetscFunctionBegin; 989 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 990 if (yy != xx) { 991 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 992 } else { 993 y = x; 994 } 995 if (zz != yy) { 996 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 997 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 998 } else { 999 z = y; 1000 } 1001 1002 v = a->a; 1003 xb = x; 1004 1005 for (i=0; i<mbs; i++) { 1006 n = ai[1] - ai[0]; /* length of i_th block row of A */ 1007 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 1008 ib = aj + *ai; 1009 /* (diag of A)*x */ 1010 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 1011 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 1012 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 1013 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 1014 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 1015 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 1016 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1017 v += 49; 1018 for (j=1; j<n; j++) { 1019 /* (strict lower triangular part of A)*x */ 1020 cval = ib[j]*7; 1021 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 1022 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 1023 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 1024 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 1025 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 1026 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 1027 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1028 /* (strict upper triangular part of A)*x */ 1029 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 1030 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 1031 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 1032 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 1033 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 1034 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 1035 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 1036 v += 49; 1037 } 1038 xb +=7; ai++; 1039 } 1040 1041 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1042 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1043 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1044 1045 PLogFlops(98*(a->s_nz*2 - a->m)); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNC__ 1050 #define __FUNC__ "MatMultAdd_SeqSBAIJ_N" 1051 int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1052 { 1053 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1054 Scalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1055 MatScalar *v; 1056 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1057 int ncols,k; 1058 1059 PetscFunctionBegin; 1060 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 1061 if (yy != xx) { 1062 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1063 } else { 1064 y = x; 1065 } 1066 if (zz != yy) { 1067 ierr = VecCopy(yy,zz); CHKERRQ(ierr); 1068 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 1069 } else { 1070 z = y; 1071 } 1072 1073 aj = a->j; 1074 v = a->a; 1075 ii = a->i; 1076 1077 if (!a->mult_work) { 1078 k = a->m; 1079 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1080 } 1081 work = a->mult_work; 1082 1083 1084 for (i=0; i<mbs; i++) { 1085 n = ii[1] - ii[0]; ncols = n*bs; 1086 workt = work; idx=aj+ii[0]; 1087 1088 /* upper triangular part */ 1089 for (j=0; j<n; j++) { 1090 /* col = bs*(*idx); */ 1091 xb = x_ptr + bs*(*idx++); 1092 for (k=0; k<bs; k++) workt[k] = xb[k]; 1093 workt += bs; 1094 } 1095 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 1096 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1097 1098 /* strict lower triangular part */ 1099 ncols -= bs; 1100 if (ncols > 0){ 1101 workt = work; 1102 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 1103 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 1104 1105 idx=aj+ii[0]+1; 1106 for (j=1; j<n; j++) { 1107 zb = z_ptr + bs*(*idx); 1108 idx++; 1109 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1110 workt += bs; 1111 } 1112 } 1113 1114 x += bs; v += n*bs2; z += bs; ii++; 1115 } 1116 1117 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1118 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1119 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1120 1121 PLogFlops(2*(a->s_nz*2 - a->m)); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNC__ 1126 #define __FUNC__ "MatMultTranspose_SeqSBAIJ" 1127 int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 1128 { 1129 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1130 Scalar *xg,*zg,*zb,zero = 0.0; 1131 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 1132 MatScalar *v; 1133 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval; 1134 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1135 1136 PetscFunctionBegin; 1137 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 1138 ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg; 1139 ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg; 1140 1141 idx = a->j; 1142 v = a->a; 1143 ii = a->i; 1144 xb = x; 1145 switch (bs) { 1146 case 1: 1147 for (i=0; i<mbs; i++) { 1148 n = ii[1] - ii[0]; ii++; 1149 x1 = xb[0]; 1150 ib = idx + ai[i]; 1151 for (j=0; j<n; j++) { 1152 rval = ib[j]; 1153 z[rval] += *v * x1; 1154 v++; 1155 } 1156 xb++; 1157 } 1158 break; 1159 case 2: 1160 for (i=0; i<mbs; i++) { 1161 n = ii[1] - ii[0]; ii++; 1162 x1 = xb[0]; x2 = xb[1]; 1163 ib = idx + ai[i]; 1164 for (j=0; j<n; j++) { 1165 rval = ib[j]*2; 1166 z[rval++] += v[0]*x1 + v[1]*x2; 1167 z[rval] += v[2]*x1 + v[3]*x2; 1168 v += 4; 1169 } 1170 xb += 2; 1171 } 1172 break; 1173 case 3: 1174 for (i=0; i<mbs; i++) { 1175 n = ii[1] - ii[0]; ii++; 1176 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1177 ib = idx + ai[i]; 1178 for (j=0; j<n; j++) { 1179 rval = ib[j]*3; 1180 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1181 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1182 z[rval] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1183 v += 9; 1184 } 1185 xb += 3; 1186 } 1187 break; 1188 case 4: 1189 for (i=0; i<mbs; i++) { 1190 n = ii[1] - ii[0]; ii++; 1191 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1192 ib = idx + ai[i]; 1193 for (j=0; j<n; j++) { 1194 rval = ib[j]*4; 1195 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1196 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1197 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1198 z[rval] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1199 v += 16; 1200 } 1201 xb += 4; 1202 } 1203 break; 1204 case 5: 1205 for (i=0; i<mbs; i++) { 1206 n = ii[1] - ii[0]; ii++; 1207 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1208 x4 = xb[3]; x5 = xb[4]; 1209 ib = idx + ai[i]; 1210 for (j=0; j<n; j++) { 1211 rval = ib[j]*5; 1212 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1213 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1214 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1215 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1216 z[rval] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1217 v += 25; 1218 } 1219 xb += 5; 1220 } 1221 break; 1222 case 6: 1223 for (i=0; i<mbs; i++) { 1224 n = ii[1] - ii[0]; ii++; 1225 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1226 x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 1227 ib = idx + ai[i]; 1228 for (j=0; j<n; j++) { 1229 rval = ib[j]*6; 1230 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6; 1231 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6; 1232 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6; 1233 z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6; 1234 z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6; 1235 z[rval] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6; 1236 v += 36; 1237 } 1238 xb += 6; 1239 } 1240 break; 1241 case 7: 1242 for (i=0; i<mbs; i++) { 1243 n = ii[1] - ii[0]; ii++; 1244 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1245 x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1246 ib = idx + ai[i]; 1247 for (j=0; j<n; j++) { 1248 rval = ib[j]*7; 1249 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7; 1250 z[rval++] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7; 1251 z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7; 1252 z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7; 1253 z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7; 1254 z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7; 1255 z[rval] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7; 1256 v += 49; 1257 } 1258 xb += 7; 1259 } 1260 break; 1261 default: { /* block sizes larger then 7 by 7 are handled by BLAS */ 1262 int ncols,k; 1263 Scalar *work,*workt; 1264 1265 if (!a->mult_work) { 1266 k = PetscMax(a->m,a->n); 1267 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1268 } 1269 work = a->mult_work; 1270 for (i=0; i<mbs; i++) { 1271 n = ii[1] - ii[0]; ii++; 1272 ncols = n*bs; 1273 ierr = PetscMemzero(work,ncols*sizeof(Scalar));CHKERRQ(ierr); 1274 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work); 1275 /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */ 1276 v += n*bs2; 1277 x += bs; 1278 workt = work; 1279 for (j=0; j<n; j++) { 1280 zb = z + bs*(*idx++); 1281 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1282 workt += bs; 1283 } 1284 } 1285 } 1286 } 1287 ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr); 1288 ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr); 1289 PLogFlops(2*a->nz*a->bs2 - a->n); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNC__ 1294 #define __FUNC__ "MatMultTransposeAdd_SeqSBAIJ" 1295 int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1296 1297 { 1298 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1299 Scalar *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5; 1300 MatScalar *v; 1301 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1302 1303 PetscFunctionBegin; 1304 ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg; 1305 ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg; 1306 1307 if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); } 1308 1309 idx = a->j; 1310 v = a->a; 1311 ii = a->i; 1312 xb = x; 1313 1314 switch (bs) { 1315 case 1: 1316 for (i=0; i<mbs; i++) { 1317 n = ii[1] - ii[0]; ii++; 1318 x1 = xb[0]; 1319 ib = idx + ai[i]; 1320 for (j=0; j<n; j++) { 1321 rval = ib[j]; 1322 z[rval] += *v * x1; 1323 v++; 1324 } 1325 xb++; 1326 } 1327 break; 1328 case 2: 1329 for (i=0; i<mbs; i++) { 1330 n = ii[1] - ii[0]; ii++; 1331 x1 = xb[0]; x2 = xb[1]; 1332 ib = idx + ai[i]; 1333 for (j=0; j<n; j++) { 1334 rval = ib[j]*2; 1335 z[rval++] += v[0]*x1 + v[1]*x2; 1336 z[rval++] += v[2]*x1 + v[3]*x2; 1337 v += 4; 1338 } 1339 xb += 2; 1340 } 1341 break; 1342 case 3: 1343 for (i=0; i<mbs; i++) { 1344 n = ii[1] - ii[0]; ii++; 1345 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1346 ib = idx + ai[i]; 1347 for (j=0; j<n; j++) { 1348 rval = ib[j]*3; 1349 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1350 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1351 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1352 v += 9; 1353 } 1354 xb += 3; 1355 } 1356 break; 1357 case 4: 1358 for (i=0; i<mbs; i++) { 1359 n = ii[1] - ii[0]; ii++; 1360 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1361 ib = idx + ai[i]; 1362 for (j=0; j<n; j++) { 1363 rval = ib[j]*4; 1364 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1365 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1366 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1367 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1368 v += 16; 1369 } 1370 xb += 4; 1371 } 1372 break; 1373 case 5: 1374 for (i=0; i<mbs; i++) { 1375 n = ii[1] - ii[0]; ii++; 1376 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1377 x4 = xb[3]; x5 = xb[4]; 1378 ib = idx + ai[i]; 1379 for (j=0; j<n; j++) { 1380 rval = ib[j]*5; 1381 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1382 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1383 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1384 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1385 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1386 v += 25; 1387 } 1388 xb += 5; 1389 } 1390 break; 1391 default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1392 int ncols,k; 1393 Scalar *work,*workt; 1394 1395 if (!a->mult_work) { 1396 k = PetscMax(a->m,a->n); 1397 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1398 } 1399 work = a->mult_work; 1400 for (i=0; i<mbs; i++) { 1401 n = ii[1] - ii[0]; ii++; 1402 ncols = n*bs; 1403 ierr = PetscMemzero(work,ncols*sizeof(Scalar));CHKERRQ(ierr); 1404 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work); 1405 /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */ 1406 v += n*bs2; 1407 x += bs; 1408 workt = work; 1409 for (j=0; j<n; j++) { 1410 zb = z + bs*(*idx++); 1411 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1412 workt += bs; 1413 } 1414 } 1415 } 1416 } 1417 ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr); 1418 ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr); 1419 PLogFlops(2*a->nz*a->bs2); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNC__ 1424 #define __FUNC__ "MatScale_SeqSBAIJ" 1425 int MatScale_SeqSBAIJ(Scalar *alpha,Mat inA) 1426 { 1427 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1428 int one = 1,totalnz = a->bs2*a->s_nz; 1429 1430 PetscFunctionBegin; 1431 BLscal_(&totalnz,alpha,a->a,&one); 1432 PLogFlops(totalnz); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNC__ 1437 #define __FUNC__ "MatNorm_SeqSBAIJ" 1438 int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 1439 { 1440 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1441 MatScalar *v = a->a; 1442 PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1443 int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs; 1444 int *jl,*il,jmin,jmax,ierr,nexti,ik; 1445 1446 PetscFunctionBegin; 1447 if (type == NORM_FROBENIUS) { 1448 for (k=0; k<mbs; k++){ 1449 jmin = a->i[k]; jmax = a->i[k+1]; 1450 /* diagonal block */ 1451 for (i=0; i<bs2; i++){ 1452 #if defined(PETSC_USE_COMPLEX) 1453 sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 1454 #else 1455 sum_diag += (*v)*(*v); v++; 1456 #endif 1457 } 1458 for (j=jmin+1; j<jmax; j++){ /* off-diagonal blocks */ 1459 for (i=0; i<bs2; i++){ 1460 #if defined(PETSC_USE_COMPLEX) 1461 sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 1462 #else 1463 sum_off += (*v)*(*v); v++; 1464 #endif 1465 } 1466 } 1467 } 1468 *norm = sqrt(sum_diag + 2*sum_off); 1469 1470 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1471 il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il); 1472 jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl); 1473 sum = (PetscReal*)PetscMalloc(bs*sizeof(PetscReal));CHKPTRQ(sum); 1474 for (i=0; i<mbs; i++) { 1475 jl[i] = mbs; il[0] = 0; 1476 } 1477 1478 *norm = 0.0; 1479 for (k=0; k<mbs; k++) { /* k_th block row */ 1480 for (j=0; j<bs; j++) sum[j]=0.0; 1481 1482 /*-- col sum --*/ 1483 i = jl[k]; /* first |A(i,k)| to be added */ 1484 while (i<mbs){ 1485 nexti = jl[i]; /* next block row to be added */ 1486 ik = il[i]; /* block index of A(i,k) in the array a */ 1487 for (j=0; j<bs; j++){ 1488 v = a->a + ik*bs2 + j*bs; 1489 for (k1=0; k1<bs; k1++) { 1490 sum[j] += PetscAbsScalar(*v); v++; 1491 } 1492 } 1493 /* update il, jl */ 1494 jmin = ik + 1; jmax = a->i[i+1]; 1495 if (jmin < jmax){ 1496 il[i] = jmin; 1497 j = a->j[jmin]; 1498 jl[i] = jl[j]; jl[j]=i; 1499 } 1500 i = nexti; 1501 } 1502 1503 /*-- row sum --*/ 1504 jmin = a->i[k]; jmax = a->i[k+1]; 1505 for (i=jmin; i<jmax; i++) { 1506 for (j=0; j<bs; j++){ 1507 v = a->a + i*bs2 + j; 1508 for (k1=0; k1<bs; k1++){ 1509 sum[j] += PetscAbsScalar(*v); 1510 v += bs; 1511 } 1512 } 1513 } 1514 /* add k_th block row to il, jl */ 1515 jmin++; 1516 if (jmin < jmax){ 1517 il[k] = jmin; 1518 j = a->j[jmin]; 1519 jl[k] = jl[j]; jl[j] = k; 1520 } 1521 for (j=0; j<bs; j++){ 1522 if (sum[j] > *norm) *norm = sum[j]; 1523 } 1524 } 1525 ierr = PetscFree(il);CHKERRQ(ierr); 1526 ierr = PetscFree(jl);CHKERRQ(ierr); 1527 ierr = PetscFree(sum);CHKERRQ(ierr); 1528 } else { 1529 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1530 } 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #ifdef MatNorm_SeqBAIJ 1535 /* This is modified MatNorm_SeqBAIJ. 1536 MatNorm_SeqBAIJ in baij/seq/baij2.c is not correct for bs>1 */ 1537 1538 #undef __FUNC__ 1539 #define __FUNC__ "MatNorm_SeqBAIJ" 1540 int MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 1541 { 1542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1543 MatScalar *v = a->a; 1544 PetscReal sum = 0.0; 1545 int i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1; 1546 1547 PetscFunctionBegin; 1548 if (type == NORM_FROBENIUS) { 1549 for (i=0; i< bs2*nz; i++) { 1550 #if defined(PETSC_USE_COMPLEX) 1551 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1552 #else 1553 sum += (*v)*(*v); v++; 1554 #endif 1555 } 1556 *norm = sqrt(sum); 1557 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1558 *norm = 0.0; 1559 for (j=0; j<a->mbs; j++) { 1560 for (k=0; k<bs; k++) { 1561 v = a->a + bs2*a->i[j] + k; 1562 sum = 0.0; 1563 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 1564 for (k1=0; k1<bs; k1++){ /* this loop was missing in the original code*/ 1565 sum += PetscAbsScalar(*v); 1566 v += bs; 1567 } 1568 } 1569 if (sum > *norm) *norm = sum; 1570 } 1571 } 1572 } else { 1573 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 #endif 1578 1579 #undef __FUNC__ 1580 #define __FUNC__ "MatEqual_SeqSBAIJ" 1581 int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 1582 { 1583 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1584 int ierr; 1585 1586 PetscFunctionBegin; 1587 if (B->type !=MATSEQSBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1588 1589 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1590 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) { 1591 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1592 } 1593 1594 /* if the a->i are the same */ 1595 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 1596 if (*flg == PETSC_FALSE) { 1597 PetscFunctionReturn(0); 1598 } 1599 1600 /* if a->j are the same */ 1601 ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr); 1602 if (*flg == PETSC_FALSE) { 1603 PetscFunctionReturn(0); 1604 } 1605 /* if a->a are the same */ 1606 ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(Scalar),flg);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 1609 } 1610 1611 #undef __FUNC__ 1612 #define __FUNC__ "MatGetDiagonal_SeqSBAIJ" 1613 int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 1614 { 1615 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1616 int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1617 Scalar *x,zero = 0.0; 1618 MatScalar *aa,*aa_j; 1619 1620 PetscFunctionBegin; 1621 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Not for factored matrix"); 1622 bs = a->bs; 1623 aa = a->a; 1624 ai = a->i; 1625 aj = a->j; 1626 ambs = a->mbs; 1627 bs2 = a->bs2; 1628 1629 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1630 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1631 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1632 /* if (n != a->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming matrix and vector");*/ 1633 for (i=0; i<ambs; i++) { 1634 j=ai[i]; 1635 if (aj[j] == i) { /* if this is a diagonal element */ 1636 row = i*bs; 1637 aa_j = aa + j*bs2; 1638 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1639 } 1640 } 1641 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNC__ 1646 #define __FUNC__ "MatDiagonalScale_SeqSBAIJ" 1647 int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 1648 { 1649 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1650 Scalar *l,*r,x,*li,*ri; 1651 MatScalar *aa,*v; 1652 int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1653 1654 PetscFunctionBegin; 1655 ai = a->i; 1656 aj = a->j; 1657 aa = a->a; 1658 m = a->m; 1659 bs = a->bs; 1660 mbs = a->mbs; 1661 bs2 = a->bs2; 1662 1663 if (ll != rr) { 1664 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1665 } 1666 if (ll) { 1667 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1668 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1669 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector wrong length"); 1670 for (i=0; i<mbs; i++) { /* for each block row */ 1671 M = ai[i+1] - ai[i]; 1672 li = l + i*bs; 1673 v = aa + bs2*ai[i]; 1674 for (j=0; j<M; j++) { /* for each block */ 1675 for (k=0; k<bs2; k++) { 1676 (*v++) *= li[k%bs]; 1677 } 1678 #ifdef CONT 1679 /* will be used to replace the above loop */ 1680 ri = l + bs*aj[ai[i]+j]; 1681 for (k=0; k<bs; k++) { /* column value */ 1682 x = ri[k]; 1683 for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 1684 } 1685 #endif 1686 1687 } 1688 } 1689 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1690 PLogFlops(2*a->s_nz); 1691 } 1692 /* will be deleted */ 1693 if (rr) { 1694 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1695 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1696 if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vector wrong length"); 1697 for (i=0; i<mbs; i++) { /* for each block row */ 1698 M = ai[i+1] - ai[i]; 1699 v = aa + bs2*ai[i]; 1700 for (j=0; j<M; j++) { /* for each block */ 1701 ri = r + bs*aj[ai[i]+j]; 1702 for (k=0; k<bs; k++) { 1703 x = ri[k]; 1704 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1705 } 1706 } 1707 } 1708 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1709 PLogFlops(a->s_nz); 1710 } 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNC__ 1715 #define __FUNC__ "MatGetInfo_SeqSBAIJ" 1716 int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1717 { 1718 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1719 1720 PetscFunctionBegin; 1721 info->rows_global = (double)a->m; 1722 info->columns_global = (double)a->m; 1723 info->rows_local = (double)a->m; 1724 info->columns_local = (double)a->m; 1725 info->block_size = a->bs2; 1726 info->nz_allocated = a->s_maxnz; /*num. of nonzeros in upper triangular part */ 1727 info->nz_used = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */ 1728 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1729 info->assemblies = A->num_ass; 1730 info->mallocs = a->reallocs; 1731 info->memory = A->mem; 1732 if (A->factor) { 1733 info->fill_ratio_given = A->info.fill_ratio_given; 1734 info->fill_ratio_needed = A->info.fill_ratio_needed; 1735 info->factor_mallocs = A->info.factor_mallocs; 1736 } else { 1737 info->fill_ratio_given = 0; 1738 info->fill_ratio_needed = 0; 1739 info->factor_mallocs = 0; 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 1744 1745 #undef __FUNC__ 1746 #define __FUNC__ "MatZeroEntries_SeqSBAIJ" 1747 int MatZeroEntries_SeqSBAIJ(Mat A) 1748 { 1749 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1750 int ierr; 1751 1752 PetscFunctionBegin; 1753 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756