1 /*$Id: sbaij2.c,v 1.6 2000/08/16 15:15:02 balay Exp hzhang $*/ 2 3 #include "petscsys.h" 4 #include "src/mat/impls/baij/seq/baij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/spops.h" 7 #include "src/inline/ilu.h" 8 #include "petscbt.h" 9 #include "sbaij.h" 10 11 #undef __FUNC__ 12 #define __FUNC__ "MatIncreaseOverlap_SeqSBAIJ" 13 int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov) 14 { 15 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 16 int row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival; 17 int start,end,*ai,*aj,bs,*nidx2; 18 PetscBT table; 19 20 PetscFunctionBegin; 21 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 22 PetscFunctionReturn(0); 23 } 24 25 #undef __FUNC__ 26 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ_Private" 27 int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 28 { 29 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 30 int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 31 int row,mat_i,*mat_j,tcol,*mat_ilen; 32 int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 33 int *aj = a->j,*ai = a->i; 34 MatScalar *mat_a; 35 Mat C; 36 PetscTruth flag; 37 38 PetscFunctionBegin; 39 40 if (isrow != iscol) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 41 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 42 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"IS is not sorted"); 43 44 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 45 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 46 47 smap = (int*)PetscMalloc((1+oldcols)*sizeof(int));CHKPTRQ(smap); 48 ssmap = smap; 49 lens = (int*)PetscMalloc((1+nrows)*sizeof(int));CHKPTRQ(lens); 50 ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 51 for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 52 /* determine lens of each row */ 53 for (i=0; i<nrows; i++) { 54 kstart = ai[irow[i]]; 55 kend = kstart + a->ilen[irow[i]]; 56 lens[i] = 0; 57 for (k=kstart; k<kend; k++) { 58 if (ssmap[aj[k]]) { 59 lens[i]++; 60 } 61 } 62 } 63 /* Create and fill new matrix */ 64 if (scall == MAT_REUSE_MATRIX) { 65 c = (Mat_SeqSBAIJ *)((*B)->data); 66 67 if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Submatrix wrong size"); 68 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 69 if (flag == PETSC_FALSE) { 70 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong no of nonzeros"); 71 } 72 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 73 C = *B; 74 } else { 75 ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr); 76 } 77 c = (Mat_SeqSBAIJ *)(C->data); 78 for (i=0; i<nrows; i++) { 79 row = irow[i]; 80 kstart = ai[row]; 81 kend = kstart + a->ilen[row]; 82 mat_i = c->i[i]; 83 mat_j = c->j + mat_i; 84 mat_a = c->a + mat_i*bs2; 85 mat_ilen = c->ilen + i; 86 for (k=kstart; k<kend; k++) { 87 if ((tcol=ssmap[a->j[k]])) { 88 *mat_j++ = tcol - 1; 89 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 90 mat_a += bs2; 91 (*mat_ilen)++; 92 } 93 } 94 } 95 96 /* Free work space */ 97 ierr = PetscFree(smap);CHKERRQ(ierr); 98 ierr = PetscFree(lens);CHKERRQ(ierr); 99 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 100 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 101 102 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 103 *B = C; 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNC__ 108 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ" 109 int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 110 { 111 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 112 IS is1; 113 int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 114 115 PetscFunctionBegin; 116 if (isrow != iscol) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 117 118 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 119 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 120 121 /* Verify if the indices corespond to each element in a block 122 and form the IS with compressed IS */ 123 vary = (int*)PetscMalloc(2*(a->mbs+1)*sizeof(int));CHKPTRQ(vary); 124 iary = vary + a->mbs; 125 ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 126 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 127 128 count = 0; 129 for (i=0; i<a->mbs; i++) { 130 if (vary[i]!=0 && vary[i]!=bs) SETERRA(1,0,"Index set does not match blocks"); 131 if (vary[i]==bs) iary[count++] = i; 132 } 133 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 134 135 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 136 ierr = PetscFree(vary);CHKERRQ(ierr); 137 138 ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 139 ISDestroy(is1); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNC__ 144 #define __FUNC__ "MatGetSubMatrices_SeqSBAIJ" 145 int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B) 146 { 147 int ierr,i; 148 149 PetscFunctionBegin; 150 if (scall == MAT_INITIAL_MATRIX) { 151 *B = (Mat*)PetscMalloc((n+1)*sizeof(Mat));CHKPTRQ(*B); 152 } 153 154 for (i=0; i<n; i++) { 155 ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 156 } 157 PetscFunctionReturn(0); 158 } 159 160 /* -------------------------------------------------------*/ 161 /* Should check that shapes of vectors and matrices match */ 162 /* -------------------------------------------------------*/ 163 #include "petscblaslapack.h" 164 165 #undef __FUNC__ 166 #define __FUNC__ "MatMult_SeqSBAIJ_1" 167 int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 168 { 169 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 170 Scalar *x,*z,*xb,x1,zero=0.0; 171 MatScalar *v; 172 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 173 174 PetscFunctionBegin; 175 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 176 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 177 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 178 179 v = a->a; 180 xb = x; 181 182 for (i=0; i<mbs; i++) { 183 n = ai[1] - ai[0]; /* length of i_th row of A */ 184 x1 = xb[0]; 185 ib = aj + *ai; 186 z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 187 for (j=1; j<n; j++) { 188 cval = *ib; 189 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 190 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 191 } 192 xb++; ai++; 193 } 194 195 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 196 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 197 PLogFlops(2*(a->s_nz*2 - a->m) - a->m); /* s_nz = (nz+m)/2 */ 198 PetscFunctionReturn(0); 199 } 200 201 #undef __FUNC__ 202 #define __FUNC__ "MatMult_SeqSBAIJ_2" 203 int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 204 { 205 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 206 Scalar *x,*z,*xb,x1,x2,zero=0.0; 207 MatScalar *v; 208 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 209 210 211 PetscFunctionBegin; 212 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 213 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 215 216 v = a->a; 217 xb = x; 218 219 for (i=0; i<mbs; i++) { 220 n = ai[1] - ai[0]; /* length of i_th block row of A */ 221 x1 = xb[0]; x2 = xb[1]; 222 ib = aj + *ai; 223 /* (diag of A)*x */ 224 z[2*i] += v[0]*x1 + v[2]*x2; 225 z[2*i+1] += v[2]*x1 + v[3]*x2; 226 v += 4; 227 for (j=1; j<n; j++) { 228 /* (strict lower triangular part of A)*x */ 229 cval = ib[j]*2; 230 z[cval] += v[0]*x1 + v[1]*x2; 231 z[cval+1] += v[2]*x1 + v[3]*x2; 232 /* (strict upper triangular part of A)*x */ 233 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 234 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 235 v += 4; 236 } 237 xb +=2; ai++; 238 } 239 240 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 241 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 242 PLogFlops(8*(a->s_nz*2 - a->m) - a->m); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNC__ 247 #define __FUNC__ "MatMult_SeqSBAIJ_3" 248 int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 249 { 250 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 251 Scalar *x,*z,*xb,x1,x2,x3,zero=0.0; 252 MatScalar *v; 253 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 254 255 256 PetscFunctionBegin; 257 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 258 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 259 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 260 261 v = a->a; 262 xb = x; 263 264 for (i=0; i<mbs; i++) { 265 n = ai[1] - ai[0]; /* length of i_th block row of A */ 266 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 267 ib = aj + *ai; 268 /* (diag of A)*x */ 269 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 270 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 271 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 272 v += 9; 273 for (j=1; j<n; j++) { 274 /* (strict lower triangular part of A)*x */ 275 cval = ib[j]*3; 276 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 277 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 278 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 279 /* (strict upper triangular part of A)*x */ 280 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 281 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 282 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 283 v += 9; 284 } 285 xb +=3; ai++; 286 } 287 288 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 289 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 290 PLogFlops(18*(a->s_nz*2 - a->m) - a->m); 291 PetscFunctionReturn(0); 292 } 293 294 #undef __FUNC__ 295 #define __FUNC__ "MatMult_SeqSBAIJ_4" 296 int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 297 { 298 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 299 Scalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 300 MatScalar *v; 301 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 302 303 PetscFunctionBegin; 304 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 305 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 306 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 307 308 v = a->a; 309 xb = x; 310 311 for (i=0; i<mbs; i++) { 312 n = ai[1] - ai[0]; /* length of i_th block row of A */ 313 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 314 ib = aj + *ai; 315 /* (diag of A)*x */ 316 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 317 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 318 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 319 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 320 v += 16; 321 for (j=1; j<n; j++) { 322 /* (strict lower triangular part of A)*x */ 323 cval = ib[j]*4; 324 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 325 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 326 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 327 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 328 /* (strict upper triangular part of A)*x */ 329 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 330 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 331 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 332 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 333 v += 16; 334 } 335 xb +=4; ai++; 336 } 337 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 340 PLogFlops(32*(a->s_nz*2 - a->m) - a->m); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatMult_SeqSBAIJ_5" 346 int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 347 { 348 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 349 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 350 MatScalar *v; 351 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 352 353 PetscFunctionBegin; 354 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 355 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 356 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 357 358 v = a->a; 359 xb = x; 360 361 for (i=0; i<mbs; i++) { 362 n = ai[1] - ai[0]; /* length of i_th block row of A */ 363 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 364 ib = aj + *ai; 365 /* (diag of A)*x */ 366 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 367 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 368 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 369 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 370 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 371 v += 25; 372 for (j=1; j<n; j++) { 373 /* (strict lower triangular part of A)*x */ 374 cval = ib[j]*5; 375 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 376 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 377 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 378 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 379 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 380 /* (strict upper triangular part of A)*x */ 381 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 382 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 383 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 384 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 385 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 386 v += 25; 387 } 388 xb +=5; ai++; 389 } 390 391 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 392 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 393 PLogFlops(50*(a->s_nz*2 - a->m) - a->m); 394 PetscFunctionReturn(0); 395 } 396 397 398 #undef __FUNC__ 399 #define __FUNC__ "MatMult_SeqSBAIJ_6" 400 int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 401 { 402 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 403 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 404 MatScalar *v; 405 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 406 407 PetscFunctionBegin; 408 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 409 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 410 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 411 412 v = a->a; 413 xb = x; 414 415 for (i=0; i<mbs; i++) { 416 n = ai[1] - ai[0]; /* length of i_th block row of A */ 417 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 418 ib = aj + *ai; 419 /* (diag of A)*x */ 420 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 421 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 422 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 423 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 424 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 425 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 426 v += 36; 427 for (j=1; j<n; j++) { 428 /* (strict lower triangular part of A)*x */ 429 cval = ib[j]*6; 430 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 431 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 432 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 433 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 434 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 435 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 436 /* (strict upper triangular part of A)*x */ 437 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 438 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 439 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 440 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 441 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 442 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 443 v += 36; 444 } 445 xb +=6; ai++; 446 } 447 448 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 449 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 450 PLogFlops(72*(a->s_nz*2 - a->m) - a->m); 451 PetscFunctionReturn(0); 452 } 453 #undef __FUNC__ 454 #define __FUNC__ "MatMult_SeqSBAIJ_7" 455 int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 456 { 457 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 458 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 459 MatScalar *v; 460 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 461 462 PetscFunctionBegin; 463 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 464 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 465 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 466 467 v = a->a; 468 xb = x; 469 470 for (i=0; i<mbs; i++) { 471 n = ai[1] - ai[0]; /* length of i_th block row of A */ 472 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 473 ib = aj + *ai; 474 /* (diag of A)*x */ 475 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 476 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 477 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 478 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 479 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 480 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 481 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 482 v += 49; 483 for (j=1; j<n; j++) { 484 /* (strict lower triangular part of A)*x */ 485 cval = ib[j]*7; 486 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 487 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 488 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 489 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 490 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 491 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 492 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 493 /* (strict upper triangular part of A)*x */ 494 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 495 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 496 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 497 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 498 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 499 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 500 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 501 v += 49; 502 } 503 xb +=7; ai++; 504 } 505 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 506 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 507 PLogFlops(98*(a->s_nz*2 - a->m) - a->m); 508 PetscFunctionReturn(0); 509 } 510 511 /* 512 This will not work with MatScalar == float because it calls the BLAS 513 */ 514 #undef __FUNC__ 515 #define __FUNC__ "MatMult_SeqSBAIJ_N" 516 int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 517 { 518 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 519 Scalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 520 MatScalar *v; 521 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 522 int ncols,k; 523 524 PetscFunctionBegin; 525 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 526 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 527 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 528 529 aj = a->j; 530 v = a->a; 531 ii = a->i; 532 533 if (!a->mult_work) { 534 k = a->m; 535 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 536 } 537 work = a->mult_work; 538 539 for (i=0; i<mbs; i++) { 540 n = ii[1] - ii[0]; ncols = n*bs; 541 workt = work; idx=aj+ii[0]; 542 543 /* upper triangular part */ 544 for (j=0; j<n; j++) { 545 /* col = bs*(*idx); */ 546 xb = x_ptr + bs*(*idx++); 547 for (k=0; k<bs; k++) workt[k] = xb[k]; 548 workt += bs; 549 } 550 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 551 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 552 553 /* strict lower triangular part */ 554 ncols -= bs; 555 if (ncols > 0){ 556 workt = work; 557 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 558 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 559 560 idx=aj+ii[0]+1; 561 for (j=1; j<n; j++) { 562 zb = z_ptr + bs*(*idx); 563 idx++; 564 for (k=0; k<bs; k++) zb[k] += workt[k] ; 565 workt += bs; 566 } 567 } 568 569 x += bs; v += n*bs2; z += bs; ii++; 570 } 571 572 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 574 PLogFlops(2*(a->s_nz*2 - a->m)*bs2 - a->m); 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNC__ 579 #define __FUNC__ "MatMultAdd_SeqSBAIJ_1" 580 int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 581 { 582 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 583 Scalar *x,*y,*z,*xb,x1; 584 MatScalar *v; 585 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 586 587 PetscFunctionBegin; 588 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 589 if (yy != xx) { 590 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 591 } else { 592 y = x; 593 } 594 if (zz != yy) { 595 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 596 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 597 } else { 598 z = y; 599 } 600 601 v = a->a; 602 xb = x; 603 604 for (i=0; i<mbs; i++) { 605 n = ai[1] - ai[0]; /* length of i_th row of A */ 606 x1 = xb[0]; 607 ib = aj + *ai; 608 z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 609 for (j=1; j<n; j++) { 610 cval = *ib; 611 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 612 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 613 } 614 xb++; ai++; 615 } 616 617 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 618 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 619 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 620 621 PLogFlops(2*(a->s_nz*2 - a->m)); 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNC__ 626 #define __FUNC__ "MatMultAdd_SeqSBAIJ_2" 627 int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 628 { 629 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 630 Scalar *x,*y,*z,*xb,x1,x2; 631 MatScalar *v; 632 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 633 634 PetscFunctionBegin; 635 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 636 if (yy != xx) { 637 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 638 } else { 639 y = x; 640 } 641 if (zz != yy) { 642 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 643 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 644 } else { 645 z = y; 646 } 647 648 v = a->a; 649 xb = x; 650 651 for (i=0; i<mbs; i++) { 652 n = ai[1] - ai[0]; /* length of i_th block row of A */ 653 x1 = xb[0]; x2 = xb[1]; 654 ib = aj + *ai; 655 /* (diag of A)*x */ 656 z[2*i] += v[0]*x1 + v[2]*x2; 657 z[2*i+1] += v[2]*x1 + v[3]*x2; 658 v += 4; 659 for (j=1; j<n; j++) { 660 /* (strict lower triangular part of A)*x */ 661 cval = ib[j]*2; 662 z[cval] += v[0]*x1 + v[1]*x2; 663 z[cval+1] += v[2]*x1 + v[3]*x2; 664 /* (strict upper triangular part of A)*x */ 665 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 666 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 667 v += 4; 668 } 669 xb +=2; ai++; 670 } 671 672 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 673 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 674 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 675 676 PLogFlops(4*(a->s_nz*2 - a->m)); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNC__ 681 #define __FUNC__ "MatMultAdd_SeqSBAIJ_3" 682 int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 683 { 684 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 685 Scalar *x,*y,*z,*xb,x1,x2,x3; 686 MatScalar *v; 687 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 688 689 PetscFunctionBegin; 690 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 691 if (yy != xx) { 692 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 693 } else { 694 y = x; 695 } 696 if (zz != yy) { 697 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 698 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 699 } else { 700 z = y; 701 } 702 703 v = a->a; 704 xb = x; 705 706 for (i=0; i<mbs; i++) { 707 n = ai[1] - ai[0]; /* length of i_th block row of A */ 708 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 709 ib = aj + *ai; 710 /* (diag of A)*x */ 711 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 712 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 713 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 714 v += 9; 715 for (j=1; j<n; j++) { 716 /* (strict lower triangular part of A)*x */ 717 cval = ib[j]*3; 718 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 719 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 720 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 721 /* (strict upper triangular part of A)*x */ 722 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 723 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 724 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 725 v += 9; 726 } 727 xb +=3; ai++; 728 } 729 730 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 731 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 732 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 733 734 PLogFlops(18*(a->s_nz*2 - a->m)); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNC__ 739 #define __FUNC__ "MatMultAdd_SeqSBAIJ_4" 740 int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 741 { 742 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 743 Scalar *x,*y,*z,*xb,x1,x2,x3,x4; 744 MatScalar *v; 745 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 746 747 PetscFunctionBegin; 748 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 749 if (yy != xx) { 750 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 751 } else { 752 y = x; 753 } 754 if (zz != yy) { 755 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 756 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 757 } else { 758 z = y; 759 } 760 761 v = a->a; 762 xb = x; 763 764 for (i=0; i<mbs; i++) { 765 n = ai[1] - ai[0]; /* length of i_th block row of A */ 766 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 767 ib = aj + *ai; 768 /* (diag of A)*x */ 769 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 770 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 771 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 772 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 773 v += 16; 774 for (j=1; j<n; j++) { 775 /* (strict lower triangular part of A)*x */ 776 cval = ib[j]*4; 777 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 778 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 779 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 780 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 781 /* (strict upper triangular part of A)*x */ 782 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 783 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 784 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 785 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 786 v += 16; 787 } 788 xb +=4; ai++; 789 } 790 791 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 792 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 793 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 794 795 PLogFlops(32*(a->s_nz*2 - a->m)); 796 PetscFunctionReturn(0); 797 } 798 799 #undef __FUNC__ 800 #define __FUNC__ "MatMultAdd_SeqSBAIJ_5" 801 int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 802 { 803 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 804 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 805 MatScalar *v; 806 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 807 808 PetscFunctionBegin; 809 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 810 if (yy != xx) { 811 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 812 } else { 813 y = x; 814 } 815 if (zz != yy) { 816 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 817 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 818 } else { 819 z = y; 820 } 821 822 v = a->a; 823 xb = x; 824 825 for (i=0; i<mbs; i++) { 826 n = ai[1] - ai[0]; /* length of i_th block row of A */ 827 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 828 ib = aj + *ai; 829 /* (diag of A)*x */ 830 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 831 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 832 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 833 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 834 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 835 v += 25; 836 for (j=1; j<n; j++) { 837 /* (strict lower triangular part of A)*x */ 838 cval = ib[j]*5; 839 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 840 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 841 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 842 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 843 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 844 /* (strict upper triangular part of A)*x */ 845 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 846 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 847 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 848 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 849 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 850 v += 25; 851 } 852 xb +=5; ai++; 853 } 854 855 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 856 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 857 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 858 859 PLogFlops(50*(a->s_nz*2 - a->m)); 860 PetscFunctionReturn(0); 861 } 862 #undef __FUNC__ 863 #define __FUNC__ "MatMultAdd_SeqSBAIJ_6" 864 int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 865 { 866 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 867 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 868 MatScalar *v; 869 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 870 871 PetscFunctionBegin; 872 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 873 if (yy != xx) { 874 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 875 } else { 876 y = x; 877 } 878 if (zz != yy) { 879 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 880 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 881 } else { 882 z = y; 883 } 884 885 v = a->a; 886 xb = x; 887 888 for (i=0; i<mbs; i++) { 889 n = ai[1] - ai[0]; /* length of i_th block row of A */ 890 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 891 ib = aj + *ai; 892 /* (diag of A)*x */ 893 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 894 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 895 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 896 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 897 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 898 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 899 v += 36; 900 for (j=1; j<n; j++) { 901 /* (strict lower triangular part of A)*x */ 902 cval = ib[j]*6; 903 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 904 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 905 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 906 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 907 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 908 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 909 /* (strict upper triangular part of A)*x */ 910 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 911 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 912 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 913 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 914 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 915 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 916 v += 36; 917 } 918 xb +=6; ai++; 919 } 920 921 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 922 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 923 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 924 925 PLogFlops(72*(a->s_nz*2 - a->m)); 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNC__ 930 #define __FUNC__ "MatMultAdd_SeqSBAIJ_7" 931 int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 932 { 933 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 934 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 935 MatScalar *v; 936 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 937 938 PetscFunctionBegin; 939 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 940 if (yy != xx) { 941 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 942 } else { 943 y = x; 944 } 945 if (zz != yy) { 946 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 947 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 948 } else { 949 z = y; 950 } 951 952 v = a->a; 953 xb = x; 954 955 for (i=0; i<mbs; i++) { 956 n = ai[1] - ai[0]; /* length of i_th block row of A */ 957 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 958 ib = aj + *ai; 959 /* (diag of A)*x */ 960 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 961 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 962 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 963 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 964 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 965 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 966 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 967 v += 49; 968 for (j=1; j<n; j++) { 969 /* (strict lower triangular part of A)*x */ 970 cval = ib[j]*7; 971 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 972 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 973 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 974 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 975 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 976 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 977 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 978 /* (strict upper triangular part of A)*x */ 979 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 980 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 981 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 982 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 983 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 984 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 985 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 986 v += 49; 987 } 988 xb +=7; ai++; 989 } 990 991 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 992 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 993 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 994 995 PLogFlops(98*(a->s_nz*2 - a->m)); 996 PetscFunctionReturn(0); 997 } 998 999 #undef __FUNC__ 1000 #define __FUNC__ "MatMultAdd_SeqSBAIJ_N" 1001 int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1002 { 1003 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1004 Scalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1005 MatScalar *v; 1006 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1007 int ncols,k; 1008 1009 PetscFunctionBegin; 1010 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 1011 if (yy != xx) { 1012 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1013 } else { 1014 y = x; 1015 } 1016 if (zz != yy) { 1017 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1018 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 1019 } else { 1020 z = y; 1021 } 1022 1023 aj = a->j; 1024 v = a->a; 1025 ii = a->i; 1026 1027 if (!a->mult_work) { 1028 k = a->m; 1029 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1030 } 1031 work = a->mult_work; 1032 1033 1034 for (i=0; i<mbs; i++) { 1035 n = ii[1] - ii[0]; ncols = n*bs; 1036 workt = work; idx=aj+ii[0]; 1037 1038 /* upper triangular part */ 1039 for (j=0; j<n; j++) { 1040 /* col = bs*(*idx); */ 1041 xb = x_ptr + bs*(*idx++); 1042 for (k=0; k<bs; k++) workt[k] = xb[k]; 1043 workt += bs; 1044 } 1045 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 1046 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1047 1048 /* strict lower triangular part */ 1049 ncols -= bs; 1050 if (ncols > 0){ 1051 workt = work; 1052 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 1053 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 1054 1055 idx=aj+ii[0]+1; 1056 for (j=1; j<n; j++) { 1057 zb = z_ptr + bs*(*idx); 1058 idx++; 1059 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1060 workt += bs; 1061 } 1062 } 1063 1064 x += bs; v += n*bs2; z += bs; ii++; 1065 } 1066 1067 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1068 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1069 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1070 1071 PLogFlops(2*(a->s_nz*2 - a->m)); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNC__ 1076 #define __FUNC__ "MatMultTranspose_SeqSBAIJ" 1077 int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 1078 { 1079 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1080 Scalar *xg,*zg,*zb,zero = 0.0; 1081 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 1082 MatScalar *v; 1083 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval; 1084 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1085 1086 PetscFunctionBegin; 1087 SETERRQ(1,1,"Matrix is symmetric. Call MatMult()."); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNC__ 1092 #define __FUNC__ "MatMultTransposeAdd_SeqSBAIJ" 1093 int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1094 1095 { 1096 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1097 Scalar *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5; 1098 MatScalar *v; 1099 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1100 1101 PetscFunctionBegin; 1102 SETERRQ(1,1,"Matrix is symmetric. Call MatMultAdd()."); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNC__ 1107 #define __FUNC__ "MatScale_SeqSBAIJ" 1108 int MatScale_SeqSBAIJ(Scalar *alpha,Mat inA) 1109 { 1110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1111 int one = 1,totalnz = a->bs2*a->s_nz; 1112 1113 PetscFunctionBegin; 1114 BLscal_(&totalnz,alpha,a->a,&one); 1115 PLogFlops(totalnz); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNC__ 1120 #define __FUNC__ "MatNorm_SeqSBAIJ" 1121 int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 1122 { 1123 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1124 MatScalar *v = a->a; 1125 PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1126 int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs; 1127 int *jl,*il,jmin,jmax,ierr,nexti,ik; 1128 1129 PetscFunctionBegin; 1130 if (type == NORM_FROBENIUS) { 1131 for (k=0; k<mbs; k++){ 1132 jmin = a->i[k]; jmax = a->i[k+1]; 1133 /* diagonal block */ 1134 for (i=0; i<bs2; i++){ 1135 #if defined(PETSC_USE_COMPLEX) 1136 sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 1137 #else 1138 sum_diag += (*v)*(*v); v++; 1139 #endif 1140 } 1141 for (j=jmin+1; j<jmax; j++){ /* off-diagonal blocks */ 1142 for (i=0; i<bs2; i++){ 1143 #if defined(PETSC_USE_COMPLEX) 1144 sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 1145 #else 1146 sum_off += (*v)*(*v); v++; 1147 #endif 1148 } 1149 } 1150 } 1151 *norm = sqrt(sum_diag + 2*sum_off); 1152 1153 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1154 il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il); 1155 jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl); 1156 sum = (PetscReal*)PetscMalloc(bs*sizeof(PetscReal));CHKPTRQ(sum); 1157 for (i=0; i<mbs; i++) { 1158 jl[i] = mbs; il[0] = 0; 1159 } 1160 1161 *norm = 0.0; 1162 for (k=0; k<mbs; k++) { /* k_th block row */ 1163 for (j=0; j<bs; j++) sum[j]=0.0; 1164 1165 /*-- col sum --*/ 1166 i = jl[k]; /* first |A(i,k)| to be added */ 1167 while (i<mbs){ 1168 nexti = jl[i]; /* next block row to be added */ 1169 ik = il[i]; /* block index of A(i,k) in the array a */ 1170 for (j=0; j<bs; j++){ 1171 v = a->a + ik*bs2 + j*bs; 1172 for (k1=0; k1<bs; k1++) { 1173 sum[j] += PetscAbsScalar(*v); v++; 1174 } 1175 } 1176 /* update il, jl */ 1177 jmin = ik + 1; jmax = a->i[i+1]; 1178 if (jmin < jmax){ 1179 il[i] = jmin; 1180 j = a->j[jmin]; 1181 jl[i] = jl[j]; jl[j]=i; 1182 } 1183 i = nexti; 1184 } 1185 1186 /*-- row sum --*/ 1187 jmin = a->i[k]; jmax = a->i[k+1]; 1188 for (i=jmin; i<jmax; i++) { 1189 for (j=0; j<bs; j++){ 1190 v = a->a + i*bs2 + j; 1191 for (k1=0; k1<bs; k1++){ 1192 sum[j] += PetscAbsScalar(*v); 1193 v += bs; 1194 } 1195 } 1196 } 1197 /* add k_th block row to il, jl */ 1198 jmin++; 1199 if (jmin < jmax){ 1200 il[k] = jmin; 1201 j = a->j[jmin]; 1202 jl[k] = jl[j]; jl[j] = k; 1203 } 1204 for (j=0; j<bs; j++){ 1205 if (sum[j] > *norm) *norm = sum[j]; 1206 } 1207 } 1208 ierr = PetscFree(il);CHKERRQ(ierr); 1209 ierr = PetscFree(jl);CHKERRQ(ierr); 1210 ierr = PetscFree(sum);CHKERRQ(ierr); 1211 } else { 1212 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1213 } 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNC__ 1218 #define __FUNC__ "MatEqual_SeqSBAIJ" 1219 int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 1220 { 1221 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1222 int ierr; 1223 1224 PetscFunctionBegin; 1225 if (B->type !=MATSEQSBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1226 1227 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1228 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) { 1229 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1230 } 1231 1232 /* if the a->i are the same */ 1233 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 1234 if (*flg == PETSC_FALSE) { 1235 PetscFunctionReturn(0); 1236 } 1237 1238 /* if a->j are the same */ 1239 ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr); 1240 if (*flg == PETSC_FALSE) { 1241 PetscFunctionReturn(0); 1242 } 1243 /* if a->a are the same */ 1244 ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(Scalar),flg);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 1247 } 1248 1249 #undef __FUNC__ 1250 #define __FUNC__ "MatGetDiagonal_SeqSBAIJ" 1251 int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 1252 { 1253 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1254 int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1255 Scalar *x,zero = 0.0; 1256 MatScalar *aa,*aa_j; 1257 1258 PetscFunctionBegin; 1259 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Not for factored matrix"); 1260 bs = a->bs; 1261 aa = a->a; 1262 ai = a->i; 1263 aj = a->j; 1264 ambs = a->mbs; 1265 bs2 = a->bs2; 1266 1267 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1268 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1269 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1270 /* if (n != a->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming matrix and vector");*/ 1271 for (i=0; i<ambs; i++) { 1272 j=ai[i]; 1273 if (aj[j] == i) { /* if this is a diagonal element */ 1274 row = i*bs; 1275 aa_j = aa + j*bs2; 1276 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1277 } 1278 } 1279 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNC__ 1284 #define __FUNC__ "MatDiagonalScale_SeqSBAIJ" 1285 int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 1286 { 1287 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1288 Scalar *l,*r,x,*li,*ri; 1289 MatScalar *aa,*v; 1290 int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1291 1292 PetscFunctionBegin; 1293 ai = a->i; 1294 aj = a->j; 1295 aa = a->a; 1296 m = a->m; 1297 bs = a->bs; 1298 mbs = a->mbs; 1299 bs2 = a->bs2; 1300 1301 if (ll != rr) { 1302 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1303 } 1304 if (ll) { 1305 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1306 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1307 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector wrong length"); 1308 for (i=0; i<mbs; i++) { /* for each block row */ 1309 M = ai[i+1] - ai[i]; 1310 li = l + i*bs; 1311 v = aa + bs2*ai[i]; 1312 for (j=0; j<M; j++) { /* for each block */ 1313 for (k=0; k<bs2; k++) { 1314 (*v++) *= li[k%bs]; 1315 } 1316 #ifdef CONT 1317 /* will be used to replace the above loop */ 1318 ri = l + bs*aj[ai[i]+j]; 1319 for (k=0; k<bs; k++) { /* column value */ 1320 x = ri[k]; 1321 for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 1322 } 1323 #endif 1324 1325 } 1326 } 1327 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1328 PLogFlops(2*a->s_nz); 1329 } 1330 /* will be deleted */ 1331 if (rr) { 1332 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1333 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1334 if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vector wrong length"); 1335 for (i=0; i<mbs; i++) { /* for each block row */ 1336 M = ai[i+1] - ai[i]; 1337 v = aa + bs2*ai[i]; 1338 for (j=0; j<M; j++) { /* for each block */ 1339 ri = r + bs*aj[ai[i]+j]; 1340 for (k=0; k<bs; k++) { 1341 x = ri[k]; 1342 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1343 } 1344 } 1345 } 1346 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1347 PLogFlops(a->s_nz); 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNC__ 1353 #define __FUNC__ "MatGetInfo_SeqSBAIJ" 1354 int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1355 { 1356 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1357 1358 PetscFunctionBegin; 1359 info->rows_global = (double)a->m; 1360 info->columns_global = (double)a->m; 1361 info->rows_local = (double)a->m; 1362 info->columns_local = (double)a->m; 1363 info->block_size = a->bs2; 1364 info->nz_allocated = a->s_maxnz; /*num. of nonzeros in upper triangular part */ 1365 info->nz_used = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */ 1366 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1367 info->assemblies = A->num_ass; 1368 info->mallocs = a->reallocs; 1369 info->memory = A->mem; 1370 if (A->factor) { 1371 info->fill_ratio_given = A->info.fill_ratio_given; 1372 info->fill_ratio_needed = A->info.fill_ratio_needed; 1373 info->factor_mallocs = A->info.factor_mallocs; 1374 } else { 1375 info->fill_ratio_given = 0; 1376 info->fill_ratio_needed = 0; 1377 info->factor_mallocs = 0; 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 1383 #undef __FUNC__ 1384 #define __FUNC__ "MatZeroEntries_SeqSBAIJ" 1385 int MatZeroEntries_SeqSBAIJ(Mat A) 1386 { 1387 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1388 int ierr; 1389 1390 PetscFunctionBegin; 1391 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394