1 /*$Id: sbaij2.c,v 1.13 2000/09/28 20:51:54 bsmith Exp hzhang $*/ 2 3 #include "petscsys.h" 4 #include "src/mat/impls/baij/seq/baij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/spops.h" 7 #include "src/inline/ilu.h" 8 #include "petscbt.h" 9 #include "sbaij.h" 10 11 #undef __FUNC__ 12 #define __FUNC__ "MatIncreaseOverlap_SeqSBAIJ" 13 int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov) 14 { 15 PetscFunctionBegin; 16 SETERRQ(1,"Function not yet written for SBAIJ format"); 17 /* PetscFunctionReturn(0); */ 18 } 19 20 #undef __FUNC__ 21 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ_Private" 22 int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 23 { 24 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 25 int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 26 int row,mat_i,*mat_j,tcol,*mat_ilen; 27 int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 28 int *aj = a->j,*ai = a->i; 29 MatScalar *mat_a; 30 Mat C; 31 PetscTruth flag; 32 33 PetscFunctionBegin; 34 35 if (isrow != iscol) SETERRA(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 36 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 37 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 38 39 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 40 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 41 42 smap = (int*)PetscMalloc((1+oldcols)*sizeof(int));CHKPTRQ(smap); 43 ssmap = smap; 44 lens = (int*)PetscMalloc((1+nrows)*sizeof(int));CHKPTRQ(lens); 45 ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 46 for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 47 /* determine lens of each row */ 48 for (i=0; i<nrows; i++) { 49 kstart = ai[irow[i]]; 50 kend = kstart + a->ilen[irow[i]]; 51 lens[i] = 0; 52 for (k=kstart; k<kend; k++) { 53 if (ssmap[aj[k]]) { 54 lens[i]++; 55 } 56 } 57 } 58 /* Create and fill new matrix */ 59 if (scall == MAT_REUSE_MATRIX) { 60 c = (Mat_SeqSBAIJ *)((*B)->data); 61 62 if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 63 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 64 if (flag == PETSC_FALSE) { 65 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 66 } 67 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 68 C = *B; 69 } else { 70 ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr); 71 } 72 c = (Mat_SeqSBAIJ *)(C->data); 73 for (i=0; i<nrows; i++) { 74 row = irow[i]; 75 kstart = ai[row]; 76 kend = kstart + a->ilen[row]; 77 mat_i = c->i[i]; 78 mat_j = c->j + mat_i; 79 mat_a = c->a + mat_i*bs2; 80 mat_ilen = c->ilen + i; 81 for (k=kstart; k<kend; k++) { 82 if ((tcol=ssmap[a->j[k]])) { 83 *mat_j++ = tcol - 1; 84 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 85 mat_a += bs2; 86 (*mat_ilen)++; 87 } 88 } 89 } 90 91 /* Free work space */ 92 ierr = PetscFree(smap);CHKERRQ(ierr); 93 ierr = PetscFree(lens);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 97 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 98 *B = C; 99 PetscFunctionReturn(0); 100 } 101 102 #undef __FUNC__ 103 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ" 104 int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 105 { 106 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 107 IS is1; 108 int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 109 110 PetscFunctionBegin; 111 if (isrow != iscol) SETERRA(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 112 113 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 114 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 115 116 /* Verify if the indices corespond to each element in a block 117 and form the IS with compressed IS */ 118 vary = (int*)PetscMalloc(2*(a->mbs+1)*sizeof(int));CHKPTRQ(vary); 119 iary = vary + a->mbs; 120 ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 121 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 122 123 count = 0; 124 for (i=0; i<a->mbs; i++) { 125 if (vary[i]!=0 && vary[i]!=bs) SETERRA(1,"Index set does not match blocks"); 126 if (vary[i]==bs) iary[count++] = i; 127 } 128 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 129 130 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 131 ierr = PetscFree(vary);CHKERRQ(ierr); 132 133 ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 134 ISDestroy(is1); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNC__ 139 #define __FUNC__ "MatGetSubMatrices_SeqSBAIJ" 140 int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B) 141 { 142 int ierr,i; 143 144 PetscFunctionBegin; 145 if (scall == MAT_INITIAL_MATRIX) { 146 *B = (Mat*)PetscMalloc((n+1)*sizeof(Mat));CHKPTRQ(*B); 147 } 148 149 for (i=0; i<n; i++) { 150 ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 /* -------------------------------------------------------*/ 156 /* Should check that shapes of vectors and matrices match */ 157 /* -------------------------------------------------------*/ 158 #include "petscblaslapack.h" 159 160 #undef __FUNC__ 161 #define __FUNC__ "MatMult_SeqSBAIJ_1" 162 int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 163 { 164 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 165 Scalar *x,*z,*xb,x1,zero=0.0; 166 MatScalar *v; 167 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 168 169 PetscFunctionBegin; 170 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 171 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 172 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 173 174 v = a->a; 175 xb = x; 176 177 for (i=0; i<mbs; i++) { 178 n = ai[1] - ai[0]; /* length of i_th row of A */ 179 x1 = xb[0]; 180 ib = aj + *ai; 181 if (*ib == i) z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 182 for (j=1; j<n; j++) { 183 cval = *ib; 184 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 185 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 186 } 187 xb++; ai++; 188 } 189 190 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 191 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 192 PLogFlops(2*(a->s_nz*2 - a->m) - a->m); /* s_nz = (nz+m)/2 */ 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNC__ 197 #define __FUNC__ "MatMult_SeqSBAIJ_2" 198 int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 199 { 200 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 201 Scalar *x,*z,*xb,x1,x2,zero=0.0; 202 MatScalar *v; 203 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 204 205 206 PetscFunctionBegin; 207 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 208 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 209 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 210 211 v = a->a; 212 xb = x; 213 214 for (i=0; i<mbs; i++) { 215 n = ai[1] - ai[0]; /* length of i_th block row of A */ 216 x1 = xb[0]; x2 = xb[1]; 217 ib = aj + *ai; 218 if (*ib == i){ /* (diag of A)*x */ 219 z[2*i] += v[0]*x1 + v[2]*x2; 220 z[2*i+1] += v[2]*x1 + v[3]*x2; 221 v += 4; 222 } 223 for (j=1; j<n; j++) { 224 /* (strict lower triangular part of A)*x */ 225 cval = ib[j]*2; 226 z[cval] += v[0]*x1 + v[1]*x2; 227 z[cval+1] += v[2]*x1 + v[3]*x2; 228 /* (strict upper triangular part of A)*x */ 229 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 230 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 231 v += 4; 232 } 233 xb +=2; ai++; 234 } 235 236 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 237 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 238 PLogFlops(8*(a->s_nz*2 - a->m) - a->m); 239 PetscFunctionReturn(0); 240 } 241 242 #undef __FUNC__ 243 #define __FUNC__ "MatMult_SeqSBAIJ_3" 244 int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 245 { 246 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 247 Scalar *x,*z,*xb,x1,x2,x3,zero=0.0; 248 MatScalar *v; 249 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 250 251 252 PetscFunctionBegin; 253 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 254 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 255 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 256 257 v = a->a; 258 xb = x; 259 260 for (i=0; i<mbs; i++) { 261 n = ai[1] - ai[0]; /* length of i_th block row of A */ 262 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 263 ib = aj + *ai; 264 if (*ib == i){ /* (diag of A)*x */ 265 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 266 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 267 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 268 v += 9; 269 } 270 for (j=1; j<n; j++) { 271 /* (strict lower triangular part of A)*x */ 272 cval = ib[j]*3; 273 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 274 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 275 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 276 /* (strict upper triangular part of A)*x */ 277 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 278 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 279 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 280 v += 9; 281 } 282 xb +=3; ai++; 283 } 284 285 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 286 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 287 PLogFlops(18*(a->s_nz*2 - a->m) - a->m); 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNC__ 292 #define __FUNC__ "MatMult_SeqSBAIJ_4" 293 int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 294 { 295 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 296 Scalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 297 MatScalar *v; 298 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 299 300 PetscFunctionBegin; 301 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 302 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 303 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 304 305 v = a->a; 306 xb = x; 307 308 for (i=0; i<mbs; i++) { 309 n = ai[1] - ai[0]; /* length of i_th block row of A */ 310 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 311 ib = aj + *ai; 312 if (*ib == i){ /* (diag of A)*x */ 313 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 314 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 315 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 316 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 317 v += 16; 318 } 319 for (j=1; j<n; j++) { 320 /* (strict lower triangular part of A)*x */ 321 cval = ib[j]*4; 322 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 323 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 324 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 325 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 326 /* (strict upper triangular part of A)*x */ 327 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 328 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 329 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 330 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 331 v += 16; 332 } 333 xb +=4; ai++; 334 } 335 336 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 337 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 338 PLogFlops(32*(a->s_nz*2 - a->m) - a->m); 339 PetscFunctionReturn(0); 340 } 341 342 #undef __FUNC__ 343 #define __FUNC__ "MatMult_SeqSBAIJ_5" 344 int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 345 { 346 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 347 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 348 MatScalar *v; 349 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 350 351 PetscFunctionBegin; 352 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 353 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 354 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 355 356 v = a->a; 357 xb = x; 358 359 for (i=0; i<mbs; i++) { 360 n = ai[1] - ai[0]; /* length of i_th block row of A */ 361 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 362 ib = aj + *ai; 363 if (*ib == i){ /* (diag of A)*x */ 364 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 365 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 366 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 367 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 368 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 369 v += 25; 370 } 371 for (j=1; j<n; j++) { 372 /* (strict lower triangular part of A)*x */ 373 cval = ib[j]*5; 374 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 375 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 376 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 377 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 378 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 379 /* (strict upper triangular part of A)*x */ 380 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 381 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 382 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 383 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 384 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 385 v += 25; 386 } 387 xb +=5; ai++; 388 } 389 390 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 391 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 392 PLogFlops(50*(a->s_nz*2 - a->m) - a->m); 393 PetscFunctionReturn(0); 394 } 395 396 397 #undef __FUNC__ 398 #define __FUNC__ "MatMult_SeqSBAIJ_6" 399 int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 400 { 401 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 402 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 403 MatScalar *v; 404 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 405 406 PetscFunctionBegin; 407 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 408 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 409 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 410 411 v = a->a; 412 xb = x; 413 414 for (i=0; i<mbs; i++) { 415 n = ai[1] - ai[0]; /* length of i_th block row of A */ 416 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 417 ib = aj + *ai; 418 if (*ib == i){ /* (diag of A)*x */ 419 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 420 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 421 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 422 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 423 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 424 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 425 v += 36; 426 } 427 for (j=1; j<n; j++) { 428 /* (strict lower triangular part of A)*x */ 429 cval = ib[j]*6; 430 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 431 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 432 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 433 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 434 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 435 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 436 /* (strict upper triangular part of A)*x */ 437 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 438 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 439 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 440 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 441 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 442 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 443 v += 36; 444 } 445 xb +=6; ai++; 446 } 447 448 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 449 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 450 PLogFlops(72*(a->s_nz*2 - a->m) - a->m); 451 PetscFunctionReturn(0); 452 } 453 #undef __FUNC__ 454 #define __FUNC__ "MatMult_SeqSBAIJ_7" 455 int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 456 { 457 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 458 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 459 MatScalar *v; 460 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 461 462 PetscFunctionBegin; 463 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 464 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 465 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 466 467 v = a->a; 468 xb = x; 469 470 for (i=0; i<mbs; i++) { 471 n = ai[1] - ai[0]; /* length of i_th block row of A */ 472 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 473 ib = aj + *ai; 474 if (*ib == i){ /* (diag of A)*x */ 475 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 476 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 477 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 478 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 479 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 480 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 481 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 482 v += 49; 483 } 484 for (j=1; j<n; j++) { 485 /* (strict lower triangular part of A)*x */ 486 cval = ib[j]*7; 487 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 488 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 489 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 490 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 491 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 492 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 493 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 494 /* (strict upper triangular part of A)*x */ 495 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 496 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 497 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 498 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 499 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 500 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 501 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 502 v += 49; 503 } 504 xb +=7; ai++; 505 } 506 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 507 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 508 PLogFlops(98*(a->s_nz*2 - a->m) - a->m); 509 PetscFunctionReturn(0); 510 } 511 512 /* 513 This will not work with MatScalar == float because it calls the BLAS 514 */ 515 #undef __FUNC__ 516 #define __FUNC__ "MatMult_SeqSBAIJ_N" 517 int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 518 { 519 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 520 Scalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 521 MatScalar *v; 522 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 523 int ncols,k; 524 525 PetscFunctionBegin; 526 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 527 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 528 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 529 530 aj = a->j; 531 v = a->a; 532 ii = a->i; 533 534 if (!a->mult_work) { 535 k = a->m; 536 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 537 } 538 work = a->mult_work; 539 540 for (i=0; i<mbs; i++) { 541 n = ii[1] - ii[0]; ncols = n*bs; 542 workt = work; idx=aj+ii[0]; 543 544 /* upper triangular part */ 545 for (j=0; j<n; j++) { 546 /* col = bs*(*idx); */ 547 xb = x_ptr + bs*(*idx++); 548 for (k=0; k<bs; k++) workt[k] = xb[k]; 549 workt += bs; 550 } 551 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 552 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 553 554 /* strict lower triangular part */ 555 ncols -= bs; 556 if (ncols > 0){ 557 workt = work; 558 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 559 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 560 561 idx=aj+ii[0]+1; 562 for (j=1; j<n; j++) { 563 zb = z_ptr + bs*(*idx); 564 idx++; 565 for (k=0; k<bs; k++) zb[k] += workt[k] ; 566 workt += bs; 567 } 568 } 569 570 x += bs; v += n*bs2; z += bs; ii++; 571 } 572 573 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 574 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 575 PLogFlops(2*(a->s_nz*2 - a->m)*bs2 - a->m); 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNC__ 580 #define __FUNC__ "MatMultAdd_SeqSBAIJ_1" 581 int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 582 { 583 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 584 Scalar *x,*y,*z,*xb,x1; 585 MatScalar *v; 586 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 587 588 PetscFunctionBegin; 589 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 590 if (yy != xx) { 591 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 592 } else { 593 y = x; 594 } 595 if (zz != yy) { 596 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 597 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 598 } else { 599 z = y; 600 } 601 602 v = a->a; 603 xb = x; 604 605 for (i=0; i<mbs; i++) { 606 n = ai[1] - ai[0]; /* length of i_th row of A */ 607 x1 = xb[0]; 608 ib = aj + *ai; 609 if (*ib == i) z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 610 for (j=1; j<n; j++) { 611 cval = *ib; 612 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 613 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 614 } 615 xb++; ai++; 616 } 617 618 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 619 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 620 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 621 622 PLogFlops(2*(a->s_nz*2 - a->m)); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNC__ 627 #define __FUNC__ "MatMultAdd_SeqSBAIJ_2" 628 int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 629 { 630 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 631 Scalar *x,*y,*z,*xb,x1,x2; 632 MatScalar *v; 633 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 634 635 PetscFunctionBegin; 636 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 637 if (yy != xx) { 638 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 639 } else { 640 y = x; 641 } 642 if (zz != yy) { 643 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 644 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 645 } else { 646 z = y; 647 } 648 649 v = a->a; 650 xb = x; 651 652 for (i=0; i<mbs; i++) { 653 n = ai[1] - ai[0]; /* length of i_th block row of A */ 654 x1 = xb[0]; x2 = xb[1]; 655 ib = aj + *ai; 656 if (*ib == i){ /* (diag of A)*x */ 657 z[2*i] += v[0]*x1 + v[2]*x2; 658 z[2*i+1] += v[2]*x1 + v[3]*x2; 659 v += 4; 660 } 661 for (j=1; j<n; j++) { 662 /* (strict lower triangular part of A)*x */ 663 cval = ib[j]*2; 664 z[cval] += v[0]*x1 + v[1]*x2; 665 z[cval+1] += v[2]*x1 + v[3]*x2; 666 /* (strict upper triangular part of A)*x */ 667 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 668 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 669 v += 4; 670 } 671 xb +=2; ai++; 672 } 673 674 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 675 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 676 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 677 678 PLogFlops(4*(a->s_nz*2 - a->m)); 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNC__ 683 #define __FUNC__ "MatMultAdd_SeqSBAIJ_3" 684 int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 685 { 686 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 687 Scalar *x,*y,*z,*xb,x1,x2,x3; 688 MatScalar *v; 689 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 690 691 PetscFunctionBegin; 692 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 693 if (yy != xx) { 694 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 695 } else { 696 y = x; 697 } 698 if (zz != yy) { 699 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 700 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 701 } else { 702 z = y; 703 } 704 705 v = a->a; 706 xb = x; 707 708 for (i=0; i<mbs; i++) { 709 n = ai[1] - ai[0]; /* length of i_th block row of A */ 710 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 711 ib = aj + *ai; 712 if (*ib == i){ /* (diag of A)*x */ 713 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 714 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 715 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 716 v += 9; 717 } 718 for (j=1; j<n; j++) { 719 /* (strict lower triangular part of A)*x */ 720 cval = ib[j]*3; 721 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 722 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 723 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 724 /* (strict upper triangular part of A)*x */ 725 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 726 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 727 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 728 v += 9; 729 } 730 xb +=3; ai++; 731 } 732 733 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 734 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 735 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 736 737 PLogFlops(18*(a->s_nz*2 - a->m)); 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNC__ 742 #define __FUNC__ "MatMultAdd_SeqSBAIJ_4" 743 int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 744 { 745 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 746 Scalar *x,*y,*z,*xb,x1,x2,x3,x4; 747 MatScalar *v; 748 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 749 750 PetscFunctionBegin; 751 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 752 if (yy != xx) { 753 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 754 } else { 755 y = x; 756 } 757 if (zz != yy) { 758 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 759 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 760 } else { 761 z = y; 762 } 763 764 v = a->a; 765 xb = x; 766 767 for (i=0; i<mbs; i++) { 768 n = ai[1] - ai[0]; /* length of i_th block row of A */ 769 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 770 ib = aj + *ai; 771 if (*ib == i){ /* (diag of A)*x */ 772 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 773 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 774 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 775 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 776 v += 16; 777 } 778 for (j=1; j<n; j++) { 779 /* (strict lower triangular part of A)*x */ 780 cval = ib[j]*4; 781 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 782 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 783 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 784 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 785 /* (strict upper triangular part of A)*x */ 786 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 787 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 788 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 789 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 790 v += 16; 791 } 792 xb +=4; ai++; 793 } 794 795 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 796 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 797 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 798 799 PLogFlops(32*(a->s_nz*2 - a->m)); 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNC__ 804 #define __FUNC__ "MatMultAdd_SeqSBAIJ_5" 805 int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 806 { 807 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 808 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 809 MatScalar *v; 810 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 811 812 PetscFunctionBegin; 813 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 814 if (yy != xx) { 815 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 816 } else { 817 y = x; 818 } 819 if (zz != yy) { 820 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 821 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 822 } else { 823 z = y; 824 } 825 826 v = a->a; 827 xb = x; 828 829 for (i=0; i<mbs; i++) { 830 n = ai[1] - ai[0]; /* length of i_th block row of A */ 831 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 832 ib = aj + *ai; 833 if (*ib == i){ /* (diag of A)*x */ 834 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 835 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 836 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 837 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 838 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 839 v += 25; 840 } 841 for (j=1; j<n; j++) { 842 /* (strict lower triangular part of A)*x */ 843 cval = ib[j]*5; 844 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 845 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 846 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 847 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 848 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 849 /* (strict upper triangular part of A)*x */ 850 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 851 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 852 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 853 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 854 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 855 v += 25; 856 } 857 xb +=5; ai++; 858 } 859 860 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 861 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 862 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 863 864 PLogFlops(50*(a->s_nz*2 - a->m)); 865 PetscFunctionReturn(0); 866 } 867 #undef __FUNC__ 868 #define __FUNC__ "MatMultAdd_SeqSBAIJ_6" 869 int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 870 { 871 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 872 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 873 MatScalar *v; 874 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 875 876 PetscFunctionBegin; 877 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878 if (yy != xx) { 879 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 880 } else { 881 y = x; 882 } 883 if (zz != yy) { 884 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 885 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 886 } else { 887 z = y; 888 } 889 890 v = a->a; 891 xb = x; 892 893 for (i=0; i<mbs; i++) { 894 n = ai[1] - ai[0]; /* length of i_th block row of A */ 895 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 896 ib = aj + *ai; 897 if (*ib == i){ /* (diag of A)*x */ 898 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 899 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 900 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 901 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 902 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 903 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 904 v += 36; 905 } 906 for (j=1; j<n; j++) { 907 /* (strict lower triangular part of A)*x */ 908 cval = ib[j]*6; 909 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 910 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 911 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 912 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 913 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 914 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 915 /* (strict upper triangular part of A)*x */ 916 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 917 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 918 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 919 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 920 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 921 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 922 v += 36; 923 } 924 xb +=6; ai++; 925 } 926 927 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 928 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 929 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 930 931 PLogFlops(72*(a->s_nz*2 - a->m)); 932 PetscFunctionReturn(0); 933 } 934 935 #undef __FUNC__ 936 #define __FUNC__ "MatMultAdd_SeqSBAIJ_7" 937 int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 938 { 939 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 940 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 941 MatScalar *v; 942 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 943 944 PetscFunctionBegin; 945 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 946 if (yy != xx) { 947 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 948 } else { 949 y = x; 950 } 951 if (zz != yy) { 952 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 953 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 954 } else { 955 z = y; 956 } 957 958 v = a->a; 959 xb = x; 960 961 for (i=0; i<mbs; i++) { 962 n = ai[1] - ai[0]; /* length of i_th block row of A */ 963 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 964 ib = aj + *ai; 965 if (*ib == i){ /* (diag of A)*x */ 966 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 967 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 968 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 969 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 970 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 971 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 972 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 973 v += 49; 974 } 975 for (j=1; j<n; j++) { 976 /* (strict lower triangular part of A)*x */ 977 cval = ib[j]*7; 978 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 979 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 980 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 981 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 982 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 983 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 984 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 985 /* (strict upper triangular part of A)*x */ 986 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 987 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 988 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 989 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 990 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 991 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 992 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 993 v += 49; 994 } 995 xb +=7; ai++; 996 } 997 998 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 999 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1000 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1001 1002 PLogFlops(98*(a->s_nz*2 - a->m)); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNC__ 1007 #define __FUNC__ "MatMultAdd_SeqSBAIJ_N" 1008 int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1009 { 1010 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1011 Scalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1012 MatScalar *v; 1013 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1014 int ncols,k; 1015 1016 PetscFunctionBegin; 1017 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 1018 if (yy != xx) { 1019 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1020 } else { 1021 y = x; 1022 } 1023 if (zz != yy) { 1024 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1025 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 1026 } else { 1027 z = y; 1028 } 1029 1030 aj = a->j; 1031 v = a->a; 1032 ii = a->i; 1033 1034 if (!a->mult_work) { 1035 k = a->m; 1036 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1037 } 1038 work = a->mult_work; 1039 1040 1041 for (i=0; i<mbs; i++) { 1042 n = ii[1] - ii[0]; ncols = n*bs; 1043 workt = work; idx=aj+ii[0]; 1044 1045 /* upper triangular part */ 1046 for (j=0; j<n; j++) { 1047 /* col = bs*(*idx); */ 1048 xb = x_ptr + bs*(*idx++); 1049 for (k=0; k<bs; k++) workt[k] = xb[k]; 1050 workt += bs; 1051 } 1052 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 1053 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1054 1055 /* strict lower triangular part */ 1056 ncols -= bs; 1057 if (ncols > 0){ 1058 workt = work; 1059 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 1060 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 1061 1062 idx=aj+ii[0]+1; 1063 for (j=1; j<n; j++) { 1064 zb = z_ptr + bs*(*idx); 1065 idx++; 1066 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1067 workt += bs; 1068 } 1069 } 1070 1071 x += bs; v += n*bs2; z += bs; ii++; 1072 } 1073 1074 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1075 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1076 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1077 1078 PLogFlops(2*(a->s_nz*2 - a->m)); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNC__ 1083 #define __FUNC__ "MatMultTranspose_SeqSBAIJ" 1084 int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 1085 { 1086 PetscFunctionBegin; 1087 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1088 /* PetscFunctionReturn(0); */ 1089 } 1090 1091 #undef __FUNC__ 1092 #define __FUNC__ "MatMultTransposeAdd_SeqSBAIJ" 1093 int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1094 1095 { 1096 PetscFunctionBegin; 1097 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1098 /* PetscFunctionReturn(0); */ 1099 } 1100 1101 #undef __FUNC__ 1102 #define __FUNC__ "MatScale_SeqSBAIJ" 1103 int MatScale_SeqSBAIJ(Scalar *alpha,Mat inA) 1104 { 1105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1106 int one = 1,totalnz = a->bs2*a->s_nz; 1107 1108 PetscFunctionBegin; 1109 BLscal_(&totalnz,alpha,a->a,&one); 1110 PLogFlops(totalnz); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNC__ 1115 #define __FUNC__ "MatNorm_SeqSBAIJ" 1116 int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 1117 { 1118 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1119 MatScalar *v = a->a; 1120 PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1121 int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs; 1122 int *jl,*il,jmin,jmax,ierr,nexti,ik; 1123 1124 PetscFunctionBegin; 1125 if (type == NORM_FROBENIUS) { 1126 for (k=0; k<mbs; k++){ 1127 jmin = a->i[k]; jmax = a->i[k+1]; 1128 /* diagonal block */ 1129 for (i=0; i<bs2; i++){ 1130 #if defined(PETSC_USE_COMPLEX) 1131 sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 1132 #else 1133 sum_diag += (*v)*(*v); v++; 1134 #endif 1135 } 1136 for (j=jmin+1; j<jmax; j++){ /* off-diagonal blocks */ 1137 for (i=0; i<bs2; i++){ 1138 #if defined(PETSC_USE_COMPLEX) 1139 sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 1140 #else 1141 sum_off += (*v)*(*v); v++; 1142 #endif 1143 } 1144 } 1145 } 1146 *norm = sqrt(sum_diag + 2*sum_off); 1147 1148 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1149 il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il); 1150 jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl); 1151 sum = (PetscReal*)PetscMalloc(bs*sizeof(PetscReal));CHKPTRQ(sum); 1152 for (i=0; i<mbs; i++) { 1153 jl[i] = mbs; il[0] = 0; 1154 } 1155 1156 *norm = 0.0; 1157 for (k=0; k<mbs; k++) { /* k_th block row */ 1158 for (j=0; j<bs; j++) sum[j]=0.0; 1159 1160 /*-- col sum --*/ 1161 i = jl[k]; /* first |A(i,k)| to be added */ 1162 while (i<mbs){ 1163 nexti = jl[i]; /* next block row to be added */ 1164 ik = il[i]; /* block index of A(i,k) in the array a */ 1165 for (j=0; j<bs; j++){ 1166 v = a->a + ik*bs2 + j*bs; 1167 for (k1=0; k1<bs; k1++) { 1168 sum[j] += PetscAbsScalar(*v); v++; 1169 } 1170 } 1171 /* update il, jl */ 1172 jmin = ik + 1; jmax = a->i[i+1]; 1173 if (jmin < jmax){ 1174 il[i] = jmin; 1175 j = a->j[jmin]; 1176 jl[i] = jl[j]; jl[j]=i; 1177 } 1178 i = nexti; 1179 } 1180 1181 /*-- row sum --*/ 1182 jmin = a->i[k]; jmax = a->i[k+1]; 1183 for (i=jmin; i<jmax; i++) { 1184 for (j=0; j<bs; j++){ 1185 v = a->a + i*bs2 + j; 1186 for (k1=0; k1<bs; k1++){ 1187 sum[j] += PetscAbsScalar(*v); 1188 v += bs; 1189 } 1190 } 1191 } 1192 /* add k_th block row to il, jl */ 1193 jmin++; 1194 if (jmin < jmax){ 1195 il[k] = jmin; 1196 j = a->j[jmin]; 1197 jl[k] = jl[j]; jl[j] = k; 1198 } 1199 for (j=0; j<bs; j++){ 1200 if (sum[j] > *norm) *norm = sum[j]; 1201 } 1202 } 1203 ierr = PetscFree(il);CHKERRQ(ierr); 1204 ierr = PetscFree(jl);CHKERRQ(ierr); 1205 ierr = PetscFree(sum);CHKERRQ(ierr); 1206 } else { 1207 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNC__ 1213 #define __FUNC__ "MatEqual_SeqSBAIJ" 1214 int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 1215 { 1216 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1217 int ierr; 1218 1219 PetscFunctionBegin; 1220 if (B->type !=MATSEQSBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1221 1222 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1223 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) { 1224 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1225 } 1226 1227 /* if the a->i are the same */ 1228 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 1229 if (*flg == PETSC_FALSE) { 1230 PetscFunctionReturn(0); 1231 } 1232 1233 /* if a->j are the same */ 1234 ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr); 1235 if (*flg == PETSC_FALSE) { 1236 PetscFunctionReturn(0); 1237 } 1238 /* if a->a are the same */ 1239 ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(Scalar),flg);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 1242 } 1243 1244 #undef __FUNC__ 1245 #define __FUNC__ "MatGetDiagonal_SeqSBAIJ" 1246 int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 1247 { 1248 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1249 int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1250 Scalar *x,zero = 0.0; 1251 MatScalar *aa,*aa_j; 1252 1253 PetscFunctionBegin; 1254 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1255 bs = a->bs; 1256 aa = a->a; 1257 ai = a->i; 1258 aj = a->j; 1259 ambs = a->mbs; 1260 bs2 = a->bs2; 1261 1262 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1263 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1264 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1265 /* if (n != a->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");*/ 1266 for (i=0; i<ambs; i++) { 1267 j=ai[i]; 1268 if (aj[j] == i) { /* if this is a diagonal element */ 1269 row = i*bs; 1270 aa_j = aa + j*bs2; 1271 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1272 } 1273 } 1274 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNC__ 1279 #define __FUNC__ "MatDiagonalScale_SeqSBAIJ" 1280 int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 1281 { 1282 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1283 Scalar *l,*r,x,*li,*ri; 1284 MatScalar *aa,*v; 1285 int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1286 1287 PetscFunctionBegin; 1288 ai = a->i; 1289 aj = a->j; 1290 aa = a->a; 1291 m = a->m; 1292 bs = a->bs; 1293 mbs = a->mbs; 1294 bs2 = a->bs2; 1295 1296 if (ll != rr) { 1297 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1298 } 1299 if (ll) { 1300 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1301 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1302 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1303 for (i=0; i<mbs; i++) { /* for each block row */ 1304 M = ai[i+1] - ai[i]; 1305 li = l + i*bs; 1306 v = aa + bs2*ai[i]; 1307 for (j=0; j<M; j++) { /* for each block */ 1308 for (k=0; k<bs2; k++) { 1309 (*v++) *= li[k%bs]; 1310 } 1311 #ifdef CONT 1312 /* will be used to replace the above loop */ 1313 ri = l + bs*aj[ai[i]+j]; 1314 for (k=0; k<bs; k++) { /* column value */ 1315 x = ri[k]; 1316 for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 1317 } 1318 #endif 1319 1320 } 1321 } 1322 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1323 PLogFlops(2*a->s_nz); 1324 } 1325 /* will be deleted */ 1326 if (rr) { 1327 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1328 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1329 if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1330 for (i=0; i<mbs; i++) { /* for each block row */ 1331 M = ai[i+1] - ai[i]; 1332 v = aa + bs2*ai[i]; 1333 for (j=0; j<M; j++) { /* for each block */ 1334 ri = r + bs*aj[ai[i]+j]; 1335 for (k=0; k<bs; k++) { 1336 x = ri[k]; 1337 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1338 } 1339 } 1340 } 1341 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1342 PLogFlops(a->s_nz); 1343 } 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNC__ 1348 #define __FUNC__ "MatGetInfo_SeqSBAIJ" 1349 int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1350 { 1351 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1352 1353 PetscFunctionBegin; 1354 info->rows_global = (double)a->m; 1355 info->columns_global = (double)a->m; 1356 info->rows_local = (double)a->m; 1357 info->columns_local = (double)a->m; 1358 info->block_size = a->bs2; 1359 info->nz_allocated = a->s_maxnz; /*num. of nonzeros in upper triangular part */ 1360 info->nz_used = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */ 1361 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1362 info->assemblies = A->num_ass; 1363 info->mallocs = a->reallocs; 1364 info->memory = A->mem; 1365 if (A->factor) { 1366 info->fill_ratio_given = A->info.fill_ratio_given; 1367 info->fill_ratio_needed = A->info.fill_ratio_needed; 1368 info->factor_mallocs = A->info.factor_mallocs; 1369 } else { 1370 info->fill_ratio_given = 0; 1371 info->fill_ratio_needed = 0; 1372 info->factor_mallocs = 0; 1373 } 1374 PetscFunctionReturn(0); 1375 } 1376 1377 1378 #undef __FUNC__ 1379 #define __FUNC__ "MatZeroEntries_SeqSBAIJ" 1380 int MatZeroEntries_SeqSBAIJ(Mat A) 1381 { 1382 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1383 int ierr; 1384 1385 PetscFunctionBegin; 1386 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1387 PetscFunctionReturn(0); 1388 } 1389