1 /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/ 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 #include "src/inline/ilu.h" 7 #include "petscbt.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 12 int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov) 13 { 14 PetscFunctionBegin; 15 SETERRQ(1,"Function not yet written for SBAIJ format"); 16 /* PetscFunctionReturn(0); */ 17 } 18 19 #undef __FUNCT__ 20 #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 21 int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 22 { 23 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 24 int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 25 int row,mat_i,*mat_j,tcol,*mat_ilen; 26 int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 27 int *aj = a->j,*ai = a->i; 28 MatScalar *mat_a; 29 Mat C; 30 PetscTruth flag; 31 32 PetscFunctionBegin; 33 34 if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 35 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 36 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 37 38 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 39 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 40 41 ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr); 42 ssmap = smap; 43 ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr); 44 ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 45 for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 46 /* determine lens of each row */ 47 for (i=0; i<nrows; i++) { 48 kstart = ai[irow[i]]; 49 kend = kstart + a->ilen[irow[i]]; 50 lens[i] = 0; 51 for (k=kstart; k<kend; k++) { 52 if (ssmap[aj[k]]) { 53 lens[i]++; 54 } 55 } 56 } 57 /* Create and fill new matrix */ 58 if (scall == MAT_REUSE_MATRIX) { 59 c = (Mat_SeqSBAIJ *)((*B)->data); 60 61 if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 62 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 63 if (flag == PETSC_FALSE) { 64 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 65 } 66 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 67 C = *B; 68 } else { 69 ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr); 70 } 71 c = (Mat_SeqSBAIJ *)(C->data); 72 for (i=0; i<nrows; i++) { 73 row = irow[i]; 74 kstart = ai[row]; 75 kend = kstart + a->ilen[row]; 76 mat_i = c->i[i]; 77 mat_j = c->j + mat_i; 78 mat_a = c->a + mat_i*bs2; 79 mat_ilen = c->ilen + i; 80 for (k=kstart; k<kend; k++) { 81 if ((tcol=ssmap[a->j[k]])) { 82 *mat_j++ = tcol - 1; 83 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 84 mat_a += bs2; 85 (*mat_ilen)++; 86 } 87 } 88 } 89 90 /* Free work space */ 91 ierr = PetscFree(smap);CHKERRQ(ierr); 92 ierr = PetscFree(lens);CHKERRQ(ierr); 93 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 94 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 96 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 97 *B = C; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 103 int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 IS is1; 107 int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 108 109 PetscFunctionBegin; 110 if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 111 112 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 113 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 114 115 /* Verify if the indices corespond to each element in a block 116 and form the IS with compressed IS */ 117 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr); 118 iary = vary + a->mbs; 119 ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 120 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 121 122 count = 0; 123 for (i=0; i<a->mbs; i++) { 124 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks"); 125 if (vary[i]==bs) iary[count++] = i; 126 } 127 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 128 129 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 130 ierr = PetscFree(vary);CHKERRQ(ierr); 131 132 ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 133 ISDestroy(is1); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 139 int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B) 140 { 141 int ierr,i; 142 143 PetscFunctionBegin; 144 if (scall == MAT_INITIAL_MATRIX) { 145 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 146 } 147 148 for (i=0; i<n; i++) { 149 ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 150 } 151 PetscFunctionReturn(0); 152 } 153 154 /* -------------------------------------------------------*/ 155 /* Should check that shapes of vectors and matrices match */ 156 /* -------------------------------------------------------*/ 157 #include "petscblaslapack.h" 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "MatMult_SeqSBAIJ_1" 161 int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 162 { 163 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 164 PetscScalar *x,*z,*xb,x1,zero=0.0; 165 MatScalar *v; 166 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 167 168 PetscFunctionBegin; 169 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 170 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 171 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 172 173 v = a->a; 174 xb = x; 175 176 for (i=0; i<mbs; i++) { 177 n = ai[1] - ai[0]; /* length of i_th row of A */ 178 x1 = xb[0]; 179 ib = aj + *ai; 180 jmin = 0; 181 if (*ib == i) { /* (diag of A)*x */ 182 z[i] += *v++ * x[*ib++]; 183 jmin++; 184 } 185 for (j=jmin; j<n; j++) { 186 cval = *ib; 187 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 188 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 189 } 190 xb++; ai++; 191 } 192 193 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 194 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 195 PetscLogFlops(2*(a->s_nz*2 - A->m) - A->m); /* s_nz = (nz+m)/2 */ 196 PetscFunctionReturn(0); 197 } 198 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatMult_SeqSBAIJ_2" 201 int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 202 { 203 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 204 PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 205 MatScalar *v; 206 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 207 208 209 PetscFunctionBegin; 210 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 211 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 212 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 213 214 v = a->a; 215 xb = x; 216 217 for (i=0; i<mbs; i++) { 218 n = ai[1] - ai[0]; /* length of i_th block row of A */ 219 x1 = xb[0]; x2 = xb[1]; 220 ib = aj + *ai; 221 jmin = 0; 222 if (*ib == i){ /* (diag of A)*x */ 223 z[2*i] += v[0]*x1 + v[2]*x2; 224 z[2*i+1] += v[2]*x1 + v[3]*x2; 225 v += 4; jmin++; 226 } 227 for (j=jmin; j<n; j++) { 228 /* (strict lower triangular part of A)*x */ 229 cval = ib[j]*2; 230 z[cval] += v[0]*x1 + v[1]*x2; 231 z[cval+1] += v[2]*x1 + v[3]*x2; 232 /* (strict upper triangular part of A)*x */ 233 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 234 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 235 v += 4; 236 } 237 xb +=2; ai++; 238 } 239 240 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 241 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 242 PetscLogFlops(8*(a->s_nz*2 - A->m) - A->m); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatMult_SeqSBAIJ_3" 248 int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 249 { 250 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 251 PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 252 MatScalar *v; 253 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 254 255 256 PetscFunctionBegin; 257 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 258 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 259 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 260 261 v = a->a; 262 xb = x; 263 264 for (i=0; i<mbs; i++) { 265 n = ai[1] - ai[0]; /* length of i_th block row of A */ 266 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 267 ib = aj + *ai; 268 jmin = 0; 269 if (*ib == i){ /* (diag of A)*x */ 270 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 271 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 272 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 273 v += 9; jmin++; 274 } 275 for (j=jmin; j<n; j++) { 276 /* (strict lower triangular part of A)*x */ 277 cval = ib[j]*3; 278 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 279 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 280 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 281 /* (strict upper triangular part of A)*x */ 282 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 283 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 284 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 285 v += 9; 286 } 287 xb +=3; ai++; 288 } 289 290 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 291 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 292 PetscLogFlops(18*(a->s_nz*2 - A->m) - A->m); 293 PetscFunctionReturn(0); 294 } 295 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatMult_SeqSBAIJ_4" 298 int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 299 { 300 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 301 PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 302 MatScalar *v; 303 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 304 305 PetscFunctionBegin; 306 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 307 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 308 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 309 310 v = a->a; 311 xb = x; 312 313 for (i=0; i<mbs; i++) { 314 n = ai[1] - ai[0]; /* length of i_th block row of A */ 315 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 316 ib = aj + *ai; 317 jmin = 0; 318 if (*ib == i){ /* (diag of A)*x */ 319 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 320 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 321 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 322 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 323 v += 16; jmin++; 324 } 325 for (j=jmin; j<n; j++) { 326 /* (strict lower triangular part of A)*x */ 327 cval = ib[j]*4; 328 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 329 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 330 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 331 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 332 /* (strict upper triangular part of A)*x */ 333 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 334 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 335 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 336 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 337 v += 16; 338 } 339 xb +=4; ai++; 340 } 341 342 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 343 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 344 PetscLogFlops(32*(a->s_nz*2 - A->m) - A->m); 345 PetscFunctionReturn(0); 346 } 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatMult_SeqSBAIJ_5" 350 int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 351 { 352 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 353 PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 354 MatScalar *v; 355 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 356 357 PetscFunctionBegin; 358 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 359 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 360 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 361 362 v = a->a; 363 xb = x; 364 365 for (i=0; i<mbs; i++) { 366 n = ai[1] - ai[0]; /* length of i_th block row of A */ 367 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 368 ib = aj + *ai; 369 jmin = 0; 370 if (*ib == i){ /* (diag of A)*x */ 371 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 372 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 373 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 374 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 375 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 376 v += 25; jmin++; 377 } 378 for (j=jmin; j<n; j++) { 379 /* (strict lower triangular part of A)*x */ 380 cval = ib[j]*5; 381 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 382 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 383 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 384 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 385 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 386 /* (strict upper triangular part of A)*x */ 387 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 388 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 389 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 390 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 391 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 392 v += 25; 393 } 394 xb +=5; ai++; 395 } 396 397 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 398 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 399 PetscLogFlops(50*(a->s_nz*2 - A->m) - A->m); 400 PetscFunctionReturn(0); 401 } 402 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatMult_SeqSBAIJ_6" 406 int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 407 { 408 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 409 PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 410 MatScalar *v; 411 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 412 413 PetscFunctionBegin; 414 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 415 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 416 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 417 418 v = a->a; 419 xb = x; 420 421 for (i=0; i<mbs; i++) { 422 n = ai[1] - ai[0]; /* length of i_th block row of A */ 423 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 424 ib = aj + *ai; 425 jmin = 0; 426 if (*ib == i){ /* (diag of A)*x */ 427 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 428 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 429 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 430 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 431 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 432 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 433 v += 36; jmin++; 434 } 435 for (j=jmin; j<n; j++) { 436 /* (strict lower triangular part of A)*x */ 437 cval = ib[j]*6; 438 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 439 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 440 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 441 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 442 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 443 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 444 /* (strict upper triangular part of A)*x */ 445 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 446 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 447 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 448 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 449 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 450 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 451 v += 36; 452 } 453 xb +=6; ai++; 454 } 455 456 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 457 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 458 PetscLogFlops(72*(a->s_nz*2 - A->m) - A->m); 459 PetscFunctionReturn(0); 460 } 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatMult_SeqSBAIJ_7" 463 int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 464 { 465 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 466 PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 467 MatScalar *v; 468 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 469 470 PetscFunctionBegin; 471 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 472 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 473 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 474 475 v = a->a; 476 xb = x; 477 478 for (i=0; i<mbs; i++) { 479 n = ai[1] - ai[0]; /* length of i_th block row of A */ 480 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 481 ib = aj + *ai; 482 jmin = 0; 483 if (*ib == i){ /* (diag of A)*x */ 484 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 485 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 486 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 487 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 488 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 489 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 490 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 491 v += 49; jmin++; 492 } 493 for (j=jmin; j<n; j++) { 494 /* (strict lower triangular part of A)*x */ 495 cval = ib[j]*7; 496 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 497 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 498 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 499 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 500 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 501 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 502 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 503 /* (strict upper triangular part of A)*x */ 504 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 505 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 506 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 507 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 508 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 509 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 510 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 511 v += 49; 512 } 513 xb +=7; ai++; 514 } 515 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 516 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 517 PetscLogFlops(98*(a->s_nz*2 - A->m) - A->m); 518 PetscFunctionReturn(0); 519 } 520 521 /* 522 This will not work with MatScalar == float because it calls the BLAS 523 */ 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatMult_SeqSBAIJ_N" 526 int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 527 { 528 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 529 PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 530 MatScalar *v; 531 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 532 int ncols,k; 533 534 PetscFunctionBegin; 535 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 536 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 537 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 538 539 aj = a->j; 540 v = a->a; 541 ii = a->i; 542 543 if (!a->mult_work) { 544 ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 545 } 546 work = a->mult_work; 547 548 for (i=0; i<mbs; i++) { 549 n = ii[1] - ii[0]; ncols = n*bs; 550 workt = work; idx=aj+ii[0]; 551 552 /* upper triangular part */ 553 for (j=0; j<n; j++) { 554 xb = x_ptr + bs*(*idx++); 555 for (k=0; k<bs; k++) workt[k] = xb[k]; 556 workt += bs; 557 } 558 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 559 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 560 561 /* strict lower triangular part */ 562 idx = aj+ii[0]; 563 if (*idx == i){ 564 ncols -= bs; v += bs2; idx++; n--; 565 } 566 567 if (ncols > 0){ 568 workt = work; 569 ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 570 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 571 for (j=0; j<n; j++) { 572 zb = z_ptr + bs*(*idx++); 573 for (k=0; k<bs; k++) zb[k] += workt[k] ; 574 workt += bs; 575 } 576 } 577 x += bs; v += n*bs2; z += bs; ii++; 578 } 579 580 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 581 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 582 PetscLogFlops(2*(a->s_nz*2 - A->m)*bs2 - A->m); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 588 int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 589 { 590 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 591 PetscScalar *x,*y,*z,*xb,x1; 592 MatScalar *v; 593 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 594 595 PetscFunctionBegin; 596 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 597 if (yy != xx) { 598 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 599 } else { 600 y = x; 601 } 602 if (zz != yy) { 603 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 604 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 605 } else { 606 z = y; 607 } 608 609 v = a->a; 610 xb = x; 611 612 for (i=0; i<mbs; i++) { 613 n = ai[1] - ai[0]; /* length of i_th row of A */ 614 x1 = xb[0]; 615 ib = aj + *ai; 616 jmin = 0; 617 if (*ib == i) { /* (diag of A)*x */ 618 z[i] += *v++ * x[*ib++]; jmin++; 619 } 620 for (j=jmin; j<n; j++) { 621 cval = *ib; 622 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 623 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 624 } 625 xb++; ai++; 626 } 627 628 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 629 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 630 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 631 632 PetscLogFlops(2*(a->s_nz*2 - A->m)); 633 PetscFunctionReturn(0); 634 } 635 636 #undef __FUNCT__ 637 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 638 int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 639 { 640 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 641 PetscScalar *x,*y,*z,*xb,x1,x2; 642 MatScalar *v; 643 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 644 645 PetscFunctionBegin; 646 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 647 if (yy != xx) { 648 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 649 } else { 650 y = x; 651 } 652 if (zz != yy) { 653 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 654 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 655 } else { 656 z = y; 657 } 658 659 v = a->a; 660 xb = x; 661 662 for (i=0; i<mbs; i++) { 663 n = ai[1] - ai[0]; /* length of i_th block row of A */ 664 x1 = xb[0]; x2 = xb[1]; 665 ib = aj + *ai; 666 jmin = 0; 667 if (*ib == i){ /* (diag of A)*x */ 668 z[2*i] += v[0]*x1 + v[2]*x2; 669 z[2*i+1] += v[2]*x1 + v[3]*x2; 670 v += 4; jmin++; 671 } 672 for (j=jmin; j<n; j++) { 673 /* (strict lower triangular part of A)*x */ 674 cval = ib[j]*2; 675 z[cval] += v[0]*x1 + v[1]*x2; 676 z[cval+1] += v[2]*x1 + v[3]*x2; 677 /* (strict upper triangular part of A)*x */ 678 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 679 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 680 v += 4; 681 } 682 xb +=2; ai++; 683 } 684 685 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 686 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 687 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 688 689 PetscLogFlops(4*(a->s_nz*2 - A->m)); 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 695 int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 696 { 697 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 698 PetscScalar *x,*y,*z,*xb,x1,x2,x3; 699 MatScalar *v; 700 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 701 702 PetscFunctionBegin; 703 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 704 if (yy != xx) { 705 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 706 } else { 707 y = x; 708 } 709 if (zz != yy) { 710 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 711 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 712 } else { 713 z = y; 714 } 715 716 v = a->a; 717 xb = x; 718 719 for (i=0; i<mbs; i++) { 720 n = ai[1] - ai[0]; /* length of i_th block row of A */ 721 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 722 ib = aj + *ai; 723 jmin = 0; 724 if (*ib == i){ /* (diag of A)*x */ 725 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 726 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 727 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 728 v += 9; jmin++; 729 } 730 for (j=jmin; j<n; j++) { 731 /* (strict lower triangular part of A)*x */ 732 cval = ib[j]*3; 733 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 734 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 735 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 736 /* (strict upper triangular part of A)*x */ 737 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 738 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 739 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 740 v += 9; 741 } 742 xb +=3; ai++; 743 } 744 745 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 746 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 747 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 748 749 PetscLogFlops(18*(a->s_nz*2 - A->m)); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 755 int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 756 { 757 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 758 PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4; 759 MatScalar *v; 760 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 761 762 PetscFunctionBegin; 763 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 764 if (yy != xx) { 765 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 766 } else { 767 y = x; 768 } 769 if (zz != yy) { 770 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 771 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 772 } else { 773 z = y; 774 } 775 776 v = a->a; 777 xb = x; 778 779 for (i=0; i<mbs; i++) { 780 n = ai[1] - ai[0]; /* length of i_th block row of A */ 781 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 782 ib = aj + *ai; 783 jmin = 0; 784 if (*ib == i){ /* (diag of A)*x */ 785 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 786 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 787 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 788 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 789 v += 16; jmin++; 790 } 791 for (j=jmin; j<n; j++) { 792 /* (strict lower triangular part of A)*x */ 793 cval = ib[j]*4; 794 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 795 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 796 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 797 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 798 /* (strict upper triangular part of A)*x */ 799 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 800 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 801 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 802 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 803 v += 16; 804 } 805 xb +=4; ai++; 806 } 807 808 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 809 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 810 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 811 812 PetscLogFlops(32*(a->s_nz*2 - A->m)); 813 PetscFunctionReturn(0); 814 } 815 816 #undef __FUNCT__ 817 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 818 int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 819 { 820 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 821 PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 822 MatScalar *v; 823 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 824 825 PetscFunctionBegin; 826 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 827 if (yy != xx) { 828 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 829 } else { 830 y = x; 831 } 832 if (zz != yy) { 833 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 834 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 835 } else { 836 z = y; 837 } 838 839 v = a->a; 840 xb = x; 841 842 for (i=0; i<mbs; i++) { 843 n = ai[1] - ai[0]; /* length of i_th block row of A */ 844 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 845 ib = aj + *ai; 846 jmin = 0; 847 if (*ib == i){ /* (diag of A)*x */ 848 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 849 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 850 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 851 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 852 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 853 v += 25; jmin++; 854 } 855 for (j=jmin; j<n; j++) { 856 /* (strict lower triangular part of A)*x */ 857 cval = ib[j]*5; 858 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 859 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 860 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 861 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 862 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 863 /* (strict upper triangular part of A)*x */ 864 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 865 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 866 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 867 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 868 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 869 v += 25; 870 } 871 xb +=5; ai++; 872 } 873 874 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 875 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 876 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 877 878 PetscLogFlops(50*(a->s_nz*2 - A->m)); 879 PetscFunctionReturn(0); 880 } 881 #undef __FUNCT__ 882 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 883 int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 884 { 885 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 886 PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 887 MatScalar *v; 888 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 889 890 PetscFunctionBegin; 891 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 892 if (yy != xx) { 893 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 894 } else { 895 y = x; 896 } 897 if (zz != yy) { 898 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 899 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 900 } else { 901 z = y; 902 } 903 904 v = a->a; 905 xb = x; 906 907 for (i=0; i<mbs; i++) { 908 n = ai[1] - ai[0]; /* length of i_th block row of A */ 909 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 910 ib = aj + *ai; 911 jmin = 0; 912 if (*ib == i){ /* (diag of A)*x */ 913 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 914 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 915 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 916 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 917 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 918 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 919 v += 36; jmin++; 920 } 921 for (j=jmin; j<n; j++) { 922 /* (strict lower triangular part of A)*x */ 923 cval = ib[j]*6; 924 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 925 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 926 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 927 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 928 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 929 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 930 /* (strict upper triangular part of A)*x */ 931 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 932 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 933 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 934 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 935 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 936 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 937 v += 36; 938 } 939 xb +=6; ai++; 940 } 941 942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 943 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 944 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 945 946 PetscLogFlops(72*(a->s_nz*2 - A->m)); 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 952 int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 953 { 954 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 955 PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 956 MatScalar *v; 957 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 958 959 PetscFunctionBegin; 960 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 961 if (yy != xx) { 962 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 963 } else { 964 y = x; 965 } 966 if (zz != yy) { 967 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 968 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 969 } else { 970 z = y; 971 } 972 973 v = a->a; 974 xb = x; 975 976 for (i=0; i<mbs; i++) { 977 n = ai[1] - ai[0]; /* length of i_th block row of A */ 978 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 979 ib = aj + *ai; 980 jmin = 0; 981 if (*ib == i){ /* (diag of A)*x */ 982 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 983 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 984 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 985 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 986 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 987 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 988 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 989 v += 49; jmin++; 990 } 991 for (j=jmin; j<n; j++) { 992 /* (strict lower triangular part of A)*x */ 993 cval = ib[j]*7; 994 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 995 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 996 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 997 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 998 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 999 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 1000 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1001 /* (strict upper triangular part of A)*x */ 1002 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 1003 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 1004 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 1005 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 1006 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 1007 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 1008 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 1009 v += 49; 1010 } 1011 xb +=7; ai++; 1012 } 1013 1014 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1015 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1016 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1017 1018 PetscLogFlops(98*(a->s_nz*2 - A->m)); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 1024 int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1025 { 1026 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1027 PetscScalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1028 MatScalar *v; 1029 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1030 int ncols,k; 1031 1032 PetscFunctionBegin; 1033 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 1034 if (yy != xx) { 1035 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1036 } else { 1037 y = x; 1038 } 1039 if (zz != yy) { 1040 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1041 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 1042 } else { 1043 z = y; 1044 } 1045 1046 aj = a->j; 1047 v = a->a; 1048 ii = a->i; 1049 1050 if (!a->mult_work) { 1051 ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1052 } 1053 work = a->mult_work; 1054 1055 1056 for (i=0; i<mbs; i++) { 1057 n = ii[1] - ii[0]; ncols = n*bs; 1058 workt = work; idx=aj+ii[0]; 1059 1060 /* upper triangular part */ 1061 for (j=0; j<n; j++) { 1062 xb = x_ptr + bs*(*idx++); 1063 for (k=0; k<bs; k++) workt[k] = xb[k]; 1064 workt += bs; 1065 } 1066 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 1067 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1068 1069 /* strict lower triangular part */ 1070 idx = aj+ii[0]; 1071 if (*idx == i){ 1072 ncols -= bs; v += bs2; idx++; n--; 1073 } 1074 if (ncols > 0){ 1075 workt = work; 1076 ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1077 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1078 for (j=0; j<n; j++) { 1079 zb = z_ptr + bs*(*idx++); 1080 /* idx++; */ 1081 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1082 workt += bs; 1083 } 1084 } 1085 1086 x += bs; v += n*bs2; z += bs; ii++; 1087 } 1088 1089 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1090 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1091 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1092 1093 PetscLogFlops(2*(a->s_nz*2 - A->m)); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatMultTranspose_SeqSBAIJ" 1099 int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 1100 { 1101 PetscFunctionBegin; 1102 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1103 /* PetscFunctionReturn(0); */ 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMultTransposeAdd_SeqSBAIJ" 1108 int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1109 1110 { 1111 PetscFunctionBegin; 1112 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1113 /* PetscFunctionReturn(0); */ 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatScale_SeqSBAIJ" 1118 int MatScale_SeqSBAIJ(PetscScalar *alpha,Mat inA) 1119 { 1120 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1121 int one = 1,totalnz = a->bs2*a->s_nz; 1122 1123 PetscFunctionBegin; 1124 BLscal_(&totalnz,alpha,a->a,&one); 1125 PetscLogFlops(totalnz); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatNorm_SeqSBAIJ" 1131 int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 1132 { 1133 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1134 MatScalar *v = a->a; 1135 PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1136 int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 1137 int *jl,*il,jmin,jmax,ierr,nexti,ik,*col; 1138 1139 PetscFunctionBegin; 1140 if (type == NORM_FROBENIUS) { 1141 for (k=0; k<mbs; k++){ 1142 jmin = a->i[k]; jmax = a->i[k+1]; 1143 col = aj + jmin; 1144 if (*col == k){ /* diagonal block */ 1145 for (i=0; i<bs2; i++){ 1146 #if defined(PETSC_USE_COMPLEX) 1147 sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 1148 #else 1149 sum_diag += (*v)*(*v); v++; 1150 #endif 1151 } 1152 jmin++; 1153 } 1154 for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 1155 for (i=0; i<bs2; i++){ 1156 #if defined(PETSC_USE_COMPLEX) 1157 sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 1158 #else 1159 sum_off += (*v)*(*v); v++; 1160 #endif 1161 } 1162 } 1163 } 1164 *norm = sqrt(sum_diag + 2*sum_off); 1165 1166 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1167 ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr); 1168 ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 1169 ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr); 1170 for (i=0; i<mbs; i++) { 1171 jl[i] = mbs; il[0] = 0; 1172 } 1173 1174 *norm = 0.0; 1175 for (k=0; k<mbs; k++) { /* k_th block row */ 1176 for (j=0; j<bs; j++) sum[j]=0.0; 1177 1178 /*-- col sum --*/ 1179 i = jl[k]; /* first |A(i,k)| to be added */ 1180 /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1181 at step k */ 1182 while (i<mbs){ 1183 nexti = jl[i]; /* next block row to be added */ 1184 ik = il[i]; /* block index of A(i,k) in the array a */ 1185 for (j=0; j<bs; j++){ 1186 v = a->a + ik*bs2 + j*bs; 1187 for (k1=0; k1<bs; k1++) { 1188 sum[j] += PetscAbsScalar(*v); v++; 1189 } 1190 } 1191 /* update il, jl */ 1192 jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1193 jmax = a->i[i+1]; 1194 if (jmin < jmax){ 1195 il[i] = jmin; 1196 j = a->j[jmin]; 1197 jl[i] = jl[j]; jl[j]=i; 1198 } 1199 i = nexti; 1200 } 1201 1202 /*-- row sum --*/ 1203 jmin = a->i[k]; jmax = a->i[k+1]; 1204 for (i=jmin; i<jmax; i++) { 1205 for (j=0; j<bs; j++){ 1206 v = a->a + i*bs2 + j; 1207 for (k1=0; k1<bs; k1++){ 1208 sum[j] += PetscAbsScalar(*v); 1209 v += bs; 1210 } 1211 } 1212 } 1213 /* add k_th block row to il, jl */ 1214 col = aj+jmin; 1215 if (*col == k) jmin++; 1216 if (jmin < jmax){ 1217 il[k] = jmin; 1218 j = a->j[jmin]; 1219 jl[k] = jl[j]; jl[j] = k; 1220 } 1221 for (j=0; j<bs; j++){ 1222 if (sum[j] > *norm) *norm = sum[j]; 1223 } 1224 } 1225 ierr = PetscFree(il);CHKERRQ(ierr); 1226 ierr = PetscFree(jl);CHKERRQ(ierr); 1227 ierr = PetscFree(sum);CHKERRQ(ierr); 1228 } else { 1229 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1230 } 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatEqual_SeqSBAIJ" 1236 int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 1237 { 1238 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1239 int ierr; 1240 PetscTruth flag; 1241 1242 PetscFunctionBegin; 1243 ierr = PetscTypeCompare((PetscObject)A,MATSEQBAIJ,&flag);CHKERRQ(ierr); 1244 if (!flag) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1245 1246 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1247 if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) { 1248 *flg = PETSC_FALSE; 1249 PetscFunctionReturn(0); 1250 } 1251 1252 /* if the a->i are the same */ 1253 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 1254 if (*flg == PETSC_FALSE) { 1255 PetscFunctionReturn(0); 1256 } 1257 1258 /* if a->j are the same */ 1259 ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr); 1260 if (*flg == PETSC_FALSE) { 1261 PetscFunctionReturn(0); 1262 } 1263 /* if a->a are the same */ 1264 ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 1271 int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 1272 { 1273 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1274 int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1275 PetscScalar *x,zero = 0.0; 1276 MatScalar *aa,*aa_j; 1277 1278 PetscFunctionBegin; 1279 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1280 bs = a->bs; 1281 aa = a->a; 1282 ai = a->i; 1283 aj = a->j; 1284 ambs = a->mbs; 1285 bs2 = a->bs2; 1286 1287 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1288 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1289 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1290 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1291 for (i=0; i<ambs; i++) { 1292 j=ai[i]; 1293 if (aj[j] == i) { /* if this is a diagonal element */ 1294 row = i*bs; 1295 aa_j = aa + j*bs2; 1296 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1297 } 1298 } 1299 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 1305 int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 1306 { 1307 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1308 PetscScalar *l,*r,x,*li,*ri; 1309 MatScalar *aa,*v; 1310 int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1311 1312 PetscFunctionBegin; 1313 ai = a->i; 1314 aj = a->j; 1315 aa = a->a; 1316 m = A->m; 1317 bs = a->bs; 1318 mbs = a->mbs; 1319 bs2 = a->bs2; 1320 1321 if (ll != rr) { 1322 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1323 } 1324 if (ll) { 1325 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1326 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1327 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1328 for (i=0; i<mbs; i++) { /* for each block row */ 1329 M = ai[i+1] - ai[i]; 1330 li = l + i*bs; 1331 v = aa + bs2*ai[i]; 1332 for (j=0; j<M; j++) { /* for each block */ 1333 for (k=0; k<bs2; k++) { 1334 (*v++) *= li[k%bs]; 1335 } 1336 #ifdef CONT 1337 /* will be used to replace the above loop */ 1338 ri = l + bs*aj[ai[i]+j]; 1339 for (k=0; k<bs; k++) { /* column value */ 1340 x = ri[k]; 1341 for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 1342 } 1343 #endif 1344 1345 } 1346 } 1347 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1348 PetscLogFlops(2*a->s_nz); 1349 } 1350 /* will be deleted */ 1351 if (rr) { 1352 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1353 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1354 if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1355 for (i=0; i<mbs; i++) { /* for each block row */ 1356 M = ai[i+1] - ai[i]; 1357 v = aa + bs2*ai[i]; 1358 for (j=0; j<M; j++) { /* for each block */ 1359 ri = r + bs*aj[ai[i]+j]; 1360 for (k=0; k<bs; k++) { 1361 x = ri[k]; 1362 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1363 } 1364 } 1365 } 1366 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1367 PetscLogFlops(a->s_nz); 1368 } 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 1374 int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1375 { 1376 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1377 1378 PetscFunctionBegin; 1379 info->rows_global = (double)A->m; 1380 info->columns_global = (double)A->m; 1381 info->rows_local = (double)A->m; 1382 info->columns_local = (double)A->m; 1383 info->block_size = a->bs2; 1384 info->nz_allocated = a->s_maxnz; /*num. of nonzeros in upper triangular part */ 1385 info->nz_used = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */ 1386 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1387 info->assemblies = A->num_ass; 1388 info->mallocs = a->reallocs; 1389 info->memory = A->mem; 1390 if (A->factor) { 1391 info->fill_ratio_given = A->info.fill_ratio_given; 1392 info->fill_ratio_needed = A->info.fill_ratio_needed; 1393 info->factor_mallocs = A->info.factor_mallocs; 1394 } else { 1395 info->fill_ratio_given = 0; 1396 info->fill_ratio_needed = 0; 1397 info->factor_mallocs = 0; 1398 } 1399 PetscFunctionReturn(0); 1400 } 1401 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 1405 int MatZeroEntries_SeqSBAIJ(Mat A) 1406 { 1407 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1408 int ierr; 1409 1410 PetscFunctionBegin; 1411 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatGetRowMax_SeqSBAIJ" 1417 int MatGetRowMax_SeqSBAIJ(Mat A,Vec v) 1418 { 1419 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1420 int ierr,i,j,n,row,col,bs,*ai,*aj,mbs; 1421 PetscReal atmp; 1422 MatScalar *aa; 1423 PetscScalar zero = 0.0,*x; 1424 int ncols,brow,bcol,krow,kcol; 1425 1426 PetscFunctionBegin; 1427 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1428 bs = a->bs; 1429 aa = a->a; 1430 ai = a->i; 1431 aj = a->j; 1432 mbs = a->mbs; 1433 1434 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1435 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1436 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1437 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1438 for (i=0; i<mbs; i++) { 1439 ncols = ai[1] - ai[0]; ai++; 1440 brow = bs*i; 1441 for (j=0; j<ncols; j++){ 1442 bcol = bs*(*aj); 1443 for (kcol=0; kcol<bs; kcol++){ 1444 col = bcol + kcol; /* col index */ 1445 for (krow=0; krow<bs; krow++){ 1446 atmp = PetscAbsScalar(*aa); aa++; 1447 row = brow + krow; /* row index */ 1448 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 1449 if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1450 if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 1451 } 1452 } 1453 aj++; 1454 } 1455 } 1456 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } 1459