1 /*$Id: sbaij2.c,v 1.27 2001/01/16 18:18:03 balay Exp bsmith $*/ 2 3 #include "petscsys.h" 4 #include "src/mat/impls/baij/seq/baij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/spops.h" 7 #include "src/inline/ilu.h" 8 #include "petscbt.h" 9 #include "sbaij.h" 10 11 #undef __FUNC__ 12 #define __FUNC__ "MatIncreaseOverlap_SeqSBAIJ" 13 int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov) 14 { 15 PetscFunctionBegin; 16 SETERRQ(1,"Function not yet written for SBAIJ format"); 17 /* PetscFunctionReturn(0); */ 18 } 19 20 #undef __FUNC__ 21 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ_Private" 22 int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 23 { 24 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 25 int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 26 int row,mat_i,*mat_j,tcol,*mat_ilen; 27 int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 28 int *aj = a->j,*ai = a->i; 29 MatScalar *mat_a; 30 Mat C; 31 PetscTruth flag; 32 33 PetscFunctionBegin; 34 35 if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 36 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 37 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 38 39 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 40 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 41 42 ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr); 43 ssmap = smap; 44 ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr); 45 ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 46 for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 47 /* determine lens of each row */ 48 for (i=0; i<nrows; i++) { 49 kstart = ai[irow[i]]; 50 kend = kstart + a->ilen[irow[i]]; 51 lens[i] = 0; 52 for (k=kstart; k<kend; k++) { 53 if (ssmap[aj[k]]) { 54 lens[i]++; 55 } 56 } 57 } 58 /* Create and fill new matrix */ 59 if (scall == MAT_REUSE_MATRIX) { 60 c = (Mat_SeqSBAIJ *)((*B)->data); 61 62 if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 63 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 64 if (flag == PETSC_FALSE) { 65 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 66 } 67 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 68 C = *B; 69 } else { 70 ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr); 71 } 72 c = (Mat_SeqSBAIJ *)(C->data); 73 for (i=0; i<nrows; i++) { 74 row = irow[i]; 75 kstart = ai[row]; 76 kend = kstart + a->ilen[row]; 77 mat_i = c->i[i]; 78 mat_j = c->j + mat_i; 79 mat_a = c->a + mat_i*bs2; 80 mat_ilen = c->ilen + i; 81 for (k=kstart; k<kend; k++) { 82 if ((tcol=ssmap[a->j[k]])) { 83 *mat_j++ = tcol - 1; 84 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 85 mat_a += bs2; 86 (*mat_ilen)++; 87 } 88 } 89 } 90 91 /* Free work space */ 92 ierr = PetscFree(smap);CHKERRQ(ierr); 93 ierr = PetscFree(lens);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 97 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 98 *B = C; 99 PetscFunctionReturn(0); 100 } 101 102 #undef __FUNC__ 103 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ" 104 int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 105 { 106 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 107 IS is1; 108 int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 109 110 PetscFunctionBegin; 111 if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 112 113 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 114 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 115 116 /* Verify if the indices corespond to each element in a block 117 and form the IS with compressed IS */ 118 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr); 119 iary = vary + a->mbs; 120 ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 121 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 122 123 count = 0; 124 for (i=0; i<a->mbs; i++) { 125 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks"); 126 if (vary[i]==bs) iary[count++] = i; 127 } 128 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 129 130 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 131 ierr = PetscFree(vary);CHKERRQ(ierr); 132 133 ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 134 ISDestroy(is1); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNC__ 139 #define __FUNC__ "MatGetSubMatrices_SeqSBAIJ" 140 int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B) 141 { 142 int ierr,i; 143 144 PetscFunctionBegin; 145 if (scall == MAT_INITIAL_MATRIX) { 146 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 147 } 148 149 for (i=0; i<n; i++) { 150 ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 /* -------------------------------------------------------*/ 156 /* Should check that shapes of vectors and matrices match */ 157 /* -------------------------------------------------------*/ 158 #include "petscblaslapack.h" 159 160 #undef __FUNC__ 161 #define __FUNC__ "MatMult_SeqSBAIJ_1" 162 int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 163 { 164 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 165 Scalar *x,*z,*xb,x1,zero=0.0; 166 MatScalar *v; 167 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 168 169 PetscFunctionBegin; 170 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 171 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 172 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 173 174 v = a->a; 175 xb = x; 176 177 for (i=0; i<mbs; i++) { 178 n = ai[1] - ai[0]; /* length of i_th row of A */ 179 x1 = xb[0]; 180 ib = aj + *ai; 181 jmin = 0; 182 if (*ib == i) { /* (diag of A)*x */ 183 z[i] += *v++ * x[*ib++]; 184 jmin++; 185 } 186 for (j=jmin; j<n; j++) { 187 cval = *ib; 188 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 189 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 190 } 191 xb++; ai++; 192 } 193 194 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 195 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 196 PetscLogFlops(2*(a->s_nz*2 - A->m) - A->m); /* s_nz = (nz+m)/2 */ 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNC__ 201 #define __FUNC__ "MatMult_SeqSBAIJ_2" 202 int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 203 { 204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 205 Scalar *x,*z,*xb,x1,x2,zero=0.0; 206 MatScalar *v; 207 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 208 209 210 PetscFunctionBegin; 211 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 212 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 213 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 214 215 v = a->a; 216 xb = x; 217 218 for (i=0; i<mbs; i++) { 219 n = ai[1] - ai[0]; /* length of i_th block row of A */ 220 x1 = xb[0]; x2 = xb[1]; 221 ib = aj + *ai; 222 jmin = 0; 223 if (*ib == i){ /* (diag of A)*x */ 224 z[2*i] += v[0]*x1 + v[2]*x2; 225 z[2*i+1] += v[2]*x1 + v[3]*x2; 226 v += 4; jmin++; 227 } 228 for (j=jmin; j<n; j++) { 229 /* (strict lower triangular part of A)*x */ 230 cval = ib[j]*2; 231 z[cval] += v[0]*x1 + v[1]*x2; 232 z[cval+1] += v[2]*x1 + v[3]*x2; 233 /* (strict upper triangular part of A)*x */ 234 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 235 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 236 v += 4; 237 } 238 xb +=2; ai++; 239 } 240 241 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 243 PetscLogFlops(8*(a->s_nz*2 - A->m) - A->m); 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNC__ 248 #define __FUNC__ "MatMult_SeqSBAIJ_3" 249 int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 250 { 251 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 252 Scalar *x,*z,*xb,x1,x2,x3,zero=0.0; 253 MatScalar *v; 254 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 255 256 257 PetscFunctionBegin; 258 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 259 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 260 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 261 262 v = a->a; 263 xb = x; 264 265 for (i=0; i<mbs; i++) { 266 n = ai[1] - ai[0]; /* length of i_th block row of A */ 267 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 268 ib = aj + *ai; 269 jmin = 0; 270 if (*ib == i){ /* (diag of A)*x */ 271 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 272 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 273 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 274 v += 9; jmin++; 275 } 276 for (j=jmin; j<n; j++) { 277 /* (strict lower triangular part of A)*x */ 278 cval = ib[j]*3; 279 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 280 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 281 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 282 /* (strict upper triangular part of A)*x */ 283 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 284 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 285 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 286 v += 9; 287 } 288 xb +=3; ai++; 289 } 290 291 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 292 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 293 PetscLogFlops(18*(a->s_nz*2 - A->m) - A->m); 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNC__ 298 #define __FUNC__ "MatMult_SeqSBAIJ_4" 299 int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 300 { 301 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 302 Scalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 303 MatScalar *v; 304 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 305 306 PetscFunctionBegin; 307 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 308 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 309 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 310 311 v = a->a; 312 xb = x; 313 314 for (i=0; i<mbs; i++) { 315 n = ai[1] - ai[0]; /* length of i_th block row of A */ 316 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 317 ib = aj + *ai; 318 jmin = 0; 319 if (*ib == i){ /* (diag of A)*x */ 320 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 321 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 322 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 323 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 324 v += 16; jmin++; 325 } 326 for (j=jmin; j<n; j++) { 327 /* (strict lower triangular part of A)*x */ 328 cval = ib[j]*4; 329 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 330 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 331 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 332 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 333 /* (strict upper triangular part of A)*x */ 334 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 335 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 336 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 337 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 338 v += 16; 339 } 340 xb +=4; ai++; 341 } 342 343 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 345 PetscLogFlops(32*(a->s_nz*2 - A->m) - A->m); 346 PetscFunctionReturn(0); 347 } 348 349 #undef __FUNC__ 350 #define __FUNC__ "MatMult_SeqSBAIJ_5" 351 int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 352 { 353 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 354 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 355 MatScalar *v; 356 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 357 358 PetscFunctionBegin; 359 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 360 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 361 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 362 363 v = a->a; 364 xb = x; 365 366 for (i=0; i<mbs; i++) { 367 n = ai[1] - ai[0]; /* length of i_th block row of A */ 368 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 369 ib = aj + *ai; 370 jmin = 0; 371 if (*ib == i){ /* (diag of A)*x */ 372 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 373 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 374 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 375 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 376 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 377 v += 25; jmin++; 378 } 379 for (j=jmin; j<n; j++) { 380 /* (strict lower triangular part of A)*x */ 381 cval = ib[j]*5; 382 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 383 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 384 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 385 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 386 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 387 /* (strict upper triangular part of A)*x */ 388 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 389 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 390 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 391 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 392 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 393 v += 25; 394 } 395 xb +=5; ai++; 396 } 397 398 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 399 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 400 PetscLogFlops(50*(a->s_nz*2 - A->m) - A->m); 401 PetscFunctionReturn(0); 402 } 403 404 405 #undef __FUNC__ 406 #define __FUNC__ "MatMult_SeqSBAIJ_6" 407 int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 408 { 409 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 410 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 411 MatScalar *v; 412 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 413 414 PetscFunctionBegin; 415 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 416 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 417 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 418 419 v = a->a; 420 xb = x; 421 422 for (i=0; i<mbs; i++) { 423 n = ai[1] - ai[0]; /* length of i_th block row of A */ 424 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 425 ib = aj + *ai; 426 jmin = 0; 427 if (*ib == i){ /* (diag of A)*x */ 428 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 429 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 430 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 431 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 432 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 433 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 434 v += 36; jmin++; 435 } 436 for (j=jmin; j<n; j++) { 437 /* (strict lower triangular part of A)*x */ 438 cval = ib[j]*6; 439 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 440 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 441 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 442 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 443 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 444 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 445 /* (strict upper triangular part of A)*x */ 446 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 447 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 448 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 449 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 450 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 451 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 452 v += 36; 453 } 454 xb +=6; ai++; 455 } 456 457 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 458 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 459 PetscLogFlops(72*(a->s_nz*2 - A->m) - A->m); 460 PetscFunctionReturn(0); 461 } 462 #undef __FUNC__ 463 #define __FUNC__ "MatMult_SeqSBAIJ_7" 464 int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 465 { 466 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 467 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 468 MatScalar *v; 469 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 470 471 PetscFunctionBegin; 472 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 473 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 474 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 475 476 v = a->a; 477 xb = x; 478 479 for (i=0; i<mbs; i++) { 480 n = ai[1] - ai[0]; /* length of i_th block row of A */ 481 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 482 ib = aj + *ai; 483 jmin = 0; 484 if (*ib == i){ /* (diag of A)*x */ 485 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 486 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 487 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 488 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 489 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 490 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 491 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 492 v += 49; jmin++; 493 } 494 for (j=jmin; j<n; j++) { 495 /* (strict lower triangular part of A)*x */ 496 cval = ib[j]*7; 497 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 498 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 499 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 500 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 501 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 502 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 503 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 504 /* (strict upper triangular part of A)*x */ 505 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 506 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 507 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 508 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 509 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 510 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 511 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 512 v += 49; 513 } 514 xb +=7; ai++; 515 } 516 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 517 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 518 PetscLogFlops(98*(a->s_nz*2 - A->m) - A->m); 519 PetscFunctionReturn(0); 520 } 521 522 /* 523 This will not work with MatScalar == float because it calls the BLAS 524 */ 525 #undef __FUNC__ 526 #define __FUNC__ "MatMult_SeqSBAIJ_N" 527 int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 528 { 529 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 530 Scalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 531 MatScalar *v; 532 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 533 int ncols,k; 534 535 PetscFunctionBegin; 536 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 537 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 538 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 539 540 aj = a->j; 541 v = a->a; 542 ii = a->i; 543 544 if (!a->mult_work) { 545 ierr = PetscMalloc((A->m+1)*sizeof(Scalar),&a->mult_work);CHKERRQ(ierr); 546 } 547 work = a->mult_work; 548 549 for (i=0; i<mbs; i++) { 550 n = ii[1] - ii[0]; ncols = n*bs; 551 workt = work; idx=aj+ii[0]; 552 553 /* upper triangular part */ 554 for (j=0; j<n; j++) { 555 xb = x_ptr + bs*(*idx++); 556 for (k=0; k<bs; k++) workt[k] = xb[k]; 557 workt += bs; 558 } 559 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 560 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 561 562 /* strict lower triangular part */ 563 idx = aj+ii[0]; 564 if (*idx == i){ 565 ncols -= bs; v += bs2; idx++; n--; 566 } 567 568 if (ncols > 0){ 569 workt = work; 570 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 571 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 572 for (j=0; j<n; j++) { 573 zb = z_ptr + bs*(*idx++); 574 for (k=0; k<bs; k++) zb[k] += workt[k] ; 575 workt += bs; 576 } 577 } 578 x += bs; v += n*bs2; z += bs; ii++; 579 } 580 581 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 582 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 583 PetscLogFlops(2*(a->s_nz*2 - A->m)*bs2 - A->m); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNC__ 588 #define __FUNC__ "MatMultAdd_SeqSBAIJ_1" 589 int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 590 { 591 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 592 Scalar *x,*y,*z,*xb,x1; 593 MatScalar *v; 594 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 595 596 PetscFunctionBegin; 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 if (yy != xx) { 599 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 600 } else { 601 y = x; 602 } 603 if (zz != yy) { 604 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 605 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 606 } else { 607 z = y; 608 } 609 610 v = a->a; 611 xb = x; 612 613 for (i=0; i<mbs; i++) { 614 n = ai[1] - ai[0]; /* length of i_th row of A */ 615 x1 = xb[0]; 616 ib = aj + *ai; 617 jmin = 0; 618 if (*ib == i) { /* (diag of A)*x */ 619 z[i] += *v++ * x[*ib++]; jmin++; 620 } 621 for (j=jmin; j<n; j++) { 622 cval = *ib; 623 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 624 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 625 } 626 xb++; ai++; 627 } 628 629 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 630 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 631 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 632 633 PetscLogFlops(2*(a->s_nz*2 - A->m)); 634 PetscFunctionReturn(0); 635 } 636 637 #undef __FUNC__ 638 #define __FUNC__ "MatMultAdd_SeqSBAIJ_2" 639 int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 640 { 641 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 642 Scalar *x,*y,*z,*xb,x1,x2; 643 MatScalar *v; 644 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 645 646 PetscFunctionBegin; 647 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 648 if (yy != xx) { 649 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 650 } else { 651 y = x; 652 } 653 if (zz != yy) { 654 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 655 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 656 } else { 657 z = y; 658 } 659 660 v = a->a; 661 xb = x; 662 663 for (i=0; i<mbs; i++) { 664 n = ai[1] - ai[0]; /* length of i_th block row of A */ 665 x1 = xb[0]; x2 = xb[1]; 666 ib = aj + *ai; 667 jmin = 0; 668 if (*ib == i){ /* (diag of A)*x */ 669 z[2*i] += v[0]*x1 + v[2]*x2; 670 z[2*i+1] += v[2]*x1 + v[3]*x2; 671 v += 4; jmin++; 672 } 673 for (j=jmin; j<n; j++) { 674 /* (strict lower triangular part of A)*x */ 675 cval = ib[j]*2; 676 z[cval] += v[0]*x1 + v[1]*x2; 677 z[cval+1] += v[2]*x1 + v[3]*x2; 678 /* (strict upper triangular part of A)*x */ 679 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 680 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 681 v += 4; 682 } 683 xb +=2; ai++; 684 } 685 686 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 687 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 688 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 689 690 PetscLogFlops(4*(a->s_nz*2 - A->m)); 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNC__ 695 #define __FUNC__ "MatMultAdd_SeqSBAIJ_3" 696 int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 697 { 698 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 699 Scalar *x,*y,*z,*xb,x1,x2,x3; 700 MatScalar *v; 701 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 702 703 PetscFunctionBegin; 704 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 705 if (yy != xx) { 706 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 707 } else { 708 y = x; 709 } 710 if (zz != yy) { 711 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 712 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 713 } else { 714 z = y; 715 } 716 717 v = a->a; 718 xb = x; 719 720 for (i=0; i<mbs; i++) { 721 n = ai[1] - ai[0]; /* length of i_th block row of A */ 722 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 723 ib = aj + *ai; 724 jmin = 0; 725 if (*ib == i){ /* (diag of A)*x */ 726 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 727 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 728 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 729 v += 9; jmin++; 730 } 731 for (j=jmin; j<n; j++) { 732 /* (strict lower triangular part of A)*x */ 733 cval = ib[j]*3; 734 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 735 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 736 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 737 /* (strict upper triangular part of A)*x */ 738 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 739 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 740 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 741 v += 9; 742 } 743 xb +=3; ai++; 744 } 745 746 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 747 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 748 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 749 750 PetscLogFlops(18*(a->s_nz*2 - A->m)); 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNC__ 755 #define __FUNC__ "MatMultAdd_SeqSBAIJ_4" 756 int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 757 { 758 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 759 Scalar *x,*y,*z,*xb,x1,x2,x3,x4; 760 MatScalar *v; 761 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 762 763 PetscFunctionBegin; 764 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 765 if (yy != xx) { 766 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 767 } else { 768 y = x; 769 } 770 if (zz != yy) { 771 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 772 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 773 } else { 774 z = y; 775 } 776 777 v = a->a; 778 xb = x; 779 780 for (i=0; i<mbs; i++) { 781 n = ai[1] - ai[0]; /* length of i_th block row of A */ 782 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 783 ib = aj + *ai; 784 jmin = 0; 785 if (*ib == i){ /* (diag of A)*x */ 786 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 787 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 788 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 789 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 790 v += 16; jmin++; 791 } 792 for (j=jmin; j<n; j++) { 793 /* (strict lower triangular part of A)*x */ 794 cval = ib[j]*4; 795 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 796 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 797 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 798 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 799 /* (strict upper triangular part of A)*x */ 800 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 801 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 802 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 803 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 804 v += 16; 805 } 806 xb +=4; ai++; 807 } 808 809 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 810 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 811 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 812 813 PetscLogFlops(32*(a->s_nz*2 - A->m)); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNC__ 818 #define __FUNC__ "MatMultAdd_SeqSBAIJ_5" 819 int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 820 { 821 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 822 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 823 MatScalar *v; 824 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 825 826 PetscFunctionBegin; 827 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 828 if (yy != xx) { 829 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 830 } else { 831 y = x; 832 } 833 if (zz != yy) { 834 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 835 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 836 } else { 837 z = y; 838 } 839 840 v = a->a; 841 xb = x; 842 843 for (i=0; i<mbs; i++) { 844 n = ai[1] - ai[0]; /* length of i_th block row of A */ 845 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 846 ib = aj + *ai; 847 jmin = 0; 848 if (*ib == i){ /* (diag of A)*x */ 849 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 850 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 851 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 852 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 853 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 854 v += 25; jmin++; 855 } 856 for (j=jmin; j<n; j++) { 857 /* (strict lower triangular part of A)*x */ 858 cval = ib[j]*5; 859 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 860 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 861 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 862 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 863 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 864 /* (strict upper triangular part of A)*x */ 865 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 866 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 867 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 868 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 869 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 870 v += 25; 871 } 872 xb +=5; ai++; 873 } 874 875 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 876 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 877 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 878 879 PetscLogFlops(50*(a->s_nz*2 - A->m)); 880 PetscFunctionReturn(0); 881 } 882 #undef __FUNC__ 883 #define __FUNC__ "MatMultAdd_SeqSBAIJ_6" 884 int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 885 { 886 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 887 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 888 MatScalar *v; 889 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 890 891 PetscFunctionBegin; 892 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 893 if (yy != xx) { 894 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 895 } else { 896 y = x; 897 } 898 if (zz != yy) { 899 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 900 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 901 } else { 902 z = y; 903 } 904 905 v = a->a; 906 xb = x; 907 908 for (i=0; i<mbs; i++) { 909 n = ai[1] - ai[0]; /* length of i_th block row of A */ 910 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 911 ib = aj + *ai; 912 jmin = 0; 913 if (*ib == i){ /* (diag of A)*x */ 914 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 915 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 916 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 917 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 918 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 919 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 920 v += 36; jmin++; 921 } 922 for (j=jmin; j<n; j++) { 923 /* (strict lower triangular part of A)*x */ 924 cval = ib[j]*6; 925 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 926 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 927 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 928 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 929 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 930 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 931 /* (strict upper triangular part of A)*x */ 932 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 933 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 934 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 935 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 936 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 937 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 938 v += 36; 939 } 940 xb +=6; ai++; 941 } 942 943 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 944 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 945 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 946 947 PetscLogFlops(72*(a->s_nz*2 - A->m)); 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNC__ 952 #define __FUNC__ "MatMultAdd_SeqSBAIJ_7" 953 int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 954 { 955 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 956 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 957 MatScalar *v; 958 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 959 960 PetscFunctionBegin; 961 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 962 if (yy != xx) { 963 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 964 } else { 965 y = x; 966 } 967 if (zz != yy) { 968 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 969 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 970 } else { 971 z = y; 972 } 973 974 v = a->a; 975 xb = x; 976 977 for (i=0; i<mbs; i++) { 978 n = ai[1] - ai[0]; /* length of i_th block row of A */ 979 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 980 ib = aj + *ai; 981 jmin = 0; 982 if (*ib == i){ /* (diag of A)*x */ 983 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 984 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 985 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 986 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 987 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 988 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 989 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 990 v += 49; jmin++; 991 } 992 for (j=jmin; j<n; j++) { 993 /* (strict lower triangular part of A)*x */ 994 cval = ib[j]*7; 995 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 996 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 997 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 998 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 999 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 1000 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 1001 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1002 /* (strict upper triangular part of A)*x */ 1003 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 1004 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 1005 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 1006 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 1007 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 1008 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 1009 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 1010 v += 49; 1011 } 1012 xb +=7; ai++; 1013 } 1014 1015 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1016 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1017 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1018 1019 PetscLogFlops(98*(a->s_nz*2 - A->m)); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 #undef __FUNC__ 1024 #define __FUNC__ "MatMultAdd_SeqSBAIJ_N" 1025 int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1026 { 1027 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1028 Scalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1029 MatScalar *v; 1030 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1031 int ncols,k; 1032 1033 PetscFunctionBegin; 1034 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 1035 if (yy != xx) { 1036 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1037 } else { 1038 y = x; 1039 } 1040 if (zz != yy) { 1041 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1042 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 1043 } else { 1044 z = y; 1045 } 1046 1047 aj = a->j; 1048 v = a->a; 1049 ii = a->i; 1050 1051 if (!a->mult_work) { 1052 ierr = PetscMalloc((A->m+1)*sizeof(Scalar),&a->mult_work);CHKERRQ(ierr); 1053 } 1054 work = a->mult_work; 1055 1056 1057 for (i=0; i<mbs; i++) { 1058 n = ii[1] - ii[0]; ncols = n*bs; 1059 workt = work; idx=aj+ii[0]; 1060 1061 /* upper triangular part */ 1062 for (j=0; j<n; j++) { 1063 xb = x_ptr + bs*(*idx++); 1064 for (k=0; k<bs; k++) workt[k] = xb[k]; 1065 workt += bs; 1066 } 1067 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 1068 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1069 1070 /* strict lower triangular part */ 1071 idx = aj+ii[0]; 1072 if (*idx == i){ 1073 ncols -= bs; v += bs2; idx++; n--; 1074 } 1075 if (ncols > 0){ 1076 workt = work; 1077 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 1078 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1079 for (j=0; j<n; j++) { 1080 zb = z_ptr + bs*(*idx++); 1081 /* idx++; */ 1082 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1083 workt += bs; 1084 } 1085 } 1086 1087 x += bs; v += n*bs2; z += bs; ii++; 1088 } 1089 1090 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1091 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1092 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1093 1094 PetscLogFlops(2*(a->s_nz*2 - A->m)); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 #undef __FUNC__ 1099 #define __FUNC__ "MatMultTranspose_SeqSBAIJ" 1100 int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 1101 { 1102 PetscFunctionBegin; 1103 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1104 /* PetscFunctionReturn(0); */ 1105 } 1106 1107 #undef __FUNC__ 1108 #define __FUNC__ "MatMultTransposeAdd_SeqSBAIJ" 1109 int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1110 1111 { 1112 PetscFunctionBegin; 1113 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1114 /* PetscFunctionReturn(0); */ 1115 } 1116 1117 #undef __FUNC__ 1118 #define __FUNC__ "MatScale_SeqSBAIJ" 1119 int MatScale_SeqSBAIJ(Scalar *alpha,Mat inA) 1120 { 1121 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1122 int one = 1,totalnz = a->bs2*a->s_nz; 1123 1124 PetscFunctionBegin; 1125 BLscal_(&totalnz,alpha,a->a,&one); 1126 PetscLogFlops(totalnz); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNC__ 1131 #define __FUNC__ "MatNorm_SeqSBAIJ" 1132 int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 1133 { 1134 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1135 MatScalar *v = a->a; 1136 PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1137 int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 1138 int *jl,*il,jmin,jmax,ierr,nexti,ik,*col; 1139 1140 PetscFunctionBegin; 1141 if (type == NORM_FROBENIUS) { 1142 for (k=0; k<mbs; k++){ 1143 jmin = a->i[k]; jmax = a->i[k+1]; 1144 col = aj + jmin; 1145 if (*col == k){ /* diagonal block */ 1146 for (i=0; i<bs2; i++){ 1147 #if defined(PETSC_USE_COMPLEX) 1148 sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 1149 #else 1150 sum_diag += (*v)*(*v); v++; 1151 #endif 1152 } 1153 jmin++; 1154 } 1155 for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 1156 for (i=0; i<bs2; i++){ 1157 #if defined(PETSC_USE_COMPLEX) 1158 sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 1159 #else 1160 sum_off += (*v)*(*v); v++; 1161 #endif 1162 } 1163 } 1164 } 1165 *norm = sqrt(sum_diag + 2*sum_off); 1166 1167 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1168 ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr); 1169 ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 1170 ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr); 1171 for (i=0; i<mbs; i++) { 1172 jl[i] = mbs; il[0] = 0; 1173 } 1174 1175 *norm = 0.0; 1176 for (k=0; k<mbs; k++) { /* k_th block row */ 1177 for (j=0; j<bs; j++) sum[j]=0.0; 1178 1179 /*-- col sum --*/ 1180 i = jl[k]; /* first |A(i,k)| to be added */ 1181 /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1182 at step k */ 1183 while (i<mbs){ 1184 nexti = jl[i]; /* next block row to be added */ 1185 ik = il[i]; /* block index of A(i,k) in the array a */ 1186 for (j=0; j<bs; j++){ 1187 v = a->a + ik*bs2 + j*bs; 1188 for (k1=0; k1<bs; k1++) { 1189 sum[j] += PetscAbsScalar(*v); v++; 1190 } 1191 } 1192 /* update il, jl */ 1193 jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1194 jmax = a->i[i+1]; 1195 if (jmin < jmax){ 1196 il[i] = jmin; 1197 j = a->j[jmin]; 1198 jl[i] = jl[j]; jl[j]=i; 1199 } 1200 i = nexti; 1201 } 1202 1203 /*-- row sum --*/ 1204 jmin = a->i[k]; jmax = a->i[k+1]; 1205 for (i=jmin; i<jmax; i++) { 1206 for (j=0; j<bs; j++){ 1207 v = a->a + i*bs2 + j; 1208 for (k1=0; k1<bs; k1++){ 1209 sum[j] += PetscAbsScalar(*v); 1210 v += bs; 1211 } 1212 } 1213 } 1214 /* add k_th block row to il, jl */ 1215 col = aj+jmin; 1216 if (*col == k) jmin++; 1217 if (jmin < jmax){ 1218 il[k] = jmin; 1219 j = a->j[jmin]; 1220 jl[k] = jl[j]; jl[j] = k; 1221 } 1222 for (j=0; j<bs; j++){ 1223 if (sum[j] > *norm) *norm = sum[j]; 1224 } 1225 } 1226 ierr = PetscFree(il);CHKERRQ(ierr); 1227 ierr = PetscFree(jl);CHKERRQ(ierr); 1228 ierr = PetscFree(sum);CHKERRQ(ierr); 1229 } else { 1230 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNC__ 1236 #define __FUNC__ "MatEqual_SeqSBAIJ" 1237 int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 1238 { 1239 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1240 int ierr; 1241 PetscTruth flag; 1242 1243 PetscFunctionBegin; 1244 ierr = PetscTypeCompare((PetscObject)A,MATSEQBAIJ,&flag);CHKERRQ(ierr); 1245 if (!flag) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1246 1247 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1248 if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) { 1249 *flg = PETSC_FALSE; 1250 PetscFunctionReturn(0); 1251 } 1252 1253 /* if the a->i are the same */ 1254 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 1255 if (*flg == PETSC_FALSE) { 1256 PetscFunctionReturn(0); 1257 } 1258 1259 /* if a->j are the same */ 1260 ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr); 1261 if (*flg == PETSC_FALSE) { 1262 PetscFunctionReturn(0); 1263 } 1264 /* if a->a are the same */ 1265 ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(Scalar),flg);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 1268 } 1269 1270 #undef __FUNC__ 1271 #define __FUNC__ "MatGetDiagonal_SeqSBAIJ" 1272 int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 1273 { 1274 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1275 int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1276 Scalar *x,zero = 0.0; 1277 MatScalar *aa,*aa_j; 1278 1279 PetscFunctionBegin; 1280 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1281 bs = a->bs; 1282 aa = a->a; 1283 ai = a->i; 1284 aj = a->j; 1285 ambs = a->mbs; 1286 bs2 = a->bs2; 1287 1288 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1289 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1290 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1291 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1292 for (i=0; i<ambs; i++) { 1293 j=ai[i]; 1294 if (aj[j] == i) { /* if this is a diagonal element */ 1295 row = i*bs; 1296 aa_j = aa + j*bs2; 1297 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1298 } 1299 } 1300 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNC__ 1305 #define __FUNC__ "MatDiagonalScale_SeqSBAIJ" 1306 int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 1307 { 1308 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1309 Scalar *l,*r,x,*li,*ri; 1310 MatScalar *aa,*v; 1311 int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1312 1313 PetscFunctionBegin; 1314 ai = a->i; 1315 aj = a->j; 1316 aa = a->a; 1317 m = A->m; 1318 bs = a->bs; 1319 mbs = a->mbs; 1320 bs2 = a->bs2; 1321 1322 if (ll != rr) { 1323 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1324 } 1325 if (ll) { 1326 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1327 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1328 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1329 for (i=0; i<mbs; i++) { /* for each block row */ 1330 M = ai[i+1] - ai[i]; 1331 li = l + i*bs; 1332 v = aa + bs2*ai[i]; 1333 for (j=0; j<M; j++) { /* for each block */ 1334 for (k=0; k<bs2; k++) { 1335 (*v++) *= li[k%bs]; 1336 } 1337 #ifdef CONT 1338 /* will be used to replace the above loop */ 1339 ri = l + bs*aj[ai[i]+j]; 1340 for (k=0; k<bs; k++) { /* column value */ 1341 x = ri[k]; 1342 for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 1343 } 1344 #endif 1345 1346 } 1347 } 1348 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1349 PetscLogFlops(2*a->s_nz); 1350 } 1351 /* will be deleted */ 1352 if (rr) { 1353 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1354 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1355 if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1356 for (i=0; i<mbs; i++) { /* for each block row */ 1357 M = ai[i+1] - ai[i]; 1358 v = aa + bs2*ai[i]; 1359 for (j=0; j<M; j++) { /* for each block */ 1360 ri = r + bs*aj[ai[i]+j]; 1361 for (k=0; k<bs; k++) { 1362 x = ri[k]; 1363 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1364 } 1365 } 1366 } 1367 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1368 PetscLogFlops(a->s_nz); 1369 } 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNC__ 1374 #define __FUNC__ "MatGetInfo_SeqSBAIJ" 1375 int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1376 { 1377 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1378 1379 PetscFunctionBegin; 1380 info->rows_global = (double)A->m; 1381 info->columns_global = (double)A->m; 1382 info->rows_local = (double)A->m; 1383 info->columns_local = (double)A->m; 1384 info->block_size = a->bs2; 1385 info->nz_allocated = a->s_maxnz; /*num. of nonzeros in upper triangular part */ 1386 info->nz_used = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */ 1387 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1388 info->assemblies = A->num_ass; 1389 info->mallocs = a->reallocs; 1390 info->memory = A->mem; 1391 if (A->factor) { 1392 info->fill_ratio_given = A->info.fill_ratio_given; 1393 info->fill_ratio_needed = A->info.fill_ratio_needed; 1394 info->factor_mallocs = A->info.factor_mallocs; 1395 } else { 1396 info->fill_ratio_given = 0; 1397 info->fill_ratio_needed = 0; 1398 info->factor_mallocs = 0; 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 1404 #undef __FUNC__ 1405 #define __FUNC__ "MatZeroEntries_SeqSBAIJ" 1406 int MatZeroEntries_SeqSBAIJ(Mat A) 1407 { 1408 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1409 int ierr; 1410 1411 PetscFunctionBegin; 1412 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNC__ 1417 #define __FUNC__ "MatGetRowMax_SeqSBAIJ" 1418 int MatGetRowMax_SeqSBAIJ(Mat A,Vec v) 1419 { 1420 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1421 int ierr,i,j,n,row,col,bs,*ai,*aj,mbs; 1422 PetscReal atmp; 1423 MatScalar *aa; 1424 Scalar zero = 0.0,*x; 1425 int ncols,brow,bcol,krow,kcol; 1426 1427 PetscFunctionBegin; 1428 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1429 bs = a->bs; 1430 aa = a->a; 1431 ai = a->i; 1432 aj = a->j; 1433 mbs = a->mbs; 1434 1435 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1436 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1437 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1438 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1439 for (i=0; i<mbs; i++) { 1440 ncols = ai[1] - ai[0]; ai++; 1441 brow = bs*i; 1442 for (j=0; j<ncols; j++){ 1443 bcol = bs*(*aj); 1444 for (kcol=0; kcol<bs; kcol++){ 1445 col = bcol + kcol; /* col index */ 1446 for (krow=0; krow<bs; krow++){ 1447 atmp = PetscAbsScalar(*aa); aa++; 1448 row = brow + krow; /* row index */ 1449 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 1450 if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1451 if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 1452 } 1453 } 1454 aj++; 1455 } 1456 } 1457 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } 1460