1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*); 17 #endif 18 19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 22 PetscErrorCode ierr; 23 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 24 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 25 PetscReal shift = 0.0; 26 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 27 28 PetscFunctionBegin; 29 allowzeropivot = PetscNot(A->erroriffailure); 30 31 if (a->idiagvalid) { 32 if (values) *values = a->idiag; 33 PetscFunctionReturn(0); 34 } 35 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 36 diag_offset = a->diag; 37 if (!a->idiag) { 38 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 39 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 40 } 41 diag = a->idiag; 42 if (values) *values = a->idiag; 43 /* factor and invert each block */ 44 switch (bs) { 45 case 1: 46 for (i=0; i<mbs; i++) { 47 odiag = v + 1*diag_offset[i]; 48 diag[0] = odiag[0]; 49 50 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 51 if (allowzeropivot) { 52 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 53 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 54 A->factorerror_zeropivot_row = i; 55 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 56 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 57 } 58 59 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 60 diag += 1; 61 } 62 break; 63 case 2: 64 for (i=0; i<mbs; i++) { 65 odiag = v + 4*diag_offset[i]; 66 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 67 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 68 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 69 diag += 4; 70 } 71 break; 72 case 3: 73 for (i=0; i<mbs; i++) { 74 odiag = v + 9*diag_offset[i]; 75 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 76 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 77 diag[8] = odiag[8]; 78 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 79 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 80 diag += 9; 81 } 82 break; 83 case 4: 84 for (i=0; i<mbs; i++) { 85 odiag = v + 16*diag_offset[i]; 86 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 87 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 88 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 89 diag += 16; 90 } 91 break; 92 case 5: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 25*diag_offset[i]; 95 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 97 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 98 diag += 25; 99 } 100 break; 101 case 6: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 36*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 106 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 107 diag += 36; 108 } 109 break; 110 case 7: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 49*diag_offset[i]; 113 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 115 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 116 diag += 49; 117 } 118 break; 119 default: 120 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 121 for (i=0; i<mbs; i++) { 122 odiag = v + bs2*diag_offset[i]; 123 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 125 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 diag += bs2; 127 } 128 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 129 } 130 a->idiagvalid = PETSC_TRUE; 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 135 { 136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 137 PetscScalar *x,*work,*w,*workt,*t; 138 const MatScalar *v,*aa = a->a, *idiag; 139 const PetscScalar *b,*xb; 140 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 141 PetscErrorCode ierr; 142 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 143 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 144 145 PetscFunctionBegin; 146 its = its*lits; 147 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 148 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 149 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 150 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 151 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 152 153 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 154 155 if (!m) PetscFunctionReturn(0); 156 diag = a->diag; 157 idiag = a->idiag; 158 k = PetscMax(A->rmap->n,A->cmap->n); 159 if (!a->mult_work) { 160 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 161 } 162 if (!a->sor_workt) { 163 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 164 } 165 if (!a->sor_work) { 166 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 167 } 168 work = a->mult_work; 169 t = a->sor_workt; 170 w = a->sor_work; 171 172 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 173 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 174 175 if (flag & SOR_ZERO_INITIAL_GUESS) { 176 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 177 switch (bs) { 178 case 1: 179 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 180 t[0] = b[0]; 181 i2 = 1; 182 idiag += 1; 183 for (i=1; i<m; i++) { 184 v = aa + ai[i]; 185 vi = aj + ai[i]; 186 nz = diag[i] - ai[i]; 187 s[0] = b[i2]; 188 for (j=0; j<nz; j++) { 189 xw[0] = x[vi[j]]; 190 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 191 } 192 t[i2] = s[0]; 193 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 194 x[i2] = xw[0]; 195 idiag += 1; 196 i2 += 1; 197 } 198 break; 199 case 2: 200 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 201 t[0] = b[0]; t[1] = b[1]; 202 i2 = 2; 203 idiag += 4; 204 for (i=1; i<m; i++) { 205 v = aa + 4*ai[i]; 206 vi = aj + ai[i]; 207 nz = diag[i] - ai[i]; 208 s[0] = b[i2]; s[1] = b[i2+1]; 209 for (j=0; j<nz; j++) { 210 idx = 2*vi[j]; 211 it = 4*j; 212 xw[0] = x[idx]; xw[1] = x[1+idx]; 213 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 214 } 215 t[i2] = s[0]; t[i2+1] = s[1]; 216 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 217 x[i2] = xw[0]; x[i2+1] = xw[1]; 218 idiag += 4; 219 i2 += 2; 220 } 221 break; 222 case 3: 223 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 224 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 225 i2 = 3; 226 idiag += 9; 227 for (i=1; i<m; i++) { 228 v = aa + 9*ai[i]; 229 vi = aj + ai[i]; 230 nz = diag[i] - ai[i]; 231 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 232 while (nz--) { 233 idx = 3*(*vi++); 234 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 235 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 236 v += 9; 237 } 238 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 239 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 240 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 241 idiag += 9; 242 i2 += 3; 243 } 244 break; 245 case 4: 246 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 247 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 248 i2 = 4; 249 idiag += 16; 250 for (i=1; i<m; i++) { 251 v = aa + 16*ai[i]; 252 vi = aj + ai[i]; 253 nz = diag[i] - ai[i]; 254 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 255 while (nz--) { 256 idx = 4*(*vi++); 257 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 258 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 259 v += 16; 260 } 261 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 262 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 263 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 264 idiag += 16; 265 i2 += 4; 266 } 267 break; 268 case 5: 269 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 270 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 271 i2 = 5; 272 idiag += 25; 273 for (i=1; i<m; i++) { 274 v = aa + 25*ai[i]; 275 vi = aj + ai[i]; 276 nz = diag[i] - ai[i]; 277 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 278 while (nz--) { 279 idx = 5*(*vi++); 280 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 281 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 282 v += 25; 283 } 284 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 285 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 286 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 287 idiag += 25; 288 i2 += 5; 289 } 290 break; 291 case 6: 292 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 293 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 294 i2 = 6; 295 idiag += 36; 296 for (i=1; i<m; i++) { 297 v = aa + 36*ai[i]; 298 vi = aj + ai[i]; 299 nz = diag[i] - ai[i]; 300 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 301 while (nz--) { 302 idx = 6*(*vi++); 303 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 304 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 305 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 306 v += 36; 307 } 308 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 309 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 310 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 311 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 312 idiag += 36; 313 i2 += 6; 314 } 315 break; 316 case 7: 317 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 318 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 319 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 320 i2 = 7; 321 idiag += 49; 322 for (i=1; i<m; i++) { 323 v = aa + 49*ai[i]; 324 vi = aj + ai[i]; 325 nz = diag[i] - ai[i]; 326 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 327 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 328 while (nz--) { 329 idx = 7*(*vi++); 330 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 331 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 332 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 333 v += 49; 334 } 335 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 336 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 337 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 338 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 339 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 340 idiag += 49; 341 i2 += 7; 342 } 343 break; 344 default: 345 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 346 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 347 i2 = bs; 348 idiag += bs2; 349 for (i=1; i<m; i++) { 350 v = aa + bs2*ai[i]; 351 vi = aj + ai[i]; 352 nz = diag[i] - ai[i]; 353 354 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 /* copy all rows of x that are needed into contiguous space */ 356 workt = work; 357 for (j=0; j<nz; j++) { 358 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 359 workt += bs; 360 } 361 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 362 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 363 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 364 365 idiag += bs2; 366 i2 += bs; 367 } 368 break; 369 } 370 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 371 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 372 xb = t; 373 } 374 else xb = b; 375 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 376 idiag = a->idiag+bs2*(a->mbs-1); 377 i2 = bs * (m-1); 378 switch (bs) { 379 case 1: 380 s[0] = xb[i2]; 381 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 382 x[i2] = xw[0]; 383 i2 -= 1; 384 for (i=m-2; i>=0; i--) { 385 v = aa + (diag[i]+1); 386 vi = aj + diag[i] + 1; 387 nz = ai[i+1] - diag[i] - 1; 388 s[0] = xb[i2]; 389 for (j=0; j<nz; j++) { 390 xw[0] = x[vi[j]]; 391 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 392 } 393 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 394 x[i2] = xw[0]; 395 idiag -= 1; 396 i2 -= 1; 397 } 398 break; 399 case 2: 400 s[0] = xb[i2]; s[1] = xb[i2+1]; 401 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 402 x[i2] = xw[0]; x[i2+1] = xw[1]; 403 i2 -= 2; 404 idiag -= 4; 405 for (i=m-2; i>=0; i--) { 406 v = aa + 4*(diag[i] + 1); 407 vi = aj + diag[i] + 1; 408 nz = ai[i+1] - diag[i] - 1; 409 s[0] = xb[i2]; s[1] = xb[i2+1]; 410 for (j=0; j<nz; j++) { 411 idx = 2*vi[j]; 412 it = 4*j; 413 xw[0] = x[idx]; xw[1] = x[1+idx]; 414 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 415 } 416 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 417 x[i2] = xw[0]; x[i2+1] = xw[1]; 418 idiag -= 4; 419 i2 -= 2; 420 } 421 break; 422 case 3: 423 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 424 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 425 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 426 i2 -= 3; 427 idiag -= 9; 428 for (i=m-2; i>=0; i--) { 429 v = aa + 9*(diag[i]+1); 430 vi = aj + diag[i] + 1; 431 nz = ai[i+1] - diag[i] - 1; 432 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 433 while (nz--) { 434 idx = 3*(*vi++); 435 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 436 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 437 v += 9; 438 } 439 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 440 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 441 idiag -= 9; 442 i2 -= 3; 443 } 444 break; 445 case 4: 446 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 447 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 448 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 449 i2 -= 4; 450 idiag -= 16; 451 for (i=m-2; i>=0; i--) { 452 v = aa + 16*(diag[i]+1); 453 vi = aj + diag[i] + 1; 454 nz = ai[i+1] - diag[i] - 1; 455 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 456 while (nz--) { 457 idx = 4*(*vi++); 458 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 459 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 460 v += 16; 461 } 462 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 463 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 464 idiag -= 16; 465 i2 -= 4; 466 } 467 break; 468 case 5: 469 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 470 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 471 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 472 i2 -= 5; 473 idiag -= 25; 474 for (i=m-2; i>=0; i--) { 475 v = aa + 25*(diag[i]+1); 476 vi = aj + diag[i] + 1; 477 nz = ai[i+1] - diag[i] - 1; 478 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 479 while (nz--) { 480 idx = 5*(*vi++); 481 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 482 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 483 v += 25; 484 } 485 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 486 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 487 idiag -= 25; 488 i2 -= 5; 489 } 490 break; 491 case 6: 492 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 493 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 494 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 495 i2 -= 6; 496 idiag -= 36; 497 for (i=m-2; i>=0; i--) { 498 v = aa + 36*(diag[i]+1); 499 vi = aj + diag[i] + 1; 500 nz = ai[i+1] - diag[i] - 1; 501 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 502 while (nz--) { 503 idx = 6*(*vi++); 504 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 505 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 506 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 507 v += 36; 508 } 509 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 511 idiag -= 36; 512 i2 -= 6; 513 } 514 break; 515 case 7: 516 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 517 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 518 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 519 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 520 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 521 i2 -= 7; 522 idiag -= 49; 523 for (i=m-2; i>=0; i--) { 524 v = aa + 49*(diag[i]+1); 525 vi = aj + diag[i] + 1; 526 nz = ai[i+1] - diag[i] - 1; 527 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 528 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 529 while (nz--) { 530 idx = 7*(*vi++); 531 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 532 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 533 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 534 v += 49; 535 } 536 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 537 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 538 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 539 idiag -= 49; 540 i2 -= 7; 541 } 542 break; 543 default: 544 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 545 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 546 i2 -= bs; 547 idiag -= bs2; 548 for (i=m-2; i>=0; i--) { 549 v = aa + bs2*(diag[i]+1); 550 vi = aj + diag[i] + 1; 551 nz = ai[i+1] - diag[i] - 1; 552 553 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 554 /* copy all rows of x that are needed into contiguous space */ 555 workt = work; 556 for (j=0; j<nz; j++) { 557 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 558 workt += bs; 559 } 560 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 561 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 562 563 idiag -= bs2; 564 i2 -= bs; 565 } 566 break; 567 } 568 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 569 } 570 its--; 571 } 572 while (its--) { 573 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 574 idiag = a->idiag; 575 i2 = 0; 576 switch (bs) { 577 case 1: 578 for (i=0; i<m; i++) { 579 v = aa + ai[i]; 580 vi = aj + ai[i]; 581 nz = ai[i+1] - ai[i]; 582 s[0] = b[i2]; 583 for (j=0; j<nz; j++) { 584 xw[0] = x[vi[j]]; 585 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 586 } 587 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 588 x[i2] += xw[0]; 589 idiag += 1; 590 i2 += 1; 591 } 592 break; 593 case 2: 594 for (i=0; i<m; i++) { 595 v = aa + 4*ai[i]; 596 vi = aj + ai[i]; 597 nz = ai[i+1] - ai[i]; 598 s[0] = b[i2]; s[1] = b[i2+1]; 599 for (j=0; j<nz; j++) { 600 idx = 2*vi[j]; 601 it = 4*j; 602 xw[0] = x[idx]; xw[1] = x[1+idx]; 603 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 604 } 605 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 606 x[i2] += xw[0]; x[i2+1] += xw[1]; 607 idiag += 4; 608 i2 += 2; 609 } 610 break; 611 case 3: 612 for (i=0; i<m; i++) { 613 v = aa + 9*ai[i]; 614 vi = aj + ai[i]; 615 nz = ai[i+1] - ai[i]; 616 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 617 while (nz--) { 618 idx = 3*(*vi++); 619 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 620 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 621 v += 9; 622 } 623 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 624 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 625 idiag += 9; 626 i2 += 3; 627 } 628 break; 629 case 4: 630 for (i=0; i<m; i++) { 631 v = aa + 16*ai[i]; 632 vi = aj + ai[i]; 633 nz = ai[i+1] - ai[i]; 634 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 635 while (nz--) { 636 idx = 4*(*vi++); 637 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 638 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 639 v += 16; 640 } 641 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 642 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 643 idiag += 16; 644 i2 += 4; 645 } 646 break; 647 case 5: 648 for (i=0; i<m; i++) { 649 v = aa + 25*ai[i]; 650 vi = aj + ai[i]; 651 nz = ai[i+1] - ai[i]; 652 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 653 while (nz--) { 654 idx = 5*(*vi++); 655 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 656 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 657 v += 25; 658 } 659 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 660 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 661 idiag += 25; 662 i2 += 5; 663 } 664 break; 665 case 6: 666 for (i=0; i<m; i++) { 667 v = aa + 36*ai[i]; 668 vi = aj + ai[i]; 669 nz = ai[i+1] - ai[i]; 670 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 671 while (nz--) { 672 idx = 6*(*vi++); 673 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 674 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 675 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 676 v += 36; 677 } 678 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 679 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 680 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 681 idiag += 36; 682 i2 += 6; 683 } 684 break; 685 case 7: 686 for (i=0; i<m; i++) { 687 v = aa + 49*ai[i]; 688 vi = aj + ai[i]; 689 nz = ai[i+1] - ai[i]; 690 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 691 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 692 while (nz--) { 693 idx = 7*(*vi++); 694 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 695 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 696 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 697 v += 49; 698 } 699 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 700 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 701 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 702 idiag += 49; 703 i2 += 7; 704 } 705 break; 706 default: 707 for (i=0; i<m; i++) { 708 v = aa + bs2*ai[i]; 709 vi = aj + ai[i]; 710 nz = ai[i+1] - ai[i]; 711 712 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 713 /* copy all rows of x that are needed into contiguous space */ 714 workt = work; 715 for (j=0; j<nz; j++) { 716 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 717 workt += bs; 718 } 719 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 720 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 721 722 idiag += bs2; 723 i2 += bs; 724 } 725 break; 726 } 727 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 728 } 729 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 730 idiag = a->idiag+bs2*(a->mbs-1); 731 i2 = bs * (m-1); 732 switch (bs) { 733 case 1: 734 for (i=m-1; i>=0; i--) { 735 v = aa + ai[i]; 736 vi = aj + ai[i]; 737 nz = ai[i+1] - ai[i]; 738 s[0] = b[i2]; 739 for (j=0; j<nz; j++) { 740 xw[0] = x[vi[j]]; 741 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 742 } 743 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 744 x[i2] += xw[0]; 745 idiag -= 1; 746 i2 -= 1; 747 } 748 break; 749 case 2: 750 for (i=m-1; i>=0; i--) { 751 v = aa + 4*ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; s[1] = b[i2+1]; 755 for (j=0; j<nz; j++) { 756 idx = 2*vi[j]; 757 it = 4*j; 758 xw[0] = x[idx]; xw[1] = x[1+idx]; 759 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 760 } 761 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 762 x[i2] += xw[0]; x[i2+1] += xw[1]; 763 idiag -= 4; 764 i2 -= 2; 765 } 766 break; 767 case 3: 768 for (i=m-1; i>=0; i--) { 769 v = aa + 9*ai[i]; 770 vi = aj + ai[i]; 771 nz = ai[i+1] - ai[i]; 772 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 773 while (nz--) { 774 idx = 3*(*vi++); 775 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 776 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 777 v += 9; 778 } 779 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 780 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 781 idiag -= 9; 782 i2 -= 3; 783 } 784 break; 785 case 4: 786 for (i=m-1; i>=0; i--) { 787 v = aa + 16*ai[i]; 788 vi = aj + ai[i]; 789 nz = ai[i+1] - ai[i]; 790 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 791 while (nz--) { 792 idx = 4*(*vi++); 793 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 794 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 795 v += 16; 796 } 797 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 798 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 799 idiag -= 16; 800 i2 -= 4; 801 } 802 break; 803 case 5: 804 for (i=m-1; i>=0; i--) { 805 v = aa + 25*ai[i]; 806 vi = aj + ai[i]; 807 nz = ai[i+1] - ai[i]; 808 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 809 while (nz--) { 810 idx = 5*(*vi++); 811 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 812 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 813 v += 25; 814 } 815 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 816 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 817 idiag -= 25; 818 i2 -= 5; 819 } 820 break; 821 case 6: 822 for (i=m-1; i>=0; i--) { 823 v = aa + 36*ai[i]; 824 vi = aj + ai[i]; 825 nz = ai[i+1] - ai[i]; 826 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 827 while (nz--) { 828 idx = 6*(*vi++); 829 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 830 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 831 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 832 v += 36; 833 } 834 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 835 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 836 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 837 idiag -= 36; 838 i2 -= 6; 839 } 840 break; 841 case 7: 842 for (i=m-1; i>=0; i--) { 843 v = aa + 49*ai[i]; 844 vi = aj + ai[i]; 845 nz = ai[i+1] - ai[i]; 846 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 847 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 848 while (nz--) { 849 idx = 7*(*vi++); 850 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 851 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 852 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 853 v += 49; 854 } 855 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 856 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 857 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 858 idiag -= 49; 859 i2 -= 7; 860 } 861 break; 862 default: 863 for (i=m-1; i>=0; i--) { 864 v = aa + bs2*ai[i]; 865 vi = aj + ai[i]; 866 nz = ai[i+1] - ai[i]; 867 868 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 869 /* copy all rows of x that are needed into contiguous space */ 870 workt = work; 871 for (j=0; j<nz; j++) { 872 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 873 workt += bs; 874 } 875 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 876 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 877 878 idiag -= bs2; 879 i2 -= bs; 880 } 881 break; 882 } 883 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 884 } 885 } 886 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 887 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 888 PetscFunctionReturn(0); 889 } 890 891 892 /* 893 Special version for direct calls from Fortran (Used in PETSc-fun3d) 894 */ 895 #if defined(PETSC_HAVE_FORTRAN_CAPS) 896 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 897 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 898 #define matsetvaluesblocked4_ matsetvaluesblocked4 899 #endif 900 901 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 902 { 903 Mat A = *AA; 904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 905 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 906 PetscInt *ai =a->i,*ailen=a->ilen; 907 PetscInt *aj =a->j,stepval,lastcol = -1; 908 const PetscScalar *value = v; 909 MatScalar *ap,*aa = a->a,*bap; 910 911 PetscFunctionBegin; 912 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 913 stepval = (n-1)*4; 914 for (k=0; k<m; k++) { /* loop over added rows */ 915 row = im[k]; 916 rp = aj + ai[row]; 917 ap = aa + 16*ai[row]; 918 nrow = ailen[row]; 919 low = 0; 920 high = nrow; 921 for (l=0; l<n; l++) { /* loop over added columns */ 922 col = in[l]; 923 if (col <= lastcol) low = 0; 924 else high = nrow; 925 lastcol = col; 926 value = v + k*(stepval+4 + l)*4; 927 while (high-low > 7) { 928 t = (low+high)/2; 929 if (rp[t] > col) high = t; 930 else low = t; 931 } 932 for (i=low; i<high; i++) { 933 if (rp[i] > col) break; 934 if (rp[i] == col) { 935 bap = ap + 16*i; 936 for (ii=0; ii<4; ii++,value+=stepval) { 937 for (jj=ii; jj<16; jj+=4) { 938 bap[jj] += *value++; 939 } 940 } 941 goto noinsert2; 942 } 943 } 944 N = nrow++ - 1; 945 high++; /* added new column index thus must search to one higher than before */ 946 /* shift up all the later entries in this row */ 947 for (ii=N; ii>=i; ii--) { 948 rp[ii+1] = rp[ii]; 949 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 950 } 951 if (N >= i) { 952 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 953 } 954 rp[i] = col; 955 bap = ap + 16*i; 956 for (ii=0; ii<4; ii++,value+=stepval) { 957 for (jj=ii; jj<16; jj+=4) { 958 bap[jj] = *value++; 959 } 960 } 961 noinsert2:; 962 low = i; 963 } 964 ailen[row] = nrow; 965 } 966 PetscFunctionReturnVoid(); 967 } 968 969 #if defined(PETSC_HAVE_FORTRAN_CAPS) 970 #define matsetvalues4_ MATSETVALUES4 971 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 972 #define matsetvalues4_ matsetvalues4 973 #endif 974 975 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 976 { 977 Mat A = *AA; 978 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 979 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 980 PetscInt *ai=a->i,*ailen=a->ilen; 981 PetscInt *aj=a->j,brow,bcol; 982 PetscInt ridx,cidx,lastcol = -1; 983 MatScalar *ap,value,*aa=a->a,*bap; 984 985 PetscFunctionBegin; 986 for (k=0; k<m; k++) { /* loop over added rows */ 987 row = im[k]; brow = row/4; 988 rp = aj + ai[brow]; 989 ap = aa + 16*ai[brow]; 990 nrow = ailen[brow]; 991 low = 0; 992 high = nrow; 993 for (l=0; l<n; l++) { /* loop over added columns */ 994 col = in[l]; bcol = col/4; 995 ridx = row % 4; cidx = col % 4; 996 value = v[l + k*n]; 997 if (col <= lastcol) low = 0; 998 else high = nrow; 999 lastcol = col; 1000 while (high-low > 7) { 1001 t = (low+high)/2; 1002 if (rp[t] > bcol) high = t; 1003 else low = t; 1004 } 1005 for (i=low; i<high; i++) { 1006 if (rp[i] > bcol) break; 1007 if (rp[i] == bcol) { 1008 bap = ap + 16*i + 4*cidx + ridx; 1009 *bap += value; 1010 goto noinsert1; 1011 } 1012 } 1013 N = nrow++ - 1; 1014 high++; /* added new column thus must search to one higher than before */ 1015 /* shift up all the later entries in this row */ 1016 for (ii=N; ii>=i; ii--) { 1017 rp[ii+1] = rp[ii]; 1018 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1019 } 1020 if (N>=i) { 1021 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1022 } 1023 rp[i] = bcol; 1024 ap[16*i + 4*cidx + ridx] = value; 1025 noinsert1:; 1026 low = i; 1027 } 1028 ailen[brow] = nrow; 1029 } 1030 PetscFunctionReturnVoid(); 1031 } 1032 1033 /* 1034 Checks for missing diagonals 1035 */ 1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1037 { 1038 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1039 PetscErrorCode ierr; 1040 PetscInt *diag,*ii = a->i,i; 1041 1042 PetscFunctionBegin; 1043 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1044 *missing = PETSC_FALSE; 1045 if (A->rmap->n > 0 && !ii) { 1046 *missing = PETSC_TRUE; 1047 if (d) *d = 0; 1048 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1049 } else { 1050 diag = a->diag; 1051 for (i=0; i<a->mbs; i++) { 1052 if (diag[i] >= ii[i+1]) { 1053 *missing = PETSC_TRUE; 1054 if (d) *d = i; 1055 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1056 break; 1057 } 1058 } 1059 } 1060 PetscFunctionReturn(0); 1061 } 1062 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1089 { 1090 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1093 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1094 1095 PetscFunctionBegin; 1096 *nn = n; 1097 if (!ia) PetscFunctionReturn(0); 1098 if (symmetric) { 1099 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1100 nz = tia[n]; 1101 } else { 1102 tia = a->i; tja = a->j; 1103 } 1104 1105 if (!blockcompressed && bs > 1) { 1106 (*nn) *= bs; 1107 /* malloc & create the natural set of indices */ 1108 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1109 if (n) { 1110 (*ia)[0] = oshift; 1111 for (j=1; j<bs; j++) { 1112 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1113 } 1114 } 1115 1116 for (i=1; i<n; i++) { 1117 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1118 for (j=1; j<bs; j++) { 1119 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1120 } 1121 } 1122 if (n) { 1123 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1124 } 1125 1126 if (inja) { 1127 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1128 cnt = 0; 1129 for (i=0; i<n; i++) { 1130 for (j=0; j<bs; j++) { 1131 for (k=tia[i]; k<tia[i+1]; k++) { 1132 for (l=0; l<bs; l++) { 1133 (*ja)[cnt++] = bs*tja[k] + l; 1134 } 1135 } 1136 } 1137 } 1138 } 1139 1140 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1141 ierr = PetscFree(tia);CHKERRQ(ierr); 1142 ierr = PetscFree(tja);CHKERRQ(ierr); 1143 } 1144 } else if (oshift == 1) { 1145 if (symmetric) { 1146 nz = tia[A->rmap->n/bs]; 1147 /* add 1 to i and j indices */ 1148 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1149 *ia = tia; 1150 if (ja) { 1151 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1152 *ja = tja; 1153 } 1154 } else { 1155 nz = a->i[A->rmap->n/bs]; 1156 /* malloc space and add 1 to i and j indices */ 1157 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1158 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1159 if (ja) { 1160 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1161 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1162 } 1163 } 1164 } else { 1165 *ia = tia; 1166 if (ja) *ja = tja; 1167 } 1168 PetscFunctionReturn(0); 1169 } 1170 1171 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1172 { 1173 PetscErrorCode ierr; 1174 1175 PetscFunctionBegin; 1176 if (!ia) PetscFunctionReturn(0); 1177 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1178 ierr = PetscFree(*ia);CHKERRQ(ierr); 1179 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1180 } 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1185 { 1186 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 #if defined(PETSC_USE_LOG) 1191 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1192 #endif 1193 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1194 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1195 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1196 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1197 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1198 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1199 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1200 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1201 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1202 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1203 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1204 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1205 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1206 1207 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1208 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1209 ierr = PetscFree(A->data);CHKERRQ(ierr); 1210 1211 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1212 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1222 #if defined(PETSC_HAVE_HYPRE) 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1224 #endif 1225 PetscFunctionReturn(0); 1226 } 1227 1228 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1229 { 1230 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 switch (op) { 1235 case MAT_ROW_ORIENTED: 1236 a->roworiented = flg; 1237 break; 1238 case MAT_KEEP_NONZERO_PATTERN: 1239 a->keepnonzeropattern = flg; 1240 break; 1241 case MAT_NEW_NONZERO_LOCATIONS: 1242 a->nonew = (flg ? 0 : 1); 1243 break; 1244 case MAT_NEW_NONZERO_LOCATION_ERR: 1245 a->nonew = (flg ? -1 : 0); 1246 break; 1247 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1248 a->nonew = (flg ? -2 : 0); 1249 break; 1250 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1251 a->nounused = (flg ? -1 : 0); 1252 break; 1253 case MAT_NEW_DIAGONALS: 1254 case MAT_IGNORE_OFF_PROC_ENTRIES: 1255 case MAT_USE_HASH_TABLE: 1256 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1257 break; 1258 case MAT_SPD: 1259 case MAT_SYMMETRIC: 1260 case MAT_STRUCTURALLY_SYMMETRIC: 1261 case MAT_HERMITIAN: 1262 case MAT_SYMMETRY_ETERNAL: 1263 case MAT_SUBMAT_SINGLEIS: 1264 case MAT_STRUCTURE_ONLY: 1265 /* These options are handled directly by MatSetOption() */ 1266 break; 1267 default: 1268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1269 } 1270 PetscFunctionReturn(0); 1271 } 1272 1273 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1274 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1275 { 1276 PetscErrorCode ierr; 1277 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1278 MatScalar *aa_i; 1279 PetscScalar *v_i; 1280 1281 PetscFunctionBegin; 1282 bs = A->rmap->bs; 1283 bs2 = bs*bs; 1284 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1285 1286 bn = row/bs; /* Block number */ 1287 bp = row % bs; /* Block Position */ 1288 M = ai[bn+1] - ai[bn]; 1289 *nz = bs*M; 1290 1291 if (v) { 1292 *v = 0; 1293 if (*nz) { 1294 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1295 for (i=0; i<M; i++) { /* for each block in the block row */ 1296 v_i = *v + i*bs; 1297 aa_i = aa + bs2*(ai[bn] + i); 1298 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1299 } 1300 } 1301 } 1302 1303 if (idx) { 1304 *idx = 0; 1305 if (*nz) { 1306 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1307 for (i=0; i<M; i++) { /* for each block in the block row */ 1308 idx_i = *idx + i*bs; 1309 itmp = bs*aj[ai[bn] + i]; 1310 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1311 } 1312 } 1313 } 1314 PetscFunctionReturn(0); 1315 } 1316 1317 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1318 { 1319 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1328 { 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1333 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1334 PetscFunctionReturn(0); 1335 } 1336 1337 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1338 { 1339 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1340 Mat C; 1341 PetscErrorCode ierr; 1342 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1343 PetscInt *rows,*cols,bs2=a->bs2; 1344 MatScalar *array; 1345 1346 PetscFunctionBegin; 1347 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1348 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1349 1350 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1351 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1352 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1353 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1354 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1355 ierr = PetscFree(col);CHKERRQ(ierr); 1356 } else { 1357 C = *B; 1358 } 1359 1360 array = a->a; 1361 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1362 for (i=0; i<mbs; i++) { 1363 cols[0] = i*bs; 1364 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1365 len = ai[i+1] - ai[i]; 1366 for (j=0; j<len; j++) { 1367 rows[0] = (*aj++)*bs; 1368 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1369 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1370 array += bs2; 1371 } 1372 } 1373 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1374 1375 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1376 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 1378 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1379 *B = C; 1380 } else { 1381 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1382 } 1383 PetscFunctionReturn(0); 1384 } 1385 1386 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1387 { 1388 PetscErrorCode ierr; 1389 Mat Btrans; 1390 1391 PetscFunctionBegin; 1392 *f = PETSC_FALSE; 1393 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1394 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1395 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 1399 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1400 { 1401 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1402 PetscErrorCode ierr; 1403 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1404 int fd; 1405 PetscScalar *aa; 1406 FILE *file; 1407 1408 PetscFunctionBegin; 1409 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1410 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1411 col_lens[0] = MAT_FILE_CLASSID; 1412 1413 col_lens[1] = A->rmap->N; 1414 col_lens[2] = A->cmap->n; 1415 col_lens[3] = a->nz*bs2; 1416 1417 /* store lengths of each row and write (including header) to file */ 1418 count = 0; 1419 for (i=0; i<a->mbs; i++) { 1420 for (j=0; j<bs; j++) { 1421 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1422 } 1423 } 1424 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1425 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1426 1427 /* store column indices (zero start index) */ 1428 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1429 count = 0; 1430 for (i=0; i<a->mbs; i++) { 1431 for (j=0; j<bs; j++) { 1432 for (k=a->i[i]; k<a->i[i+1]; k++) { 1433 for (l=0; l<bs; l++) { 1434 jj[count++] = bs*a->j[k] + l; 1435 } 1436 } 1437 } 1438 } 1439 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1440 ierr = PetscFree(jj);CHKERRQ(ierr); 1441 1442 /* store nonzero values */ 1443 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1444 count = 0; 1445 for (i=0; i<a->mbs; i++) { 1446 for (j=0; j<bs; j++) { 1447 for (k=a->i[i]; k<a->i[i+1]; k++) { 1448 for (l=0; l<bs; l++) { 1449 aa[count++] = a->a[bs2*k + l*bs + j]; 1450 } 1451 } 1452 } 1453 } 1454 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1455 ierr = PetscFree(aa);CHKERRQ(ierr); 1456 1457 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1458 if (file) { 1459 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1460 } 1461 PetscFunctionReturn(0); 1462 } 1463 1464 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1465 { 1466 PetscErrorCode ierr; 1467 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1468 PetscInt i,bs = A->rmap->bs,k; 1469 1470 PetscFunctionBegin; 1471 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1472 for (i=0; i<a->mbs; i++) { 1473 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1474 for (k=a->i[i]; k<a->i[i+1]; k++) { 1475 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1476 } 1477 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1478 } 1479 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1484 { 1485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1486 PetscErrorCode ierr; 1487 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1488 PetscViewerFormat format; 1489 1490 PetscFunctionBegin; 1491 if (A->structure_only) { 1492 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } 1495 1496 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1497 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1498 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1499 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1500 const char *matname; 1501 Mat aij; 1502 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1503 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1504 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1505 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1507 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1508 PetscFunctionReturn(0); 1509 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1510 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1511 for (i=0; i<a->mbs; i++) { 1512 for (j=0; j<bs; j++) { 1513 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1514 for (k=a->i[i]; k<a->i[i+1]; k++) { 1515 for (l=0; l<bs; l++) { 1516 #if defined(PETSC_USE_COMPLEX) 1517 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1518 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1519 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1520 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1521 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1522 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1523 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1524 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1525 } 1526 #else 1527 if (a->a[bs2*k + l*bs + j] != 0.0) { 1528 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1529 } 1530 #endif 1531 } 1532 } 1533 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1534 } 1535 } 1536 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1537 } else { 1538 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1539 for (i=0; i<a->mbs; i++) { 1540 for (j=0; j<bs; j++) { 1541 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1542 for (k=a->i[i]; k<a->i[i+1]; k++) { 1543 for (l=0; l<bs; l++) { 1544 #if defined(PETSC_USE_COMPLEX) 1545 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1547 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1548 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1550 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1551 } else { 1552 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1553 } 1554 #else 1555 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1556 #endif 1557 } 1558 } 1559 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1560 } 1561 } 1562 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1563 } 1564 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #include <petscdraw.h> 1569 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1570 { 1571 Mat A = (Mat) Aa; 1572 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1573 PetscErrorCode ierr; 1574 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1575 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1576 MatScalar *aa; 1577 PetscViewer viewer; 1578 PetscViewerFormat format; 1579 1580 PetscFunctionBegin; 1581 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1582 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1583 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1584 1585 /* loop over matrix elements drawing boxes */ 1586 1587 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1588 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1589 /* Blue for negative, Cyan for zero and Red for positive */ 1590 color = PETSC_DRAW_BLUE; 1591 for (i=0,row=0; i<mbs; i++,row+=bs) { 1592 for (j=a->i[i]; j<a->i[i+1]; j++) { 1593 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1594 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1595 aa = a->a + j*bs2; 1596 for (k=0; k<bs; k++) { 1597 for (l=0; l<bs; l++) { 1598 if (PetscRealPart(*aa++) >= 0.) continue; 1599 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1600 } 1601 } 1602 } 1603 } 1604 color = PETSC_DRAW_CYAN; 1605 for (i=0,row=0; i<mbs; i++,row+=bs) { 1606 for (j=a->i[i]; j<a->i[i+1]; j++) { 1607 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1608 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1609 aa = a->a + j*bs2; 1610 for (k=0; k<bs; k++) { 1611 for (l=0; l<bs; l++) { 1612 if (PetscRealPart(*aa++) != 0.) continue; 1613 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1614 } 1615 } 1616 } 1617 } 1618 color = PETSC_DRAW_RED; 1619 for (i=0,row=0; i<mbs; i++,row+=bs) { 1620 for (j=a->i[i]; j<a->i[i+1]; j++) { 1621 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1622 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1623 aa = a->a + j*bs2; 1624 for (k=0; k<bs; k++) { 1625 for (l=0; l<bs; l++) { 1626 if (PetscRealPart(*aa++) <= 0.) continue; 1627 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1628 } 1629 } 1630 } 1631 } 1632 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1633 } else { 1634 /* use contour shading to indicate magnitude of values */ 1635 /* first determine max of all nonzero values */ 1636 PetscReal minv = 0.0, maxv = 0.0; 1637 PetscDraw popup; 1638 1639 for (i=0; i<a->nz*a->bs2; i++) { 1640 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1641 } 1642 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1643 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1644 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1645 1646 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1647 for (i=0,row=0; i<mbs; i++,row+=bs) { 1648 for (j=a->i[i]; j<a->i[i+1]; j++) { 1649 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1650 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1651 aa = a->a + j*bs2; 1652 for (k=0; k<bs; k++) { 1653 for (l=0; l<bs; l++) { 1654 MatScalar v = *aa++; 1655 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1656 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1657 } 1658 } 1659 } 1660 } 1661 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1662 } 1663 PetscFunctionReturn(0); 1664 } 1665 1666 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1667 { 1668 PetscErrorCode ierr; 1669 PetscReal xl,yl,xr,yr,w,h; 1670 PetscDraw draw; 1671 PetscBool isnull; 1672 1673 PetscFunctionBegin; 1674 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1675 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1676 if (isnull) PetscFunctionReturn(0); 1677 1678 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1679 xr += w; yr += h; xl = -w; yl = -h; 1680 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1681 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1682 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1683 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1684 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1685 PetscFunctionReturn(0); 1686 } 1687 1688 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1689 { 1690 PetscErrorCode ierr; 1691 PetscBool iascii,isbinary,isdraw; 1692 1693 PetscFunctionBegin; 1694 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1695 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1696 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1697 if (iascii) { 1698 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1699 } else if (isbinary) { 1700 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1701 } else if (isdraw) { 1702 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1703 } else { 1704 Mat B; 1705 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1706 ierr = MatView(B,viewer);CHKERRQ(ierr); 1707 ierr = MatDestroy(&B);CHKERRQ(ierr); 1708 } 1709 PetscFunctionReturn(0); 1710 } 1711 1712 1713 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1714 { 1715 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1716 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1717 PetscInt *ai = a->i,*ailen = a->ilen; 1718 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1719 MatScalar *ap,*aa = a->a; 1720 1721 PetscFunctionBegin; 1722 for (k=0; k<m; k++) { /* loop over rows */ 1723 row = im[k]; brow = row/bs; 1724 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1725 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1726 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1727 nrow = ailen[brow]; 1728 for (l=0; l<n; l++) { /* loop over columns */ 1729 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1730 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1731 col = in[l]; 1732 bcol = col/bs; 1733 cidx = col%bs; 1734 ridx = row%bs; 1735 high = nrow; 1736 low = 0; /* assume unsorted */ 1737 while (high-low > 5) { 1738 t = (low+high)/2; 1739 if (rp[t] > bcol) high = t; 1740 else low = t; 1741 } 1742 for (i=low; i<high; i++) { 1743 if (rp[i] > bcol) break; 1744 if (rp[i] == bcol) { 1745 *v++ = ap[bs2*i+bs*cidx+ridx]; 1746 goto finished; 1747 } 1748 } 1749 *v++ = 0.0; 1750 finished:; 1751 } 1752 } 1753 PetscFunctionReturn(0); 1754 } 1755 1756 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1757 { 1758 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1759 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1760 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1761 PetscErrorCode ierr; 1762 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1763 PetscBool roworiented=a->roworiented; 1764 const PetscScalar *value = v; 1765 MatScalar *ap=NULL,*aa = a->a,*bap; 1766 1767 PetscFunctionBegin; 1768 if (roworiented) { 1769 stepval = (n-1)*bs; 1770 } else { 1771 stepval = (m-1)*bs; 1772 } 1773 for (k=0; k<m; k++) { /* loop over added rows */ 1774 row = im[k]; 1775 if (row < 0) continue; 1776 #if defined(PETSC_USE_DEBUG) 1777 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1778 #endif 1779 rp = aj + ai[row]; 1780 if (!A->structure_only) ap = aa + bs2*ai[row]; 1781 rmax = imax[row]; 1782 nrow = ailen[row]; 1783 low = 0; 1784 high = nrow; 1785 for (l=0; l<n; l++) { /* loop over added columns */ 1786 if (in[l] < 0) continue; 1787 #if defined(PETSC_USE_DEBUG) 1788 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1789 #endif 1790 col = in[l]; 1791 if (!A->structure_only) { 1792 if (roworiented) { 1793 value = v + (k*(stepval+bs) + l)*bs; 1794 } else { 1795 value = v + (l*(stepval+bs) + k)*bs; 1796 } 1797 } 1798 if (col <= lastcol) low = 0; 1799 else high = nrow; 1800 lastcol = col; 1801 while (high-low > 7) { 1802 t = (low+high)/2; 1803 if (rp[t] > col) high = t; 1804 else low = t; 1805 } 1806 for (i=low; i<high; i++) { 1807 if (rp[i] > col) break; 1808 if (rp[i] == col) { 1809 if (A->structure_only) goto noinsert2; 1810 bap = ap + bs2*i; 1811 if (roworiented) { 1812 if (is == ADD_VALUES) { 1813 for (ii=0; ii<bs; ii++,value+=stepval) { 1814 for (jj=ii; jj<bs2; jj+=bs) { 1815 bap[jj] += *value++; 1816 } 1817 } 1818 } else { 1819 for (ii=0; ii<bs; ii++,value+=stepval) { 1820 for (jj=ii; jj<bs2; jj+=bs) { 1821 bap[jj] = *value++; 1822 } 1823 } 1824 } 1825 } else { 1826 if (is == ADD_VALUES) { 1827 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1828 for (jj=0; jj<bs; jj++) { 1829 bap[jj] += value[jj]; 1830 } 1831 bap += bs; 1832 } 1833 } else { 1834 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1835 for (jj=0; jj<bs; jj++) { 1836 bap[jj] = value[jj]; 1837 } 1838 bap += bs; 1839 } 1840 } 1841 } 1842 goto noinsert2; 1843 } 1844 } 1845 if (nonew == 1) goto noinsert2; 1846 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1847 if (A->structure_only) { 1848 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1849 } else { 1850 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1851 } 1852 N = nrow++ - 1; high++; 1853 /* shift up all the later entries in this row */ 1854 for (ii=N; ii>=i; ii--) { 1855 rp[ii+1] = rp[ii]; 1856 if (!A->structure_only) { 1857 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1858 } 1859 } 1860 if (N >= i && !A->structure_only) { 1861 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1862 } 1863 1864 rp[i] = col; 1865 if (!A->structure_only) { 1866 bap = ap + bs2*i; 1867 if (roworiented) { 1868 for (ii=0; ii<bs; ii++,value+=stepval) { 1869 for (jj=ii; jj<bs2; jj+=bs) { 1870 bap[jj] = *value++; 1871 } 1872 } 1873 } else { 1874 for (ii=0; ii<bs; ii++,value+=stepval) { 1875 for (jj=0; jj<bs; jj++) { 1876 *bap++ = *value++; 1877 } 1878 } 1879 } 1880 } 1881 noinsert2:; 1882 low = i; 1883 } 1884 ailen[row] = nrow; 1885 } 1886 PetscFunctionReturn(0); 1887 } 1888 1889 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1890 { 1891 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1892 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1893 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1894 PetscErrorCode ierr; 1895 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1896 MatScalar *aa = a->a,*ap; 1897 PetscReal ratio=0.6; 1898 1899 PetscFunctionBegin; 1900 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1901 1902 if (m) rmax = ailen[0]; 1903 for (i=1; i<mbs; i++) { 1904 /* move each row back by the amount of empty slots (fshift) before it*/ 1905 fshift += imax[i-1] - ailen[i-1]; 1906 rmax = PetscMax(rmax,ailen[i]); 1907 if (fshift) { 1908 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1909 N = ailen[i]; 1910 for (j=0; j<N; j++) { 1911 ip[j-fshift] = ip[j]; 1912 if (!A->structure_only) { 1913 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1914 } 1915 } 1916 } 1917 ai[i] = ai[i-1] + ailen[i-1]; 1918 } 1919 if (mbs) { 1920 fshift += imax[mbs-1] - ailen[mbs-1]; 1921 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1922 } 1923 1924 /* reset ilen and imax for each row */ 1925 a->nonzerorowcnt = 0; 1926 if (A->structure_only) { 1927 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1928 } else { /* !A->structure_only */ 1929 for (i=0; i<mbs; i++) { 1930 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1931 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1932 } 1933 } 1934 a->nz = ai[mbs]; 1935 1936 /* diagonals may have moved, so kill the diagonal pointers */ 1937 a->idiagvalid = PETSC_FALSE; 1938 if (fshift && a->diag) { 1939 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1940 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1941 a->diag = 0; 1942 } 1943 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1944 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1945 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1946 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1947 1948 A->info.mallocs += a->reallocs; 1949 a->reallocs = 0; 1950 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1951 a->rmax = rmax; 1952 1953 if (!A->structure_only) { 1954 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1955 } 1956 PetscFunctionReturn(0); 1957 } 1958 1959 /* 1960 This function returns an array of flags which indicate the locations of contiguous 1961 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1962 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1963 Assume: sizes should be long enough to hold all the values. 1964 */ 1965 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1966 { 1967 PetscInt i,j,k,row; 1968 PetscBool flg; 1969 1970 PetscFunctionBegin; 1971 for (i=0,j=0; i<n; j++) { 1972 row = idx[i]; 1973 if (row%bs!=0) { /* Not the begining of a block */ 1974 sizes[j] = 1; 1975 i++; 1976 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1977 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1978 i++; 1979 } else { /* Begining of the block, so check if the complete block exists */ 1980 flg = PETSC_TRUE; 1981 for (k=1; k<bs; k++) { 1982 if (row+k != idx[i+k]) { /* break in the block */ 1983 flg = PETSC_FALSE; 1984 break; 1985 } 1986 } 1987 if (flg) { /* No break in the bs */ 1988 sizes[j] = bs; 1989 i += bs; 1990 } else { 1991 sizes[j] = 1; 1992 i++; 1993 } 1994 } 1995 } 1996 *bs_max = j; 1997 PetscFunctionReturn(0); 1998 } 1999 2000 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2001 { 2002 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2003 PetscErrorCode ierr; 2004 PetscInt i,j,k,count,*rows; 2005 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2006 PetscScalar zero = 0.0; 2007 MatScalar *aa; 2008 const PetscScalar *xx; 2009 PetscScalar *bb; 2010 2011 PetscFunctionBegin; 2012 /* fix right hand side if needed */ 2013 if (x && b) { 2014 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2015 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2016 for (i=0; i<is_n; i++) { 2017 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2018 } 2019 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2020 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2021 } 2022 2023 /* Make a copy of the IS and sort it */ 2024 /* allocate memory for rows,sizes */ 2025 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2026 2027 /* copy IS values to rows, and sort them */ 2028 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2029 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2030 2031 if (baij->keepnonzeropattern) { 2032 for (i=0; i<is_n; i++) sizes[i] = 1; 2033 bs_max = is_n; 2034 } else { 2035 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2036 A->nonzerostate++; 2037 } 2038 2039 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2040 row = rows[j]; 2041 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2042 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2043 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2044 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2045 if (diag != (PetscScalar)0.0) { 2046 if (baij->ilen[row/bs] > 0) { 2047 baij->ilen[row/bs] = 1; 2048 baij->j[baij->i[row/bs]] = row/bs; 2049 2050 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2051 } 2052 /* Now insert all the diagonal values for this bs */ 2053 for (k=0; k<bs; k++) { 2054 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2055 } 2056 } else { /* (diag == 0.0) */ 2057 baij->ilen[row/bs] = 0; 2058 } /* end (diag == 0.0) */ 2059 } else { /* (sizes[i] != bs) */ 2060 #if defined(PETSC_USE_DEBUG) 2061 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2062 #endif 2063 for (k=0; k<count; k++) { 2064 aa[0] = zero; 2065 aa += bs; 2066 } 2067 if (diag != (PetscScalar)0.0) { 2068 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2069 } 2070 } 2071 } 2072 2073 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2074 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2079 { 2080 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2081 PetscErrorCode ierr; 2082 PetscInt i,j,k,count; 2083 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2084 PetscScalar zero = 0.0; 2085 MatScalar *aa; 2086 const PetscScalar *xx; 2087 PetscScalar *bb; 2088 PetscBool *zeroed,vecs = PETSC_FALSE; 2089 2090 PetscFunctionBegin; 2091 /* fix right hand side if needed */ 2092 if (x && b) { 2093 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2094 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2095 vecs = PETSC_TRUE; 2096 } 2097 2098 /* zero the columns */ 2099 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2100 for (i=0; i<is_n; i++) { 2101 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2102 zeroed[is_idx[i]] = PETSC_TRUE; 2103 } 2104 for (i=0; i<A->rmap->N; i++) { 2105 if (!zeroed[i]) { 2106 row = i/bs; 2107 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2108 for (k=0; k<bs; k++) { 2109 col = bs*baij->j[j] + k; 2110 if (zeroed[col]) { 2111 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2112 if (vecs) bb[i] -= aa[0]*xx[col]; 2113 aa[0] = 0.0; 2114 } 2115 } 2116 } 2117 } else if (vecs) bb[i] = diag*xx[i]; 2118 } 2119 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2120 if (vecs) { 2121 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2122 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2123 } 2124 2125 /* zero the rows */ 2126 for (i=0; i<is_n; i++) { 2127 row = is_idx[i]; 2128 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2129 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2130 for (k=0; k<count; k++) { 2131 aa[0] = zero; 2132 aa += bs; 2133 } 2134 if (diag != (PetscScalar)0.0) { 2135 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2136 } 2137 } 2138 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2143 { 2144 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2145 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2146 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2147 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2148 PetscErrorCode ierr; 2149 PetscInt ridx,cidx,bs2=a->bs2; 2150 PetscBool roworiented=a->roworiented; 2151 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2152 2153 PetscFunctionBegin; 2154 for (k=0; k<m; k++) { /* loop over added rows */ 2155 row = im[k]; 2156 brow = row/bs; 2157 if (row < 0) continue; 2158 #if defined(PETSC_USE_DEBUG) 2159 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2160 #endif 2161 rp = aj + ai[brow]; 2162 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2163 rmax = imax[brow]; 2164 nrow = ailen[brow]; 2165 low = 0; 2166 high = nrow; 2167 for (l=0; l<n; l++) { /* loop over added columns */ 2168 if (in[l] < 0) continue; 2169 #if defined(PETSC_USE_DEBUG) 2170 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2171 #endif 2172 col = in[l]; bcol = col/bs; 2173 ridx = row % bs; cidx = col % bs; 2174 if (!A->structure_only) { 2175 if (roworiented) { 2176 value = v[l + k*n]; 2177 } else { 2178 value = v[k + l*m]; 2179 } 2180 } 2181 if (col <= lastcol) low = 0; else high = nrow; 2182 lastcol = col; 2183 while (high-low > 7) { 2184 t = (low+high)/2; 2185 if (rp[t] > bcol) high = t; 2186 else low = t; 2187 } 2188 for (i=low; i<high; i++) { 2189 if (rp[i] > bcol) break; 2190 if (rp[i] == bcol) { 2191 bap = ap + bs2*i + bs*cidx + ridx; 2192 if (!A->structure_only) { 2193 if (is == ADD_VALUES) *bap += value; 2194 else *bap = value; 2195 } 2196 goto noinsert1; 2197 } 2198 } 2199 if (nonew == 1) goto noinsert1; 2200 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2201 if (A->structure_only) { 2202 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2203 } else { 2204 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2205 } 2206 N = nrow++ - 1; high++; 2207 /* shift up all the later entries in this row */ 2208 for (ii=N; ii>=i; ii--) { 2209 rp[ii+1] = rp[ii]; 2210 if (!A->structure_only) { 2211 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2212 } 2213 } 2214 if (N>=i && !A->structure_only) { 2215 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2216 } 2217 rp[i] = bcol; 2218 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2219 a->nz++; 2220 A->nonzerostate++; 2221 noinsert1:; 2222 low = i; 2223 } 2224 ailen[brow] = nrow; 2225 } 2226 PetscFunctionReturn(0); 2227 } 2228 2229 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2230 { 2231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2232 Mat outA; 2233 PetscErrorCode ierr; 2234 PetscBool row_identity,col_identity; 2235 2236 PetscFunctionBegin; 2237 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2238 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2239 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2240 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2241 2242 outA = inA; 2243 inA->factortype = MAT_FACTOR_LU; 2244 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2245 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2246 2247 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2248 2249 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2250 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2251 a->row = row; 2252 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2253 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2254 a->col = col; 2255 2256 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2257 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2258 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2259 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2260 2261 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2262 if (!a->solve_work) { 2263 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2264 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2265 } 2266 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2271 { 2272 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2273 PetscInt i,nz,mbs; 2274 2275 PetscFunctionBegin; 2276 nz = baij->maxnz; 2277 mbs = baij->mbs; 2278 for (i=0; i<nz; i++) { 2279 baij->j[i] = indices[i]; 2280 } 2281 baij->nz = nz; 2282 for (i=0; i<mbs; i++) { 2283 baij->ilen[i] = baij->imax[i]; 2284 } 2285 PetscFunctionReturn(0); 2286 } 2287 2288 /*@ 2289 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2290 in the matrix. 2291 2292 Input Parameters: 2293 + mat - the SeqBAIJ matrix 2294 - indices - the column indices 2295 2296 Level: advanced 2297 2298 Notes: 2299 This can be called if you have precomputed the nonzero structure of the 2300 matrix and want to provide it to the matrix object to improve the performance 2301 of the MatSetValues() operation. 2302 2303 You MUST have set the correct numbers of nonzeros per row in the call to 2304 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2305 2306 MUST be called before any calls to MatSetValues(); 2307 2308 @*/ 2309 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2310 { 2311 PetscErrorCode ierr; 2312 2313 PetscFunctionBegin; 2314 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2315 PetscValidPointer(indices,2); 2316 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2321 { 2322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2323 PetscErrorCode ierr; 2324 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2325 PetscReal atmp; 2326 PetscScalar *x,zero = 0.0; 2327 MatScalar *aa; 2328 PetscInt ncols,brow,krow,kcol; 2329 2330 PetscFunctionBegin; 2331 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2332 bs = A->rmap->bs; 2333 aa = a->a; 2334 ai = a->i; 2335 aj = a->j; 2336 mbs = a->mbs; 2337 2338 ierr = VecSet(v,zero);CHKERRQ(ierr); 2339 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2340 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2341 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2342 for (i=0; i<mbs; i++) { 2343 ncols = ai[1] - ai[0]; ai++; 2344 brow = bs*i; 2345 for (j=0; j<ncols; j++) { 2346 for (kcol=0; kcol<bs; kcol++) { 2347 for (krow=0; krow<bs; krow++) { 2348 atmp = PetscAbsScalar(*aa);aa++; 2349 row = brow + krow; /* row index */ 2350 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2351 } 2352 } 2353 aj++; 2354 } 2355 } 2356 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2357 PetscFunctionReturn(0); 2358 } 2359 2360 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2361 { 2362 PetscErrorCode ierr; 2363 2364 PetscFunctionBegin; 2365 /* If the two matrices have the same copy implementation, use fast copy. */ 2366 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2368 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2369 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2370 2371 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2372 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2373 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2374 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2375 } else { 2376 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2377 } 2378 PetscFunctionReturn(0); 2379 } 2380 2381 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2382 { 2383 PetscErrorCode ierr; 2384 2385 PetscFunctionBegin; 2386 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2391 { 2392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2393 2394 PetscFunctionBegin; 2395 *array = a->a; 2396 PetscFunctionReturn(0); 2397 } 2398 2399 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2400 { 2401 PetscFunctionBegin; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2406 { 2407 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2408 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2409 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2410 PetscErrorCode ierr; 2411 2412 PetscFunctionBegin; 2413 /* Set the number of nonzeros in the new matrix */ 2414 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2419 { 2420 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2421 PetscErrorCode ierr; 2422 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2423 PetscBLASInt one=1; 2424 2425 PetscFunctionBegin; 2426 if (str == SAME_NONZERO_PATTERN) { 2427 PetscScalar alpha = a; 2428 PetscBLASInt bnz; 2429 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2430 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2431 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2432 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2433 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2434 } else { 2435 Mat B; 2436 PetscInt *nnz; 2437 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2438 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2439 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2440 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2441 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2442 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2443 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2444 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2445 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2446 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2447 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2448 ierr = PetscFree(nnz);CHKERRQ(ierr); 2449 } 2450 PetscFunctionReturn(0); 2451 } 2452 2453 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2454 { 2455 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2456 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2457 MatScalar *aa = a->a; 2458 2459 PetscFunctionBegin; 2460 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2465 { 2466 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2467 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2468 MatScalar *aa = a->a; 2469 2470 PetscFunctionBegin; 2471 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 /* 2476 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2477 */ 2478 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2479 { 2480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2481 PetscErrorCode ierr; 2482 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2483 PetscInt nz = a->i[m],row,*jj,mr,col; 2484 2485 PetscFunctionBegin; 2486 *nn = n; 2487 if (!ia) PetscFunctionReturn(0); 2488 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2489 else { 2490 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2491 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2492 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2493 jj = a->j; 2494 for (i=0; i<nz; i++) { 2495 collengths[jj[i]]++; 2496 } 2497 cia[0] = oshift; 2498 for (i=0; i<n; i++) { 2499 cia[i+1] = cia[i] + collengths[i]; 2500 } 2501 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2502 jj = a->j; 2503 for (row=0; row<m; row++) { 2504 mr = a->i[row+1] - a->i[row]; 2505 for (i=0; i<mr; i++) { 2506 col = *jj++; 2507 2508 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2509 } 2510 } 2511 ierr = PetscFree(collengths);CHKERRQ(ierr); 2512 *ia = cia; *ja = cja; 2513 } 2514 PetscFunctionReturn(0); 2515 } 2516 2517 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2518 { 2519 PetscErrorCode ierr; 2520 2521 PetscFunctionBegin; 2522 if (!ia) PetscFunctionReturn(0); 2523 ierr = PetscFree(*ia);CHKERRQ(ierr); 2524 ierr = PetscFree(*ja);CHKERRQ(ierr); 2525 PetscFunctionReturn(0); 2526 } 2527 2528 /* 2529 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2530 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2531 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2532 */ 2533 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2534 { 2535 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2536 PetscErrorCode ierr; 2537 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2538 PetscInt nz = a->i[m],row,*jj,mr,col; 2539 PetscInt *cspidx; 2540 2541 PetscFunctionBegin; 2542 *nn = n; 2543 if (!ia) PetscFunctionReturn(0); 2544 2545 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2546 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2547 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2549 jj = a->j; 2550 for (i=0; i<nz; i++) { 2551 collengths[jj[i]]++; 2552 } 2553 cia[0] = oshift; 2554 for (i=0; i<n; i++) { 2555 cia[i+1] = cia[i] + collengths[i]; 2556 } 2557 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2558 jj = a->j; 2559 for (row=0; row<m; row++) { 2560 mr = a->i[row+1] - a->i[row]; 2561 for (i=0; i<mr; i++) { 2562 col = *jj++; 2563 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2564 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2565 } 2566 } 2567 ierr = PetscFree(collengths);CHKERRQ(ierr); 2568 *ia = cia; *ja = cja; 2569 *spidx = cspidx; 2570 PetscFunctionReturn(0); 2571 } 2572 2573 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2574 { 2575 PetscErrorCode ierr; 2576 2577 PetscFunctionBegin; 2578 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2579 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2580 PetscFunctionReturn(0); 2581 } 2582 2583 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2584 { 2585 PetscErrorCode ierr; 2586 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2587 2588 PetscFunctionBegin; 2589 if (!Y->preallocated || !aij->nz) { 2590 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2591 } 2592 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2593 PetscFunctionReturn(0); 2594 } 2595 2596 /* -------------------------------------------------------------------*/ 2597 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2598 MatGetRow_SeqBAIJ, 2599 MatRestoreRow_SeqBAIJ, 2600 MatMult_SeqBAIJ_N, 2601 /* 4*/ MatMultAdd_SeqBAIJ_N, 2602 MatMultTranspose_SeqBAIJ, 2603 MatMultTransposeAdd_SeqBAIJ, 2604 0, 2605 0, 2606 0, 2607 /* 10*/ 0, 2608 MatLUFactor_SeqBAIJ, 2609 0, 2610 0, 2611 MatTranspose_SeqBAIJ, 2612 /* 15*/ MatGetInfo_SeqBAIJ, 2613 MatEqual_SeqBAIJ, 2614 MatGetDiagonal_SeqBAIJ, 2615 MatDiagonalScale_SeqBAIJ, 2616 MatNorm_SeqBAIJ, 2617 /* 20*/ 0, 2618 MatAssemblyEnd_SeqBAIJ, 2619 MatSetOption_SeqBAIJ, 2620 MatZeroEntries_SeqBAIJ, 2621 /* 24*/ MatZeroRows_SeqBAIJ, 2622 0, 2623 0, 2624 0, 2625 0, 2626 /* 29*/ MatSetUp_SeqBAIJ, 2627 0, 2628 0, 2629 0, 2630 0, 2631 /* 34*/ MatDuplicate_SeqBAIJ, 2632 0, 2633 0, 2634 MatILUFactor_SeqBAIJ, 2635 0, 2636 /* 39*/ MatAXPY_SeqBAIJ, 2637 MatCreateSubMatrices_SeqBAIJ, 2638 MatIncreaseOverlap_SeqBAIJ, 2639 MatGetValues_SeqBAIJ, 2640 MatCopy_SeqBAIJ, 2641 /* 44*/ 0, 2642 MatScale_SeqBAIJ, 2643 MatShift_SeqBAIJ, 2644 0, 2645 MatZeroRowsColumns_SeqBAIJ, 2646 /* 49*/ 0, 2647 MatGetRowIJ_SeqBAIJ, 2648 MatRestoreRowIJ_SeqBAIJ, 2649 MatGetColumnIJ_SeqBAIJ, 2650 MatRestoreColumnIJ_SeqBAIJ, 2651 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2652 0, 2653 0, 2654 0, 2655 MatSetValuesBlocked_SeqBAIJ, 2656 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2657 MatDestroy_SeqBAIJ, 2658 MatView_SeqBAIJ, 2659 0, 2660 0, 2661 /* 64*/ 0, 2662 0, 2663 0, 2664 0, 2665 0, 2666 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2667 0, 2668 MatConvert_Basic, 2669 0, 2670 0, 2671 /* 74*/ 0, 2672 MatFDColoringApply_BAIJ, 2673 0, 2674 0, 2675 0, 2676 /* 79*/ 0, 2677 0, 2678 0, 2679 0, 2680 MatLoad_SeqBAIJ, 2681 /* 84*/ 0, 2682 0, 2683 0, 2684 0, 2685 0, 2686 /* 89*/ 0, 2687 0, 2688 0, 2689 0, 2690 0, 2691 /* 94*/ 0, 2692 0, 2693 0, 2694 0, 2695 0, 2696 /* 99*/ 0, 2697 0, 2698 0, 2699 0, 2700 0, 2701 /*104*/ 0, 2702 MatRealPart_SeqBAIJ, 2703 MatImaginaryPart_SeqBAIJ, 2704 0, 2705 0, 2706 /*109*/ 0, 2707 0, 2708 0, 2709 0, 2710 MatMissingDiagonal_SeqBAIJ, 2711 /*114*/ 0, 2712 0, 2713 0, 2714 0, 2715 0, 2716 /*119*/ 0, 2717 0, 2718 MatMultHermitianTranspose_SeqBAIJ, 2719 MatMultHermitianTransposeAdd_SeqBAIJ, 2720 0, 2721 /*124*/ 0, 2722 0, 2723 MatInvertBlockDiagonal_SeqBAIJ, 2724 0, 2725 0, 2726 /*129*/ 0, 2727 0, 2728 0, 2729 0, 2730 0, 2731 /*134*/ 0, 2732 0, 2733 0, 2734 0, 2735 0, 2736 /*139*/ MatSetBlockSizes_Default, 2737 0, 2738 0, 2739 MatFDColoringSetUp_SeqXAIJ, 2740 0, 2741 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2742 MatDestroySubMatrices_SeqBAIJ 2743 }; 2744 2745 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2746 { 2747 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2748 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2749 PetscErrorCode ierr; 2750 2751 PetscFunctionBegin; 2752 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2753 2754 /* allocate space for values if not already there */ 2755 if (!aij->saved_values) { 2756 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2757 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2758 } 2759 2760 /* copy values over */ 2761 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2762 PetscFunctionReturn(0); 2763 } 2764 2765 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2766 { 2767 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2768 PetscErrorCode ierr; 2769 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2770 2771 PetscFunctionBegin; 2772 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2773 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2774 2775 /* copy values over */ 2776 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 2780 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2781 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2782 2783 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2784 { 2785 Mat_SeqBAIJ *b; 2786 PetscErrorCode ierr; 2787 PetscInt i,mbs,nbs,bs2; 2788 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2789 2790 PetscFunctionBegin; 2791 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2792 if (nz == MAT_SKIP_ALLOCATION) { 2793 skipallocation = PETSC_TRUE; 2794 nz = 0; 2795 } 2796 2797 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2798 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2799 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2800 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2801 2802 B->preallocated = PETSC_TRUE; 2803 2804 mbs = B->rmap->n/bs; 2805 nbs = B->cmap->n/bs; 2806 bs2 = bs*bs; 2807 2808 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2809 2810 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2811 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2812 if (nnz) { 2813 for (i=0; i<mbs; i++) { 2814 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2815 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2816 } 2817 } 2818 2819 b = (Mat_SeqBAIJ*)B->data; 2820 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2821 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2822 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2823 2824 if (!flg) { 2825 switch (bs) { 2826 case 1: 2827 B->ops->mult = MatMult_SeqBAIJ_1; 2828 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2829 break; 2830 case 2: 2831 B->ops->mult = MatMult_SeqBAIJ_2; 2832 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2833 break; 2834 case 3: 2835 B->ops->mult = MatMult_SeqBAIJ_3; 2836 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2837 break; 2838 case 4: 2839 B->ops->mult = MatMult_SeqBAIJ_4; 2840 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2841 break; 2842 case 5: 2843 B->ops->mult = MatMult_SeqBAIJ_5; 2844 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2845 break; 2846 case 6: 2847 B->ops->mult = MatMult_SeqBAIJ_6; 2848 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2849 break; 2850 case 7: 2851 B->ops->mult = MatMult_SeqBAIJ_7; 2852 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2853 break; 2854 case 9: 2855 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2856 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2857 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2858 #else 2859 B->ops->mult = MatMult_SeqBAIJ_N; 2860 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2861 #endif 2862 break; 2863 case 11: 2864 B->ops->mult = MatMult_SeqBAIJ_11; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2866 break; 2867 case 15: 2868 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2870 break; 2871 default: 2872 B->ops->mult = MatMult_SeqBAIJ_N; 2873 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2874 break; 2875 } 2876 } 2877 B->ops->sor = MatSOR_SeqBAIJ; 2878 b->mbs = mbs; 2879 b->nbs = nbs; 2880 if (!skipallocation) { 2881 if (!b->imax) { 2882 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2883 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2884 2885 b->free_imax_ilen = PETSC_TRUE; 2886 } 2887 /* b->ilen will count nonzeros in each block row so far. */ 2888 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2889 if (!nnz) { 2890 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2891 else if (nz < 0) nz = 1; 2892 for (i=0; i<mbs; i++) b->imax[i] = nz; 2893 nz = nz*mbs; 2894 } else { 2895 nz = 0; 2896 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2897 } 2898 2899 /* allocate the matrix space */ 2900 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2901 if (B->structure_only) { 2902 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2903 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2904 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2905 } else { 2906 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2907 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2908 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2909 } 2910 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2911 2912 if (B->structure_only) { 2913 b->singlemalloc = PETSC_FALSE; 2914 b->free_a = PETSC_FALSE; 2915 } else { 2916 b->singlemalloc = PETSC_TRUE; 2917 b->free_a = PETSC_TRUE; 2918 } 2919 b->free_ij = PETSC_TRUE; 2920 2921 b->i[0] = 0; 2922 for (i=1; i<mbs+1; i++) { 2923 b->i[i] = b->i[i-1] + b->imax[i-1]; 2924 } 2925 2926 } else { 2927 b->free_a = PETSC_FALSE; 2928 b->free_ij = PETSC_FALSE; 2929 } 2930 2931 b->bs2 = bs2; 2932 b->mbs = mbs; 2933 b->nz = 0; 2934 b->maxnz = nz; 2935 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2936 B->was_assembled = PETSC_FALSE; 2937 B->assembled = PETSC_FALSE; 2938 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2939 PetscFunctionReturn(0); 2940 } 2941 2942 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2943 { 2944 PetscInt i,m,nz,nz_max=0,*nnz; 2945 PetscScalar *values=0; 2946 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2947 PetscErrorCode ierr; 2948 2949 PetscFunctionBegin; 2950 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2951 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2952 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2953 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2954 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2955 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2956 m = B->rmap->n/bs; 2957 2958 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2959 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2960 for (i=0; i<m; i++) { 2961 nz = ii[i+1]- ii[i]; 2962 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2963 nz_max = PetscMax(nz_max, nz); 2964 nnz[i] = nz; 2965 } 2966 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2967 ierr = PetscFree(nnz);CHKERRQ(ierr); 2968 2969 values = (PetscScalar*)V; 2970 if (!values) { 2971 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2972 } 2973 for (i=0; i<m; i++) { 2974 PetscInt ncols = ii[i+1] - ii[i]; 2975 const PetscInt *icols = jj + ii[i]; 2976 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2977 if (!roworiented) { 2978 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2979 } else { 2980 PetscInt j; 2981 for (j=0; j<ncols; j++) { 2982 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2983 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2984 } 2985 } 2986 } 2987 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2988 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2989 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2990 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2991 PetscFunctionReturn(0); 2992 } 2993 2994 /*MC 2995 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2996 block sparse compressed row format. 2997 2998 Options Database Keys: 2999 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3000 3001 Level: beginner 3002 3003 .seealso: MatCreateSeqBAIJ() 3004 M*/ 3005 3006 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3007 3008 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3009 { 3010 PetscErrorCode ierr; 3011 PetscMPIInt size; 3012 Mat_SeqBAIJ *b; 3013 3014 PetscFunctionBegin; 3015 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3016 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3017 3018 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3019 B->data = (void*)b; 3020 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3021 3022 b->row = 0; 3023 b->col = 0; 3024 b->icol = 0; 3025 b->reallocs = 0; 3026 b->saved_values = 0; 3027 3028 b->roworiented = PETSC_TRUE; 3029 b->nonew = 0; 3030 b->diag = 0; 3031 B->spptr = 0; 3032 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3033 b->keepnonzeropattern = PETSC_FALSE; 3034 3035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3044 #if defined(PETSC_HAVE_HYPRE) 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3046 #endif 3047 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3048 PetscFunctionReturn(0); 3049 } 3050 3051 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3052 { 3053 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3054 PetscErrorCode ierr; 3055 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3056 3057 PetscFunctionBegin; 3058 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3059 3060 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3061 c->imax = a->imax; 3062 c->ilen = a->ilen; 3063 c->free_imax_ilen = PETSC_FALSE; 3064 } else { 3065 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3066 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3067 for (i=0; i<mbs; i++) { 3068 c->imax[i] = a->imax[i]; 3069 c->ilen[i] = a->ilen[i]; 3070 } 3071 c->free_imax_ilen = PETSC_TRUE; 3072 } 3073 3074 /* allocate the matrix space */ 3075 if (mallocmatspace) { 3076 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3077 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3078 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3079 3080 c->i = a->i; 3081 c->j = a->j; 3082 c->singlemalloc = PETSC_FALSE; 3083 c->free_a = PETSC_TRUE; 3084 c->free_ij = PETSC_FALSE; 3085 c->parent = A; 3086 C->preallocated = PETSC_TRUE; 3087 C->assembled = PETSC_TRUE; 3088 3089 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3090 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3091 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3092 } else { 3093 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3094 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3095 3096 c->singlemalloc = PETSC_TRUE; 3097 c->free_a = PETSC_TRUE; 3098 c->free_ij = PETSC_TRUE; 3099 3100 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3101 if (mbs > 0) { 3102 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3103 if (cpvalues == MAT_COPY_VALUES) { 3104 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3105 } else { 3106 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3107 } 3108 } 3109 C->preallocated = PETSC_TRUE; 3110 C->assembled = PETSC_TRUE; 3111 } 3112 } 3113 3114 c->roworiented = a->roworiented; 3115 c->nonew = a->nonew; 3116 3117 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3118 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3119 3120 c->bs2 = a->bs2; 3121 c->mbs = a->mbs; 3122 c->nbs = a->nbs; 3123 3124 if (a->diag) { 3125 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3126 c->diag = a->diag; 3127 c->free_diag = PETSC_FALSE; 3128 } else { 3129 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3130 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3131 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3132 c->free_diag = PETSC_TRUE; 3133 } 3134 } else c->diag = 0; 3135 3136 c->nz = a->nz; 3137 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3138 c->solve_work = NULL; 3139 c->mult_work = NULL; 3140 c->sor_workt = NULL; 3141 c->sor_work = NULL; 3142 3143 c->compressedrow.use = a->compressedrow.use; 3144 c->compressedrow.nrows = a->compressedrow.nrows; 3145 if (a->compressedrow.use) { 3146 i = a->compressedrow.nrows; 3147 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3148 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3149 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3150 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3151 } else { 3152 c->compressedrow.use = PETSC_FALSE; 3153 c->compressedrow.i = NULL; 3154 c->compressedrow.rindex = NULL; 3155 } 3156 C->nonzerostate = A->nonzerostate; 3157 3158 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3159 PetscFunctionReturn(0); 3160 } 3161 3162 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3163 { 3164 PetscErrorCode ierr; 3165 3166 PetscFunctionBegin; 3167 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3168 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3169 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3170 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3171 PetscFunctionReturn(0); 3172 } 3173 3174 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3175 { 3176 Mat_SeqBAIJ *a; 3177 PetscErrorCode ierr; 3178 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3179 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3180 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3181 PetscInt *masked,nmask,tmp,bs2,ishift; 3182 PetscMPIInt size; 3183 int fd; 3184 PetscScalar *aa; 3185 MPI_Comm comm; 3186 3187 PetscFunctionBegin; 3188 /* force binary viewer to load .info file if it has not yet done so */ 3189 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3190 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3191 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3192 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3193 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3194 if (bs < 0) bs = 1; 3195 bs2 = bs*bs; 3196 3197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3198 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3199 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3200 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3201 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3202 M = header[1]; N = header[2]; nz = header[3]; 3203 3204 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3205 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3206 3207 /* 3208 This code adds extra rows to make sure the number of rows is 3209 divisible by the blocksize 3210 */ 3211 mbs = M/bs; 3212 extra_rows = bs - M + bs*(mbs); 3213 if (extra_rows == bs) extra_rows = 0; 3214 else mbs++; 3215 if (extra_rows) { 3216 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3217 } 3218 3219 /* Set global sizes if not already set */ 3220 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3221 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3222 } else { /* Check if the matrix global sizes are correct */ 3223 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3224 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3225 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3226 } 3227 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3228 } 3229 3230 /* read in row lengths */ 3231 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3232 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3233 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3234 3235 /* read in column indices */ 3236 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3237 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3238 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3239 3240 /* loop over row lengths determining block row lengths */ 3241 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3242 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3243 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3244 rowcount = 0; 3245 nzcount = 0; 3246 for (i=0; i<mbs; i++) { 3247 nmask = 0; 3248 for (j=0; j<bs; j++) { 3249 kmax = rowlengths[rowcount]; 3250 for (k=0; k<kmax; k++) { 3251 tmp = jj[nzcount++]/bs; 3252 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3253 } 3254 rowcount++; 3255 } 3256 browlengths[i] += nmask; 3257 /* zero out the mask elements we set */ 3258 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3259 } 3260 3261 /* Do preallocation */ 3262 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3263 a = (Mat_SeqBAIJ*)newmat->data; 3264 3265 /* set matrix "i" values */ 3266 a->i[0] = 0; 3267 for (i=1; i<= mbs; i++) { 3268 a->i[i] = a->i[i-1] + browlengths[i-1]; 3269 a->ilen[i-1] = browlengths[i-1]; 3270 } 3271 a->nz = 0; 3272 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3273 3274 /* read in nonzero values */ 3275 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3276 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3277 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3278 3279 /* set "a" and "j" values into matrix */ 3280 nzcount = 0; jcount = 0; 3281 for (i=0; i<mbs; i++) { 3282 nzcountb = nzcount; 3283 nmask = 0; 3284 for (j=0; j<bs; j++) { 3285 kmax = rowlengths[i*bs+j]; 3286 for (k=0; k<kmax; k++) { 3287 tmp = jj[nzcount++]/bs; 3288 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3289 } 3290 } 3291 /* sort the masked values */ 3292 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3293 3294 /* set "j" values into matrix */ 3295 maskcount = 1; 3296 for (j=0; j<nmask; j++) { 3297 a->j[jcount++] = masked[j]; 3298 mask[masked[j]] = maskcount++; 3299 } 3300 /* set "a" values into matrix */ 3301 ishift = bs2*a->i[i]; 3302 for (j=0; j<bs; j++) { 3303 kmax = rowlengths[i*bs+j]; 3304 for (k=0; k<kmax; k++) { 3305 tmp = jj[nzcountb]/bs; 3306 block = mask[tmp] - 1; 3307 point = jj[nzcountb] - bs*tmp; 3308 idx = ishift + bs2*block + j + bs*point; 3309 a->a[idx] = (MatScalar)aa[nzcountb++]; 3310 } 3311 } 3312 /* zero out the mask elements we set */ 3313 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3314 } 3315 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3316 3317 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3318 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3319 ierr = PetscFree(aa);CHKERRQ(ierr); 3320 ierr = PetscFree(jj);CHKERRQ(ierr); 3321 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3322 3323 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3324 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3325 PetscFunctionReturn(0); 3326 } 3327 3328 /*@C 3329 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3330 compressed row) format. For good matrix assembly performance the 3331 user should preallocate the matrix storage by setting the parameter nz 3332 (or the array nnz). By setting these parameters accurately, performance 3333 during matrix assembly can be increased by more than a factor of 50. 3334 3335 Collective on MPI_Comm 3336 3337 Input Parameters: 3338 + comm - MPI communicator, set to PETSC_COMM_SELF 3339 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3340 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3341 . m - number of rows 3342 . n - number of columns 3343 . nz - number of nonzero blocks per block row (same for all rows) 3344 - nnz - array containing the number of nonzero blocks in the various block rows 3345 (possibly different for each block row) or NULL 3346 3347 Output Parameter: 3348 . A - the matrix 3349 3350 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3351 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3352 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3353 3354 Options Database Keys: 3355 . -mat_no_unroll - uses code that does not unroll the loops in the 3356 block calculations (much slower) 3357 . -mat_block_size - size of the blocks to use 3358 3359 Level: intermediate 3360 3361 Notes: 3362 The number of rows and columns must be divisible by blocksize. 3363 3364 If the nnz parameter is given then the nz parameter is ignored 3365 3366 A nonzero block is any block that as 1 or more nonzeros in it 3367 3368 The block AIJ format is fully compatible with standard Fortran 77 3369 storage. That is, the stored row and column indices can begin at 3370 either one (as in Fortran) or zero. See the users' manual for details. 3371 3372 Specify the preallocated storage with either nz or nnz (not both). 3373 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3374 allocation. See Users-Manual: ch_mat for details. 3375 matrices. 3376 3377 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3378 @*/ 3379 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3380 { 3381 PetscErrorCode ierr; 3382 3383 PetscFunctionBegin; 3384 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3385 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3386 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3387 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3388 PetscFunctionReturn(0); 3389 } 3390 3391 /*@C 3392 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3393 per row in the matrix. For good matrix assembly performance the 3394 user should preallocate the matrix storage by setting the parameter nz 3395 (or the array nnz). By setting these parameters accurately, performance 3396 during matrix assembly can be increased by more than a factor of 50. 3397 3398 Collective on MPI_Comm 3399 3400 Input Parameters: 3401 + B - the matrix 3402 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3403 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3404 . nz - number of block nonzeros per block row (same for all rows) 3405 - nnz - array containing the number of block nonzeros in the various block rows 3406 (possibly different for each block row) or NULL 3407 3408 Options Database Keys: 3409 . -mat_no_unroll - uses code that does not unroll the loops in the 3410 block calculations (much slower) 3411 . -mat_block_size - size of the blocks to use 3412 3413 Level: intermediate 3414 3415 Notes: 3416 If the nnz parameter is given then the nz parameter is ignored 3417 3418 You can call MatGetInfo() to get information on how effective the preallocation was; 3419 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3420 You can also run with the option -info and look for messages with the string 3421 malloc in them to see if additional memory allocation was needed. 3422 3423 The block AIJ format is fully compatible with standard Fortran 77 3424 storage. That is, the stored row and column indices can begin at 3425 either one (as in Fortran) or zero. See the users' manual for details. 3426 3427 Specify the preallocated storage with either nz or nnz (not both). 3428 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3429 allocation. See Users-Manual: ch_mat for details. 3430 3431 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3432 @*/ 3433 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3434 { 3435 PetscErrorCode ierr; 3436 3437 PetscFunctionBegin; 3438 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3439 PetscValidType(B,1); 3440 PetscValidLogicalCollectiveInt(B,bs,2); 3441 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3442 PetscFunctionReturn(0); 3443 } 3444 3445 /*@C 3446 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3447 (the default sequential PETSc format). 3448 3449 Collective on MPI_Comm 3450 3451 Input Parameters: 3452 + B - the matrix 3453 . i - the indices into j for the start of each local row (starts with zero) 3454 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3455 - v - optional values in the matrix 3456 3457 Level: developer 3458 3459 Notes: 3460 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3461 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3462 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3463 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3464 block column and the second index is over columns within a block. 3465 3466 .keywords: matrix, aij, compressed row, sparse 3467 3468 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3469 @*/ 3470 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3471 { 3472 PetscErrorCode ierr; 3473 3474 PetscFunctionBegin; 3475 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3476 PetscValidType(B,1); 3477 PetscValidLogicalCollectiveInt(B,bs,2); 3478 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3479 PetscFunctionReturn(0); 3480 } 3481 3482 3483 /*@ 3484 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3485 3486 Collective on MPI_Comm 3487 3488 Input Parameters: 3489 + comm - must be an MPI communicator of size 1 3490 . bs - size of block 3491 . m - number of rows 3492 . n - number of columns 3493 . i - row indices 3494 . j - column indices 3495 - a - matrix values 3496 3497 Output Parameter: 3498 . mat - the matrix 3499 3500 Level: advanced 3501 3502 Notes: 3503 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3504 once the matrix is destroyed 3505 3506 You cannot set new nonzero locations into this matrix, that will generate an error. 3507 3508 The i and j indices are 0 based 3509 3510 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3511 3512 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3513 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3514 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3515 with column-major ordering within blocks. 3516 3517 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3518 3519 @*/ 3520 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3521 { 3522 PetscErrorCode ierr; 3523 PetscInt ii; 3524 Mat_SeqBAIJ *baij; 3525 3526 PetscFunctionBegin; 3527 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3528 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3529 3530 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3531 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3532 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3533 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3534 baij = (Mat_SeqBAIJ*)(*mat)->data; 3535 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3536 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3537 3538 baij->i = i; 3539 baij->j = j; 3540 baij->a = a; 3541 3542 baij->singlemalloc = PETSC_FALSE; 3543 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3544 baij->free_a = PETSC_FALSE; 3545 baij->free_ij = PETSC_FALSE; 3546 3547 for (ii=0; ii<m; ii++) { 3548 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3549 #if defined(PETSC_USE_DEBUG) 3550 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3551 #endif 3552 } 3553 #if defined(PETSC_USE_DEBUG) 3554 for (ii=0; ii<baij->i[m]; ii++) { 3555 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3556 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3557 } 3558 #endif 3559 3560 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3561 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3562 PetscFunctionReturn(0); 3563 } 3564 3565 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3566 { 3567 PetscErrorCode ierr; 3568 PetscMPIInt size; 3569 3570 PetscFunctionBegin; 3571 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3572 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3573 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3574 } else { 3575 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3576 } 3577 PetscFunctionReturn(0); 3578 } 3579