1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*); 17 #endif 18 19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 22 PetscErrorCode ierr; 23 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 24 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 25 PetscReal shift = 0.0; 26 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 27 28 PetscFunctionBegin; 29 allowzeropivot = PetscNot(A->erroriffailure); 30 31 if (a->idiagvalid) { 32 if (values) *values = a->idiag; 33 PetscFunctionReturn(0); 34 } 35 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 36 diag_offset = a->diag; 37 if (!a->idiag) { 38 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 39 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 40 } 41 diag = a->idiag; 42 if (values) *values = a->idiag; 43 /* factor and invert each block */ 44 switch (bs) { 45 case 1: 46 for (i=0; i<mbs; i++) { 47 odiag = v + 1*diag_offset[i]; 48 diag[0] = odiag[0]; 49 50 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 51 if (allowzeropivot) { 52 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 53 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 54 A->factorerror_zeropivot_row = i; 55 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 56 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 57 } 58 59 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 60 diag += 1; 61 } 62 break; 63 case 2: 64 for (i=0; i<mbs; i++) { 65 odiag = v + 4*diag_offset[i]; 66 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 67 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 68 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 69 diag += 4; 70 } 71 break; 72 case 3: 73 for (i=0; i<mbs; i++) { 74 odiag = v + 9*diag_offset[i]; 75 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 76 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 77 diag[8] = odiag[8]; 78 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 79 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 80 diag += 9; 81 } 82 break; 83 case 4: 84 for (i=0; i<mbs; i++) { 85 odiag = v + 16*diag_offset[i]; 86 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 87 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 88 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 89 diag += 16; 90 } 91 break; 92 case 5: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 25*diag_offset[i]; 95 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 97 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 98 diag += 25; 99 } 100 break; 101 case 6: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 36*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 106 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 107 diag += 36; 108 } 109 break; 110 case 7: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 49*diag_offset[i]; 113 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 115 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 116 diag += 49; 117 } 118 break; 119 default: 120 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 121 for (i=0; i<mbs; i++) { 122 odiag = v + bs2*diag_offset[i]; 123 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 125 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 diag += bs2; 127 } 128 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 129 } 130 a->idiagvalid = PETSC_TRUE; 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 135 { 136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 137 PetscScalar *x,*work,*w,*workt,*t; 138 const MatScalar *v,*aa = a->a, *idiag; 139 const PetscScalar *b,*xb; 140 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 141 PetscErrorCode ierr; 142 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 143 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 144 145 PetscFunctionBegin; 146 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 147 its = its*lits; 148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 149 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 150 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 151 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 152 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 153 154 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 155 156 if (!m) PetscFunctionReturn(0); 157 diag = a->diag; 158 idiag = a->idiag; 159 k = PetscMax(A->rmap->n,A->cmap->n); 160 if (!a->mult_work) { 161 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 162 } 163 if (!a->sor_workt) { 164 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 165 } 166 if (!a->sor_work) { 167 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 168 } 169 work = a->mult_work; 170 t = a->sor_workt; 171 w = a->sor_work; 172 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 178 switch (bs) { 179 case 1: 180 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 181 t[0] = b[0]; 182 i2 = 1; 183 idiag += 1; 184 for (i=1; i<m; i++) { 185 v = aa + ai[i]; 186 vi = aj + ai[i]; 187 nz = diag[i] - ai[i]; 188 s[0] = b[i2]; 189 for (j=0; j<nz; j++) { 190 xw[0] = x[vi[j]]; 191 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 192 } 193 t[i2] = s[0]; 194 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 195 x[i2] = xw[0]; 196 idiag += 1; 197 i2 += 1; 198 } 199 break; 200 case 2: 201 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 202 t[0] = b[0]; t[1] = b[1]; 203 i2 = 2; 204 idiag += 4; 205 for (i=1; i<m; i++) { 206 v = aa + 4*ai[i]; 207 vi = aj + ai[i]; 208 nz = diag[i] - ai[i]; 209 s[0] = b[i2]; s[1] = b[i2+1]; 210 for (j=0; j<nz; j++) { 211 idx = 2*vi[j]; 212 it = 4*j; 213 xw[0] = x[idx]; xw[1] = x[1+idx]; 214 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 215 } 216 t[i2] = s[0]; t[i2+1] = s[1]; 217 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 218 x[i2] = xw[0]; x[i2+1] = xw[1]; 219 idiag += 4; 220 i2 += 2; 221 } 222 break; 223 case 3: 224 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 225 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 226 i2 = 3; 227 idiag += 9; 228 for (i=1; i<m; i++) { 229 v = aa + 9*ai[i]; 230 vi = aj + ai[i]; 231 nz = diag[i] - ai[i]; 232 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 233 while (nz--) { 234 idx = 3*(*vi++); 235 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 236 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 237 v += 9; 238 } 239 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 240 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 241 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 242 idiag += 9; 243 i2 += 3; 244 } 245 break; 246 case 4: 247 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 248 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 249 i2 = 4; 250 idiag += 16; 251 for (i=1; i<m; i++) { 252 v = aa + 16*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 256 while (nz--) { 257 idx = 4*(*vi++); 258 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 259 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 260 v += 16; 261 } 262 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 263 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 264 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 265 idiag += 16; 266 i2 += 4; 267 } 268 break; 269 case 5: 270 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 272 i2 = 5; 273 idiag += 25; 274 for (i=1; i<m; i++) { 275 v = aa + 25*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 279 while (nz--) { 280 idx = 5*(*vi++); 281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 282 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 283 v += 25; 284 } 285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 286 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 288 idiag += 25; 289 i2 += 5; 290 } 291 break; 292 case 6: 293 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 295 i2 = 6; 296 idiag += 36; 297 for (i=1; i<m; i++) { 298 v = aa + 36*ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 302 while (nz--) { 303 idx = 6*(*vi++); 304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 305 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 306 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 307 v += 36; 308 } 309 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 310 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 311 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 312 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 313 idiag += 36; 314 i2 += 6; 315 } 316 break; 317 case 7: 318 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 319 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 320 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 321 i2 = 7; 322 idiag += 49; 323 for (i=1; i<m; i++) { 324 v = aa + 49*ai[i]; 325 vi = aj + ai[i]; 326 nz = diag[i] - ai[i]; 327 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 328 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 329 while (nz--) { 330 idx = 7*(*vi++); 331 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 332 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 333 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 334 v += 49; 335 } 336 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 337 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 338 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 339 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 340 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 341 idiag += 49; 342 i2 += 7; 343 } 344 break; 345 default: 346 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 347 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 348 i2 = bs; 349 idiag += bs2; 350 for (i=1; i<m; i++) { 351 v = aa + bs2*ai[i]; 352 vi = aj + ai[i]; 353 nz = diag[i] - ai[i]; 354 355 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 356 /* copy all rows of x that are needed into contiguous space */ 357 workt = work; 358 for (j=0; j<nz; j++) { 359 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 360 workt += bs; 361 } 362 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 363 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 364 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 365 366 idiag += bs2; 367 i2 += bs; 368 } 369 break; 370 } 371 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 372 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 373 xb = t; 374 } 375 else xb = b; 376 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 377 idiag = a->idiag+bs2*(a->mbs-1); 378 i2 = bs * (m-1); 379 switch (bs) { 380 case 1: 381 s[0] = xb[i2]; 382 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 383 x[i2] = xw[0]; 384 i2 -= 1; 385 for (i=m-2; i>=0; i--) { 386 v = aa + (diag[i]+1); 387 vi = aj + diag[i] + 1; 388 nz = ai[i+1] - diag[i] - 1; 389 s[0] = xb[i2]; 390 for (j=0; j<nz; j++) { 391 xw[0] = x[vi[j]]; 392 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 393 } 394 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 395 x[i2] = xw[0]; 396 idiag -= 1; 397 i2 -= 1; 398 } 399 break; 400 case 2: 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 403 x[i2] = xw[0]; x[i2+1] = xw[1]; 404 i2 -= 2; 405 idiag -= 4; 406 for (i=m-2; i>=0; i--) { 407 v = aa + 4*(diag[i] + 1); 408 vi = aj + diag[i] + 1; 409 nz = ai[i+1] - diag[i] - 1; 410 s[0] = xb[i2]; s[1] = xb[i2+1]; 411 for (j=0; j<nz; j++) { 412 idx = 2*vi[j]; 413 it = 4*j; 414 xw[0] = x[idx]; xw[1] = x[1+idx]; 415 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 416 } 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 idiag -= 4; 420 i2 -= 2; 421 } 422 break; 423 case 3: 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 426 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 427 i2 -= 3; 428 idiag -= 9; 429 for (i=m-2; i>=0; i--) { 430 v = aa + 9*(diag[i]+1); 431 vi = aj + diag[i] + 1; 432 nz = ai[i+1] - diag[i] - 1; 433 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 434 while (nz--) { 435 idx = 3*(*vi++); 436 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 437 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 438 v += 9; 439 } 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 break; 446 case 4: 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 449 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 450 i2 -= 4; 451 idiag -= 16; 452 for (i=m-2; i>=0; i--) { 453 v = aa + 16*(diag[i]+1); 454 vi = aj + diag[i] + 1; 455 nz = ai[i+1] - diag[i] - 1; 456 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 457 while (nz--) { 458 idx = 4*(*vi++); 459 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 460 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 461 v += 16; 462 } 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 idiag -= 16; 466 i2 -= 4; 467 } 468 break; 469 case 5: 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 473 i2 -= 5; 474 idiag -= 25; 475 for (i=m-2; i>=0; i--) { 476 v = aa + 25*(diag[i]+1); 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 480 while (nz--) { 481 idx = 5*(*vi++); 482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 484 v += 25; 485 } 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 idiag -= 25; 489 i2 -= 5; 490 } 491 break; 492 case 6: 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 496 i2 -= 6; 497 idiag -= 36; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 36*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 503 while (nz--) { 504 idx = 6*(*vi++); 505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 506 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 507 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 508 v += 36; 509 } 510 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 512 idiag -= 36; 513 i2 -= 6; 514 } 515 break; 516 case 7: 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 518 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 519 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 520 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 521 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 522 i2 -= 7; 523 idiag -= 49; 524 for (i=m-2; i>=0; i--) { 525 v = aa + 49*(diag[i]+1); 526 vi = aj + diag[i] + 1; 527 nz = ai[i+1] - diag[i] - 1; 528 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 529 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 530 while (nz--) { 531 idx = 7*(*vi++); 532 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 533 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 534 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 535 v += 49; 536 } 537 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 539 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 540 idiag -= 49; 541 i2 -= 7; 542 } 543 break; 544 default: 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 547 i2 -= bs; 548 idiag -= bs2; 549 for (i=m-2; i>=0; i--) { 550 v = aa + bs2*(diag[i]+1); 551 vi = aj + diag[i] + 1; 552 nz = ai[i+1] - diag[i] - 1; 553 554 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 555 /* copy all rows of x that are needed into contiguous space */ 556 workt = work; 557 for (j=0; j<nz; j++) { 558 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 559 workt += bs; 560 } 561 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 562 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 563 564 idiag -= bs2; 565 i2 -= bs; 566 } 567 break; 568 } 569 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 570 } 571 its--; 572 } 573 while (its--) { 574 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 575 idiag = a->idiag; 576 i2 = 0; 577 switch (bs) { 578 case 1: 579 for (i=0; i<m; i++) { 580 v = aa + ai[i]; 581 vi = aj + ai[i]; 582 nz = ai[i+1] - ai[i]; 583 s[0] = b[i2]; 584 for (j=0; j<nz; j++) { 585 xw[0] = x[vi[j]]; 586 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 587 } 588 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 589 x[i2] += xw[0]; 590 idiag += 1; 591 i2 += 1; 592 } 593 break; 594 case 2: 595 for (i=0; i<m; i++) { 596 v = aa + 4*ai[i]; 597 vi = aj + ai[i]; 598 nz = ai[i+1] - ai[i]; 599 s[0] = b[i2]; s[1] = b[i2+1]; 600 for (j=0; j<nz; j++) { 601 idx = 2*vi[j]; 602 it = 4*j; 603 xw[0] = x[idx]; xw[1] = x[1+idx]; 604 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 605 } 606 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 607 x[i2] += xw[0]; x[i2+1] += xw[1]; 608 idiag += 4; 609 i2 += 2; 610 } 611 break; 612 case 3: 613 for (i=0; i<m; i++) { 614 v = aa + 9*ai[i]; 615 vi = aj + ai[i]; 616 nz = ai[i+1] - ai[i]; 617 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 618 while (nz--) { 619 idx = 3*(*vi++); 620 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 621 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 622 v += 9; 623 } 624 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 625 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 626 idiag += 9; 627 i2 += 3; 628 } 629 break; 630 case 4: 631 for (i=0; i<m; i++) { 632 v = aa + 16*ai[i]; 633 vi = aj + ai[i]; 634 nz = ai[i+1] - ai[i]; 635 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 636 while (nz--) { 637 idx = 4*(*vi++); 638 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 639 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 640 v += 16; 641 } 642 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 643 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 644 idiag += 16; 645 i2 += 4; 646 } 647 break; 648 case 5: 649 for (i=0; i<m; i++) { 650 v = aa + 25*ai[i]; 651 vi = aj + ai[i]; 652 nz = ai[i+1] - ai[i]; 653 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 654 while (nz--) { 655 idx = 5*(*vi++); 656 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 657 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 658 v += 25; 659 } 660 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 661 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 662 idiag += 25; 663 i2 += 5; 664 } 665 break; 666 case 6: 667 for (i=0; i<m; i++) { 668 v = aa + 36*ai[i]; 669 vi = aj + ai[i]; 670 nz = ai[i+1] - ai[i]; 671 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 672 while (nz--) { 673 idx = 6*(*vi++); 674 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 675 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 677 v += 36; 678 } 679 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 681 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 682 idiag += 36; 683 i2 += 6; 684 } 685 break; 686 case 7: 687 for (i=0; i<m; i++) { 688 v = aa + 49*ai[i]; 689 vi = aj + ai[i]; 690 nz = ai[i+1] - ai[i]; 691 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 692 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 693 while (nz--) { 694 idx = 7*(*vi++); 695 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 696 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 697 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 698 v += 49; 699 } 700 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 701 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 702 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 703 idiag += 49; 704 i2 += 7; 705 } 706 break; 707 default: 708 for (i=0; i<m; i++) { 709 v = aa + bs2*ai[i]; 710 vi = aj + ai[i]; 711 nz = ai[i+1] - ai[i]; 712 713 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 714 /* copy all rows of x that are needed into contiguous space */ 715 workt = work; 716 for (j=0; j<nz; j++) { 717 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 718 workt += bs; 719 } 720 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 721 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 722 723 idiag += bs2; 724 i2 += bs; 725 } 726 break; 727 } 728 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 729 } 730 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 731 idiag = a->idiag+bs2*(a->mbs-1); 732 i2 = bs * (m-1); 733 switch (bs) { 734 case 1: 735 for (i=m-1; i>=0; i--) { 736 v = aa + ai[i]; 737 vi = aj + ai[i]; 738 nz = ai[i+1] - ai[i]; 739 s[0] = b[i2]; 740 for (j=0; j<nz; j++) { 741 xw[0] = x[vi[j]]; 742 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 743 } 744 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 745 x[i2] += xw[0]; 746 idiag -= 1; 747 i2 -= 1; 748 } 749 break; 750 case 2: 751 for (i=m-1; i>=0; i--) { 752 v = aa + 4*ai[i]; 753 vi = aj + ai[i]; 754 nz = ai[i+1] - ai[i]; 755 s[0] = b[i2]; s[1] = b[i2+1]; 756 for (j=0; j<nz; j++) { 757 idx = 2*vi[j]; 758 it = 4*j; 759 xw[0] = x[idx]; xw[1] = x[1+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 761 } 762 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 763 x[i2] += xw[0]; x[i2+1] += xw[1]; 764 idiag -= 4; 765 i2 -= 2; 766 } 767 break; 768 case 3: 769 for (i=m-1; i>=0; i--) { 770 v = aa + 9*ai[i]; 771 vi = aj + ai[i]; 772 nz = ai[i+1] - ai[i]; 773 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 774 while (nz--) { 775 idx = 3*(*vi++); 776 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 777 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 778 v += 9; 779 } 780 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 781 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 782 idiag -= 9; 783 i2 -= 3; 784 } 785 break; 786 case 4: 787 for (i=m-1; i>=0; i--) { 788 v = aa + 16*ai[i]; 789 vi = aj + ai[i]; 790 nz = ai[i+1] - ai[i]; 791 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 792 while (nz--) { 793 idx = 4*(*vi++); 794 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 795 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 796 v += 16; 797 } 798 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 799 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 800 idiag -= 16; 801 i2 -= 4; 802 } 803 break; 804 case 5: 805 for (i=m-1; i>=0; i--) { 806 v = aa + 25*ai[i]; 807 vi = aj + ai[i]; 808 nz = ai[i+1] - ai[i]; 809 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 810 while (nz--) { 811 idx = 5*(*vi++); 812 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 813 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 814 v += 25; 815 } 816 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 817 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 818 idiag -= 25; 819 i2 -= 5; 820 } 821 break; 822 case 6: 823 for (i=m-1; i>=0; i--) { 824 v = aa + 36*ai[i]; 825 vi = aj + ai[i]; 826 nz = ai[i+1] - ai[i]; 827 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 828 while (nz--) { 829 idx = 6*(*vi++); 830 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 831 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 833 v += 36; 834 } 835 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 837 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 838 idiag -= 36; 839 i2 -= 6; 840 } 841 break; 842 case 7: 843 for (i=m-1; i>=0; i--) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = ai[i+1] - ai[i]; 847 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 848 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 849 while (nz--) { 850 idx = 7*(*vi++); 851 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 852 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 853 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 854 v += 49; 855 } 856 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 857 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 858 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 859 idiag -= 49; 860 i2 -= 7; 861 } 862 break; 863 default: 864 for (i=m-1; i>=0; i--) { 865 v = aa + bs2*ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i+1] - ai[i]; 868 869 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 870 /* copy all rows of x that are needed into contiguous space */ 871 workt = work; 872 for (j=0; j<nz; j++) { 873 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 874 workt += bs; 875 } 876 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 877 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 878 879 idiag -= bs2; 880 i2 -= bs; 881 } 882 break; 883 } 884 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 885 } 886 } 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 893 /* 894 Special version for direct calls from Fortran (Used in PETSc-fun3d) 895 */ 896 #if defined(PETSC_HAVE_FORTRAN_CAPS) 897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 899 #define matsetvaluesblocked4_ matsetvaluesblocked4 900 #endif 901 902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 903 { 904 Mat A = *AA; 905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 906 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 907 PetscInt *ai =a->i,*ailen=a->ilen; 908 PetscInt *aj =a->j,stepval,lastcol = -1; 909 const PetscScalar *value = v; 910 MatScalar *ap,*aa = a->a,*bap; 911 912 PetscFunctionBegin; 913 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 914 stepval = (n-1)*4; 915 for (k=0; k<m; k++) { /* loop over added rows */ 916 row = im[k]; 917 rp = aj + ai[row]; 918 ap = aa + 16*ai[row]; 919 nrow = ailen[row]; 920 low = 0; 921 high = nrow; 922 for (l=0; l<n; l++) { /* loop over added columns */ 923 col = in[l]; 924 if (col <= lastcol) low = 0; 925 else high = nrow; 926 lastcol = col; 927 value = v + k*(stepval+4 + l)*4; 928 while (high-low > 7) { 929 t = (low+high)/2; 930 if (rp[t] > col) high = t; 931 else low = t; 932 } 933 for (i=low; i<high; i++) { 934 if (rp[i] > col) break; 935 if (rp[i] == col) { 936 bap = ap + 16*i; 937 for (ii=0; ii<4; ii++,value+=stepval) { 938 for (jj=ii; jj<16; jj+=4) { 939 bap[jj] += *value++; 940 } 941 } 942 goto noinsert2; 943 } 944 } 945 N = nrow++ - 1; 946 high++; /* added new column index thus must search to one higher than before */ 947 /* shift up all the later entries in this row */ 948 for (ii=N; ii>=i; ii--) { 949 rp[ii+1] = rp[ii]; 950 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 951 } 952 if (N >= i) { 953 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 954 } 955 rp[i] = col; 956 bap = ap + 16*i; 957 for (ii=0; ii<4; ii++,value+=stepval) { 958 for (jj=ii; jj<16; jj+=4) { 959 bap[jj] = *value++; 960 } 961 } 962 noinsert2:; 963 low = i; 964 } 965 ailen[row] = nrow; 966 } 967 PetscFunctionReturnVoid(); 968 } 969 970 #if defined(PETSC_HAVE_FORTRAN_CAPS) 971 #define matsetvalues4_ MATSETVALUES4 972 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 973 #define matsetvalues4_ matsetvalues4 974 #endif 975 976 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 977 { 978 Mat A = *AA; 979 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 980 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 981 PetscInt *ai=a->i,*ailen=a->ilen; 982 PetscInt *aj=a->j,brow,bcol; 983 PetscInt ridx,cidx,lastcol = -1; 984 MatScalar *ap,value,*aa=a->a,*bap; 985 986 PetscFunctionBegin; 987 for (k=0; k<m; k++) { /* loop over added rows */ 988 row = im[k]; brow = row/4; 989 rp = aj + ai[brow]; 990 ap = aa + 16*ai[brow]; 991 nrow = ailen[brow]; 992 low = 0; 993 high = nrow; 994 for (l=0; l<n; l++) { /* loop over added columns */ 995 col = in[l]; bcol = col/4; 996 ridx = row % 4; cidx = col % 4; 997 value = v[l + k*n]; 998 if (col <= lastcol) low = 0; 999 else high = nrow; 1000 lastcol = col; 1001 while (high-low > 7) { 1002 t = (low+high)/2; 1003 if (rp[t] > bcol) high = t; 1004 else low = t; 1005 } 1006 for (i=low; i<high; i++) { 1007 if (rp[i] > bcol) break; 1008 if (rp[i] == bcol) { 1009 bap = ap + 16*i + 4*cidx + ridx; 1010 *bap += value; 1011 goto noinsert1; 1012 } 1013 } 1014 N = nrow++ - 1; 1015 high++; /* added new column thus must search to one higher than before */ 1016 /* shift up all the later entries in this row */ 1017 for (ii=N; ii>=i; ii--) { 1018 rp[ii+1] = rp[ii]; 1019 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1020 } 1021 if (N>=i) { 1022 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1023 } 1024 rp[i] = bcol; 1025 ap[16*i + 4*cidx + ridx] = value; 1026 noinsert1:; 1027 low = i; 1028 } 1029 ailen[brow] = nrow; 1030 } 1031 PetscFunctionReturnVoid(); 1032 } 1033 1034 /* 1035 Checks for missing diagonals 1036 */ 1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1038 { 1039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1040 PetscErrorCode ierr; 1041 PetscInt *diag,*ii = a->i,i; 1042 1043 PetscFunctionBegin; 1044 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1045 *missing = PETSC_FALSE; 1046 if (A->rmap->n > 0 && !ii) { 1047 *missing = PETSC_TRUE; 1048 if (d) *d = 0; 1049 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1050 } else { 1051 diag = a->diag; 1052 for (i=0; i<a->mbs; i++) { 1053 if (diag[i] >= ii[i+1]) { 1054 *missing = PETSC_TRUE; 1055 if (d) *d = i; 1056 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1057 break; 1058 } 1059 } 1060 } 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1065 { 1066 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 PetscInt i,j,m = a->mbs; 1069 1070 PetscFunctionBegin; 1071 if (!a->diag) { 1072 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1073 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1074 a->free_diag = PETSC_TRUE; 1075 } 1076 for (i=0; i<m; i++) { 1077 a->diag[i] = a->i[i+1]; 1078 for (j=a->i[i]; j<a->i[i+1]; j++) { 1079 if (a->j[j] == i) { 1080 a->diag[i] = j; 1081 break; 1082 } 1083 } 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1090 { 1091 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1094 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1095 1096 PetscFunctionBegin; 1097 *nn = n; 1098 if (!ia) PetscFunctionReturn(0); 1099 if (symmetric) { 1100 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1101 nz = tia[n]; 1102 } else { 1103 tia = a->i; tja = a->j; 1104 } 1105 1106 if (!blockcompressed && bs > 1) { 1107 (*nn) *= bs; 1108 /* malloc & create the natural set of indices */ 1109 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1110 if (n) { 1111 (*ia)[0] = oshift; 1112 for (j=1; j<bs; j++) { 1113 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1114 } 1115 } 1116 1117 for (i=1; i<n; i++) { 1118 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1119 for (j=1; j<bs; j++) { 1120 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1121 } 1122 } 1123 if (n) { 1124 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1125 } 1126 1127 if (inja) { 1128 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1129 cnt = 0; 1130 for (i=0; i<n; i++) { 1131 for (j=0; j<bs; j++) { 1132 for (k=tia[i]; k<tia[i+1]; k++) { 1133 for (l=0; l<bs; l++) { 1134 (*ja)[cnt++] = bs*tja[k] + l; 1135 } 1136 } 1137 } 1138 } 1139 } 1140 1141 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1142 ierr = PetscFree(tia);CHKERRQ(ierr); 1143 ierr = PetscFree(tja);CHKERRQ(ierr); 1144 } 1145 } else if (oshift == 1) { 1146 if (symmetric) { 1147 nz = tia[A->rmap->n/bs]; 1148 /* add 1 to i and j indices */ 1149 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1150 *ia = tia; 1151 if (ja) { 1152 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1153 *ja = tja; 1154 } 1155 } else { 1156 nz = a->i[A->rmap->n/bs]; 1157 /* malloc space and add 1 to i and j indices */ 1158 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1159 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1160 if (ja) { 1161 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1162 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1163 } 1164 } 1165 } else { 1166 *ia = tia; 1167 if (ja) *ja = tja; 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1173 { 1174 PetscErrorCode ierr; 1175 1176 PetscFunctionBegin; 1177 if (!ia) PetscFunctionReturn(0); 1178 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1179 ierr = PetscFree(*ia);CHKERRQ(ierr); 1180 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1186 { 1187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1188 PetscErrorCode ierr; 1189 1190 PetscFunctionBegin; 1191 #if defined(PETSC_USE_LOG) 1192 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1193 #endif 1194 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1195 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1197 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1198 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1199 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1200 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1201 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1204 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1205 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1206 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1207 1208 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1209 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1210 ierr = PetscFree(A->data);CHKERRQ(ierr); 1211 1212 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1223 #if defined(PETSC_HAVE_HYPRE) 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1225 #endif 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1230 { 1231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 switch (op) { 1236 case MAT_ROW_ORIENTED: 1237 a->roworiented = flg; 1238 break; 1239 case MAT_KEEP_NONZERO_PATTERN: 1240 a->keepnonzeropattern = flg; 1241 break; 1242 case MAT_NEW_NONZERO_LOCATIONS: 1243 a->nonew = (flg ? 0 : 1); 1244 break; 1245 case MAT_NEW_NONZERO_LOCATION_ERR: 1246 a->nonew = (flg ? -1 : 0); 1247 break; 1248 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1249 a->nonew = (flg ? -2 : 0); 1250 break; 1251 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1252 a->nounused = (flg ? -1 : 0); 1253 break; 1254 case MAT_NEW_DIAGONALS: 1255 case MAT_IGNORE_OFF_PROC_ENTRIES: 1256 case MAT_USE_HASH_TABLE: 1257 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1258 break; 1259 case MAT_SPD: 1260 case MAT_SYMMETRIC: 1261 case MAT_STRUCTURALLY_SYMMETRIC: 1262 case MAT_HERMITIAN: 1263 case MAT_SYMMETRY_ETERNAL: 1264 case MAT_SUBMAT_SINGLEIS: 1265 case MAT_STRUCTURE_ONLY: 1266 /* These options are handled directly by MatSetOption() */ 1267 break; 1268 default: 1269 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1270 } 1271 PetscFunctionReturn(0); 1272 } 1273 1274 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1275 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1276 { 1277 PetscErrorCode ierr; 1278 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1279 MatScalar *aa_i; 1280 PetscScalar *v_i; 1281 1282 PetscFunctionBegin; 1283 bs = A->rmap->bs; 1284 bs2 = bs*bs; 1285 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1286 1287 bn = row/bs; /* Block number */ 1288 bp = row % bs; /* Block Position */ 1289 M = ai[bn+1] - ai[bn]; 1290 *nz = bs*M; 1291 1292 if (v) { 1293 *v = 0; 1294 if (*nz) { 1295 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1296 for (i=0; i<M; i++) { /* for each block in the block row */ 1297 v_i = *v + i*bs; 1298 aa_i = aa + bs2*(ai[bn] + i); 1299 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1300 } 1301 } 1302 } 1303 1304 if (idx) { 1305 *idx = 0; 1306 if (*nz) { 1307 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1308 for (i=0; i<M; i++) { /* for each block in the block row */ 1309 idx_i = *idx + i*bs; 1310 itmp = bs*aj[ai[bn] + i]; 1311 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1312 } 1313 } 1314 } 1315 PetscFunctionReturn(0); 1316 } 1317 1318 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1319 { 1320 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1321 PetscErrorCode ierr; 1322 1323 PetscFunctionBegin; 1324 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1329 { 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1334 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1335 PetscFunctionReturn(0); 1336 } 1337 1338 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1339 { 1340 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1341 Mat C; 1342 PetscErrorCode ierr; 1343 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1344 PetscInt *rows,*cols,bs2=a->bs2; 1345 MatScalar *array; 1346 1347 PetscFunctionBegin; 1348 if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1349 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1350 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1351 1352 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1353 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1354 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1355 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1356 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1357 ierr = PetscFree(col);CHKERRQ(ierr); 1358 } else { 1359 C = *B; 1360 } 1361 1362 array = a->a; 1363 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1364 for (i=0; i<mbs; i++) { 1365 cols[0] = i*bs; 1366 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1367 len = ai[i+1] - ai[i]; 1368 for (j=0; j<len; j++) { 1369 rows[0] = (*aj++)*bs; 1370 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1371 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1372 array += bs2; 1373 } 1374 } 1375 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1376 1377 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1378 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 1380 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1381 *B = C; 1382 } else { 1383 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1389 { 1390 PetscErrorCode ierr; 1391 Mat Btrans; 1392 1393 PetscFunctionBegin; 1394 *f = PETSC_FALSE; 1395 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1396 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1397 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 1401 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1402 { 1403 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1404 PetscErrorCode ierr; 1405 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1406 int fd; 1407 PetscScalar *aa; 1408 FILE *file; 1409 1410 PetscFunctionBegin; 1411 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1412 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1413 col_lens[0] = MAT_FILE_CLASSID; 1414 1415 col_lens[1] = A->rmap->N; 1416 col_lens[2] = A->cmap->n; 1417 col_lens[3] = a->nz*bs2; 1418 1419 /* store lengths of each row and write (including header) to file */ 1420 count = 0; 1421 for (i=0; i<a->mbs; i++) { 1422 for (j=0; j<bs; j++) { 1423 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1424 } 1425 } 1426 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1427 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1428 1429 /* store column indices (zero start index) */ 1430 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1431 count = 0; 1432 for (i=0; i<a->mbs; i++) { 1433 for (j=0; j<bs; j++) { 1434 for (k=a->i[i]; k<a->i[i+1]; k++) { 1435 for (l=0; l<bs; l++) { 1436 jj[count++] = bs*a->j[k] + l; 1437 } 1438 } 1439 } 1440 } 1441 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1442 ierr = PetscFree(jj);CHKERRQ(ierr); 1443 1444 /* store nonzero values */ 1445 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1446 count = 0; 1447 for (i=0; i<a->mbs; i++) { 1448 for (j=0; j<bs; j++) { 1449 for (k=a->i[i]; k<a->i[i+1]; k++) { 1450 for (l=0; l<bs; l++) { 1451 aa[count++] = a->a[bs2*k + l*bs + j]; 1452 } 1453 } 1454 } 1455 } 1456 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1457 ierr = PetscFree(aa);CHKERRQ(ierr); 1458 1459 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1460 if (file) { 1461 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1467 { 1468 PetscErrorCode ierr; 1469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1470 PetscInt i,bs = A->rmap->bs,k; 1471 1472 PetscFunctionBegin; 1473 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1474 for (i=0; i<a->mbs; i++) { 1475 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1476 for (k=a->i[i]; k<a->i[i+1]; k++) { 1477 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1478 } 1479 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1480 } 1481 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1486 { 1487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1488 PetscErrorCode ierr; 1489 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1490 PetscViewerFormat format; 1491 1492 PetscFunctionBegin; 1493 if (A->structure_only) { 1494 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1495 PetscFunctionReturn(0); 1496 } 1497 1498 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1499 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1500 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1501 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1502 const char *matname; 1503 Mat aij; 1504 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1505 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1506 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1507 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1508 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1509 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1510 PetscFunctionReturn(0); 1511 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1512 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1513 for (i=0; i<a->mbs; i++) { 1514 for (j=0; j<bs; j++) { 1515 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1516 for (k=a->i[i]; k<a->i[i+1]; k++) { 1517 for (l=0; l<bs; l++) { 1518 #if defined(PETSC_USE_COMPLEX) 1519 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1520 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1521 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1522 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1523 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1524 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1525 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1526 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1527 } 1528 #else 1529 if (a->a[bs2*k + l*bs + j] != 0.0) { 1530 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1531 } 1532 #endif 1533 } 1534 } 1535 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1536 } 1537 } 1538 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1539 } else { 1540 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1541 for (i=0; i<a->mbs; i++) { 1542 for (j=0; j<bs; j++) { 1543 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1544 for (k=a->i[i]; k<a->i[i+1]; k++) { 1545 for (l=0; l<bs; l++) { 1546 #if defined(PETSC_USE_COMPLEX) 1547 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1549 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1550 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1551 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1552 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1553 } else { 1554 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1555 } 1556 #else 1557 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1558 #endif 1559 } 1560 } 1561 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1562 } 1563 } 1564 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1565 } 1566 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #include <petscdraw.h> 1571 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1572 { 1573 Mat A = (Mat) Aa; 1574 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1575 PetscErrorCode ierr; 1576 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1577 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1578 MatScalar *aa; 1579 PetscViewer viewer; 1580 PetscViewerFormat format; 1581 1582 PetscFunctionBegin; 1583 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1584 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1585 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1586 1587 /* loop over matrix elements drawing boxes */ 1588 1589 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1590 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1591 /* Blue for negative, Cyan for zero and Red for positive */ 1592 color = PETSC_DRAW_BLUE; 1593 for (i=0,row=0; i<mbs; i++,row+=bs) { 1594 for (j=a->i[i]; j<a->i[i+1]; j++) { 1595 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1596 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1597 aa = a->a + j*bs2; 1598 for (k=0; k<bs; k++) { 1599 for (l=0; l<bs; l++) { 1600 if (PetscRealPart(*aa++) >= 0.) continue; 1601 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1602 } 1603 } 1604 } 1605 } 1606 color = PETSC_DRAW_CYAN; 1607 for (i=0,row=0; i<mbs; i++,row+=bs) { 1608 for (j=a->i[i]; j<a->i[i+1]; j++) { 1609 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1610 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1611 aa = a->a + j*bs2; 1612 for (k=0; k<bs; k++) { 1613 for (l=0; l<bs; l++) { 1614 if (PetscRealPart(*aa++) != 0.) continue; 1615 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1616 } 1617 } 1618 } 1619 } 1620 color = PETSC_DRAW_RED; 1621 for (i=0,row=0; i<mbs; i++,row+=bs) { 1622 for (j=a->i[i]; j<a->i[i+1]; j++) { 1623 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1624 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1625 aa = a->a + j*bs2; 1626 for (k=0; k<bs; k++) { 1627 for (l=0; l<bs; l++) { 1628 if (PetscRealPart(*aa++) <= 0.) continue; 1629 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1630 } 1631 } 1632 } 1633 } 1634 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1635 } else { 1636 /* use contour shading to indicate magnitude of values */ 1637 /* first determine max of all nonzero values */ 1638 PetscReal minv = 0.0, maxv = 0.0; 1639 PetscDraw popup; 1640 1641 for (i=0; i<a->nz*a->bs2; i++) { 1642 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1643 } 1644 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1645 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1646 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1647 1648 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1649 for (i=0,row=0; i<mbs; i++,row+=bs) { 1650 for (j=a->i[i]; j<a->i[i+1]; j++) { 1651 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1652 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1653 aa = a->a + j*bs2; 1654 for (k=0; k<bs; k++) { 1655 for (l=0; l<bs; l++) { 1656 MatScalar v = *aa++; 1657 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1658 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1659 } 1660 } 1661 } 1662 } 1663 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1664 } 1665 PetscFunctionReturn(0); 1666 } 1667 1668 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1669 { 1670 PetscErrorCode ierr; 1671 PetscReal xl,yl,xr,yr,w,h; 1672 PetscDraw draw; 1673 PetscBool isnull; 1674 1675 PetscFunctionBegin; 1676 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1677 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1678 if (isnull) PetscFunctionReturn(0); 1679 1680 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1681 xr += w; yr += h; xl = -w; yl = -h; 1682 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1683 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1684 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1685 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1686 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1691 { 1692 PetscErrorCode ierr; 1693 PetscBool iascii,isbinary,isdraw; 1694 1695 PetscFunctionBegin; 1696 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1697 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1698 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1699 if (iascii) { 1700 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1701 } else if (isbinary) { 1702 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1703 } else if (isdraw) { 1704 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1705 } else { 1706 Mat B; 1707 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1708 ierr = MatView(B,viewer);CHKERRQ(ierr); 1709 ierr = MatDestroy(&B);CHKERRQ(ierr); 1710 } 1711 PetscFunctionReturn(0); 1712 } 1713 1714 1715 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1716 { 1717 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1718 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1719 PetscInt *ai = a->i,*ailen = a->ilen; 1720 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1721 MatScalar *ap,*aa = a->a; 1722 1723 PetscFunctionBegin; 1724 for (k=0; k<m; k++) { /* loop over rows */ 1725 row = im[k]; brow = row/bs; 1726 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1727 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1728 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1729 nrow = ailen[brow]; 1730 for (l=0; l<n; l++) { /* loop over columns */ 1731 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1732 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1733 col = in[l]; 1734 bcol = col/bs; 1735 cidx = col%bs; 1736 ridx = row%bs; 1737 high = nrow; 1738 low = 0; /* assume unsorted */ 1739 while (high-low > 5) { 1740 t = (low+high)/2; 1741 if (rp[t] > bcol) high = t; 1742 else low = t; 1743 } 1744 for (i=low; i<high; i++) { 1745 if (rp[i] > bcol) break; 1746 if (rp[i] == bcol) { 1747 *v++ = ap[bs2*i+bs*cidx+ridx]; 1748 goto finished; 1749 } 1750 } 1751 *v++ = 0.0; 1752 finished:; 1753 } 1754 } 1755 PetscFunctionReturn(0); 1756 } 1757 1758 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1759 { 1760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1761 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1762 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1763 PetscErrorCode ierr; 1764 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1765 PetscBool roworiented=a->roworiented; 1766 const PetscScalar *value = v; 1767 MatScalar *ap=NULL,*aa = a->a,*bap; 1768 1769 PetscFunctionBegin; 1770 if (roworiented) { 1771 stepval = (n-1)*bs; 1772 } else { 1773 stepval = (m-1)*bs; 1774 } 1775 for (k=0; k<m; k++) { /* loop over added rows */ 1776 row = im[k]; 1777 if (row < 0) continue; 1778 #if defined(PETSC_USE_DEBUG) 1779 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1780 #endif 1781 rp = aj + ai[row]; 1782 if (!A->structure_only) ap = aa + bs2*ai[row]; 1783 rmax = imax[row]; 1784 nrow = ailen[row]; 1785 low = 0; 1786 high = nrow; 1787 for (l=0; l<n; l++) { /* loop over added columns */ 1788 if (in[l] < 0) continue; 1789 #if defined(PETSC_USE_DEBUG) 1790 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1791 #endif 1792 col = in[l]; 1793 if (!A->structure_only) { 1794 if (roworiented) { 1795 value = v + (k*(stepval+bs) + l)*bs; 1796 } else { 1797 value = v + (l*(stepval+bs) + k)*bs; 1798 } 1799 } 1800 if (col <= lastcol) low = 0; 1801 else high = nrow; 1802 lastcol = col; 1803 while (high-low > 7) { 1804 t = (low+high)/2; 1805 if (rp[t] > col) high = t; 1806 else low = t; 1807 } 1808 for (i=low; i<high; i++) { 1809 if (rp[i] > col) break; 1810 if (rp[i] == col) { 1811 if (A->structure_only) goto noinsert2; 1812 bap = ap + bs2*i; 1813 if (roworiented) { 1814 if (is == ADD_VALUES) { 1815 for (ii=0; ii<bs; ii++,value+=stepval) { 1816 for (jj=ii; jj<bs2; jj+=bs) { 1817 bap[jj] += *value++; 1818 } 1819 } 1820 } else { 1821 for (ii=0; ii<bs; ii++,value+=stepval) { 1822 for (jj=ii; jj<bs2; jj+=bs) { 1823 bap[jj] = *value++; 1824 } 1825 } 1826 } 1827 } else { 1828 if (is == ADD_VALUES) { 1829 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1830 for (jj=0; jj<bs; jj++) { 1831 bap[jj] += value[jj]; 1832 } 1833 bap += bs; 1834 } 1835 } else { 1836 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1837 for (jj=0; jj<bs; jj++) { 1838 bap[jj] = value[jj]; 1839 } 1840 bap += bs; 1841 } 1842 } 1843 } 1844 goto noinsert2; 1845 } 1846 } 1847 if (nonew == 1) goto noinsert2; 1848 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1849 if (A->structure_only) { 1850 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1851 } else { 1852 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1853 } 1854 N = nrow++ - 1; high++; 1855 /* shift up all the later entries in this row */ 1856 for (ii=N; ii>=i; ii--) { 1857 rp[ii+1] = rp[ii]; 1858 if (!A->structure_only) { 1859 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1860 } 1861 } 1862 if (N >= i && !A->structure_only) { 1863 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1864 } 1865 1866 rp[i] = col; 1867 if (!A->structure_only) { 1868 bap = ap + bs2*i; 1869 if (roworiented) { 1870 for (ii=0; ii<bs; ii++,value+=stepval) { 1871 for (jj=ii; jj<bs2; jj+=bs) { 1872 bap[jj] = *value++; 1873 } 1874 } 1875 } else { 1876 for (ii=0; ii<bs; ii++,value+=stepval) { 1877 for (jj=0; jj<bs; jj++) { 1878 *bap++ = *value++; 1879 } 1880 } 1881 } 1882 } 1883 noinsert2:; 1884 low = i; 1885 } 1886 ailen[row] = nrow; 1887 } 1888 PetscFunctionReturn(0); 1889 } 1890 1891 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1892 { 1893 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1894 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1895 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1896 PetscErrorCode ierr; 1897 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1898 MatScalar *aa = a->a,*ap; 1899 PetscReal ratio=0.6; 1900 1901 PetscFunctionBegin; 1902 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1903 1904 if (m) rmax = ailen[0]; 1905 for (i=1; i<mbs; i++) { 1906 /* move each row back by the amount of empty slots (fshift) before it*/ 1907 fshift += imax[i-1] - ailen[i-1]; 1908 rmax = PetscMax(rmax,ailen[i]); 1909 if (fshift) { 1910 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1911 N = ailen[i]; 1912 for (j=0; j<N; j++) { 1913 ip[j-fshift] = ip[j]; 1914 if (!A->structure_only) { 1915 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1916 } 1917 } 1918 } 1919 ai[i] = ai[i-1] + ailen[i-1]; 1920 } 1921 if (mbs) { 1922 fshift += imax[mbs-1] - ailen[mbs-1]; 1923 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1924 } 1925 1926 /* reset ilen and imax for each row */ 1927 a->nonzerorowcnt = 0; 1928 if (A->structure_only) { 1929 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1930 } else { /* !A->structure_only */ 1931 for (i=0; i<mbs; i++) { 1932 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1933 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1934 } 1935 } 1936 a->nz = ai[mbs]; 1937 1938 /* diagonals may have moved, so kill the diagonal pointers */ 1939 a->idiagvalid = PETSC_FALSE; 1940 if (fshift && a->diag) { 1941 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1942 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1943 a->diag = 0; 1944 } 1945 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1946 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1947 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1948 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1949 1950 A->info.mallocs += a->reallocs; 1951 a->reallocs = 0; 1952 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1953 a->rmax = rmax; 1954 1955 if (!A->structure_only) { 1956 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 /* 1962 This function returns an array of flags which indicate the locations of contiguous 1963 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1964 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1965 Assume: sizes should be long enough to hold all the values. 1966 */ 1967 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1968 { 1969 PetscInt i,j,k,row; 1970 PetscBool flg; 1971 1972 PetscFunctionBegin; 1973 for (i=0,j=0; i<n; j++) { 1974 row = idx[i]; 1975 if (row%bs!=0) { /* Not the begining of a block */ 1976 sizes[j] = 1; 1977 i++; 1978 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1979 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1980 i++; 1981 } else { /* Begining of the block, so check if the complete block exists */ 1982 flg = PETSC_TRUE; 1983 for (k=1; k<bs; k++) { 1984 if (row+k != idx[i+k]) { /* break in the block */ 1985 flg = PETSC_FALSE; 1986 break; 1987 } 1988 } 1989 if (flg) { /* No break in the bs */ 1990 sizes[j] = bs; 1991 i += bs; 1992 } else { 1993 sizes[j] = 1; 1994 i++; 1995 } 1996 } 1997 } 1998 *bs_max = j; 1999 PetscFunctionReturn(0); 2000 } 2001 2002 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2003 { 2004 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2005 PetscErrorCode ierr; 2006 PetscInt i,j,k,count,*rows; 2007 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2008 PetscScalar zero = 0.0; 2009 MatScalar *aa; 2010 const PetscScalar *xx; 2011 PetscScalar *bb; 2012 2013 PetscFunctionBegin; 2014 /* fix right hand side if needed */ 2015 if (x && b) { 2016 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2017 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2018 for (i=0; i<is_n; i++) { 2019 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2020 } 2021 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2022 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2023 } 2024 2025 /* Make a copy of the IS and sort it */ 2026 /* allocate memory for rows,sizes */ 2027 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2028 2029 /* copy IS values to rows, and sort them */ 2030 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2031 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2032 2033 if (baij->keepnonzeropattern) { 2034 for (i=0; i<is_n; i++) sizes[i] = 1; 2035 bs_max = is_n; 2036 } else { 2037 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2038 A->nonzerostate++; 2039 } 2040 2041 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2042 row = rows[j]; 2043 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2044 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2045 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2046 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2047 if (diag != (PetscScalar)0.0) { 2048 if (baij->ilen[row/bs] > 0) { 2049 baij->ilen[row/bs] = 1; 2050 baij->j[baij->i[row/bs]] = row/bs; 2051 2052 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2053 } 2054 /* Now insert all the diagonal values for this bs */ 2055 for (k=0; k<bs; k++) { 2056 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2057 } 2058 } else { /* (diag == 0.0) */ 2059 baij->ilen[row/bs] = 0; 2060 } /* end (diag == 0.0) */ 2061 } else { /* (sizes[i] != bs) */ 2062 #if defined(PETSC_USE_DEBUG) 2063 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2064 #endif 2065 for (k=0; k<count; k++) { 2066 aa[0] = zero; 2067 aa += bs; 2068 } 2069 if (diag != (PetscScalar)0.0) { 2070 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2071 } 2072 } 2073 } 2074 2075 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2076 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2081 { 2082 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2083 PetscErrorCode ierr; 2084 PetscInt i,j,k,count; 2085 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2086 PetscScalar zero = 0.0; 2087 MatScalar *aa; 2088 const PetscScalar *xx; 2089 PetscScalar *bb; 2090 PetscBool *zeroed,vecs = PETSC_FALSE; 2091 2092 PetscFunctionBegin; 2093 /* fix right hand side if needed */ 2094 if (x && b) { 2095 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2096 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2097 vecs = PETSC_TRUE; 2098 } 2099 2100 /* zero the columns */ 2101 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2102 for (i=0; i<is_n; i++) { 2103 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2104 zeroed[is_idx[i]] = PETSC_TRUE; 2105 } 2106 for (i=0; i<A->rmap->N; i++) { 2107 if (!zeroed[i]) { 2108 row = i/bs; 2109 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2110 for (k=0; k<bs; k++) { 2111 col = bs*baij->j[j] + k; 2112 if (zeroed[col]) { 2113 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2114 if (vecs) bb[i] -= aa[0]*xx[col]; 2115 aa[0] = 0.0; 2116 } 2117 } 2118 } 2119 } else if (vecs) bb[i] = diag*xx[i]; 2120 } 2121 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2122 if (vecs) { 2123 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2124 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2125 } 2126 2127 /* zero the rows */ 2128 for (i=0; i<is_n; i++) { 2129 row = is_idx[i]; 2130 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2131 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2132 for (k=0; k<count; k++) { 2133 aa[0] = zero; 2134 aa += bs; 2135 } 2136 if (diag != (PetscScalar)0.0) { 2137 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2138 } 2139 } 2140 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2145 { 2146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2147 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2148 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2149 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2150 PetscErrorCode ierr; 2151 PetscInt ridx,cidx,bs2=a->bs2; 2152 PetscBool roworiented=a->roworiented; 2153 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2154 2155 PetscFunctionBegin; 2156 for (k=0; k<m; k++) { /* loop over added rows */ 2157 row = im[k]; 2158 brow = row/bs; 2159 if (row < 0) continue; 2160 #if defined(PETSC_USE_DEBUG) 2161 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2162 #endif 2163 rp = aj + ai[brow]; 2164 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2165 rmax = imax[brow]; 2166 nrow = ailen[brow]; 2167 low = 0; 2168 high = nrow; 2169 for (l=0; l<n; l++) { /* loop over added columns */ 2170 if (in[l] < 0) continue; 2171 #if defined(PETSC_USE_DEBUG) 2172 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2173 #endif 2174 col = in[l]; bcol = col/bs; 2175 ridx = row % bs; cidx = col % bs; 2176 if (!A->structure_only) { 2177 if (roworiented) { 2178 value = v[l + k*n]; 2179 } else { 2180 value = v[k + l*m]; 2181 } 2182 } 2183 if (col <= lastcol) low = 0; else high = nrow; 2184 lastcol = col; 2185 while (high-low > 7) { 2186 t = (low+high)/2; 2187 if (rp[t] > bcol) high = t; 2188 else low = t; 2189 } 2190 for (i=low; i<high; i++) { 2191 if (rp[i] > bcol) break; 2192 if (rp[i] == bcol) { 2193 bap = ap + bs2*i + bs*cidx + ridx; 2194 if (!A->structure_only) { 2195 if (is == ADD_VALUES) *bap += value; 2196 else *bap = value; 2197 } 2198 goto noinsert1; 2199 } 2200 } 2201 if (nonew == 1) goto noinsert1; 2202 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2203 if (A->structure_only) { 2204 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2205 } else { 2206 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2207 } 2208 N = nrow++ - 1; high++; 2209 /* shift up all the later entries in this row */ 2210 for (ii=N; ii>=i; ii--) { 2211 rp[ii+1] = rp[ii]; 2212 if (!A->structure_only) { 2213 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2214 } 2215 } 2216 if (N>=i && !A->structure_only) { 2217 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2218 } 2219 rp[i] = bcol; 2220 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2221 a->nz++; 2222 A->nonzerostate++; 2223 noinsert1:; 2224 low = i; 2225 } 2226 ailen[brow] = nrow; 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2232 { 2233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2234 Mat outA; 2235 PetscErrorCode ierr; 2236 PetscBool row_identity,col_identity; 2237 2238 PetscFunctionBegin; 2239 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2240 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2241 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2242 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2243 2244 outA = inA; 2245 inA->factortype = MAT_FACTOR_LU; 2246 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2247 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2248 2249 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2250 2251 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2252 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2253 a->row = row; 2254 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2255 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2256 a->col = col; 2257 2258 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2259 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2260 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2261 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2262 2263 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2264 if (!a->solve_work) { 2265 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2266 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2267 } 2268 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2273 { 2274 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2275 PetscInt i,nz,mbs; 2276 2277 PetscFunctionBegin; 2278 nz = baij->maxnz; 2279 mbs = baij->mbs; 2280 for (i=0; i<nz; i++) { 2281 baij->j[i] = indices[i]; 2282 } 2283 baij->nz = nz; 2284 for (i=0; i<mbs; i++) { 2285 baij->ilen[i] = baij->imax[i]; 2286 } 2287 PetscFunctionReturn(0); 2288 } 2289 2290 /*@ 2291 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2292 in the matrix. 2293 2294 Input Parameters: 2295 + mat - the SeqBAIJ matrix 2296 - indices - the column indices 2297 2298 Level: advanced 2299 2300 Notes: 2301 This can be called if you have precomputed the nonzero structure of the 2302 matrix and want to provide it to the matrix object to improve the performance 2303 of the MatSetValues() operation. 2304 2305 You MUST have set the correct numbers of nonzeros per row in the call to 2306 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2307 2308 MUST be called before any calls to MatSetValues(); 2309 2310 @*/ 2311 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2312 { 2313 PetscErrorCode ierr; 2314 2315 PetscFunctionBegin; 2316 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2317 PetscValidPointer(indices,2); 2318 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2325 PetscErrorCode ierr; 2326 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2327 PetscReal atmp; 2328 PetscScalar *x,zero = 0.0; 2329 MatScalar *aa; 2330 PetscInt ncols,brow,krow,kcol; 2331 2332 PetscFunctionBegin; 2333 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2334 bs = A->rmap->bs; 2335 aa = a->a; 2336 ai = a->i; 2337 aj = a->j; 2338 mbs = a->mbs; 2339 2340 ierr = VecSet(v,zero);CHKERRQ(ierr); 2341 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2342 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2343 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2344 for (i=0; i<mbs; i++) { 2345 ncols = ai[1] - ai[0]; ai++; 2346 brow = bs*i; 2347 for (j=0; j<ncols; j++) { 2348 for (kcol=0; kcol<bs; kcol++) { 2349 for (krow=0; krow<bs; krow++) { 2350 atmp = PetscAbsScalar(*aa);aa++; 2351 row = brow + krow; /* row index */ 2352 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2353 } 2354 } 2355 aj++; 2356 } 2357 } 2358 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2359 PetscFunctionReturn(0); 2360 } 2361 2362 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2363 { 2364 PetscErrorCode ierr; 2365 2366 PetscFunctionBegin; 2367 /* If the two matrices have the same copy implementation, use fast copy. */ 2368 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2369 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2370 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2371 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2372 2373 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2374 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2375 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2376 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2377 } else { 2378 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2384 { 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2393 { 2394 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2395 2396 PetscFunctionBegin; 2397 *array = a->a; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2402 { 2403 PetscFunctionBegin; 2404 PetscFunctionReturn(0); 2405 } 2406 2407 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2408 { 2409 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2410 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2411 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2412 PetscErrorCode ierr; 2413 2414 PetscFunctionBegin; 2415 /* Set the number of nonzeros in the new matrix */ 2416 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2421 { 2422 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2423 PetscErrorCode ierr; 2424 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2425 PetscBLASInt one=1; 2426 2427 PetscFunctionBegin; 2428 if (str == SAME_NONZERO_PATTERN) { 2429 PetscScalar alpha = a; 2430 PetscBLASInt bnz; 2431 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2432 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2433 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2434 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2435 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2436 } else { 2437 Mat B; 2438 PetscInt *nnz; 2439 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2440 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2441 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2442 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2443 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2445 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2446 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2447 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2448 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2449 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2450 ierr = PetscFree(nnz);CHKERRQ(ierr); 2451 } 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2456 { 2457 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2458 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2459 MatScalar *aa = a->a; 2460 2461 PetscFunctionBegin; 2462 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2467 { 2468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2469 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2470 MatScalar *aa = a->a; 2471 2472 PetscFunctionBegin; 2473 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 /* 2478 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2479 */ 2480 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2481 { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2483 PetscErrorCode ierr; 2484 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2485 PetscInt nz = a->i[m],row,*jj,mr,col; 2486 2487 PetscFunctionBegin; 2488 *nn = n; 2489 if (!ia) PetscFunctionReturn(0); 2490 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2491 else { 2492 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2493 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2494 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2495 jj = a->j; 2496 for (i=0; i<nz; i++) { 2497 collengths[jj[i]]++; 2498 } 2499 cia[0] = oshift; 2500 for (i=0; i<n; i++) { 2501 cia[i+1] = cia[i] + collengths[i]; 2502 } 2503 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2504 jj = a->j; 2505 for (row=0; row<m; row++) { 2506 mr = a->i[row+1] - a->i[row]; 2507 for (i=0; i<mr; i++) { 2508 col = *jj++; 2509 2510 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2511 } 2512 } 2513 ierr = PetscFree(collengths);CHKERRQ(ierr); 2514 *ia = cia; *ja = cja; 2515 } 2516 PetscFunctionReturn(0); 2517 } 2518 2519 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2520 { 2521 PetscErrorCode ierr; 2522 2523 PetscFunctionBegin; 2524 if (!ia) PetscFunctionReturn(0); 2525 ierr = PetscFree(*ia);CHKERRQ(ierr); 2526 ierr = PetscFree(*ja);CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 /* 2531 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2532 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2533 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2534 */ 2535 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2536 { 2537 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2538 PetscErrorCode ierr; 2539 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2540 PetscInt nz = a->i[m],row,*jj,mr,col; 2541 PetscInt *cspidx; 2542 2543 PetscFunctionBegin; 2544 *nn = n; 2545 if (!ia) PetscFunctionReturn(0); 2546 2547 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2549 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2550 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2551 jj = a->j; 2552 for (i=0; i<nz; i++) { 2553 collengths[jj[i]]++; 2554 } 2555 cia[0] = oshift; 2556 for (i=0; i<n; i++) { 2557 cia[i+1] = cia[i] + collengths[i]; 2558 } 2559 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2560 jj = a->j; 2561 for (row=0; row<m; row++) { 2562 mr = a->i[row+1] - a->i[row]; 2563 for (i=0; i<mr; i++) { 2564 col = *jj++; 2565 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2566 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2567 } 2568 } 2569 ierr = PetscFree(collengths);CHKERRQ(ierr); 2570 *ia = cia; *ja = cja; 2571 *spidx = cspidx; 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2576 { 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2581 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2586 { 2587 PetscErrorCode ierr; 2588 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2589 2590 PetscFunctionBegin; 2591 if (!Y->preallocated || !aij->nz) { 2592 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2593 } 2594 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 /* -------------------------------------------------------------------*/ 2599 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2600 MatGetRow_SeqBAIJ, 2601 MatRestoreRow_SeqBAIJ, 2602 MatMult_SeqBAIJ_N, 2603 /* 4*/ MatMultAdd_SeqBAIJ_N, 2604 MatMultTranspose_SeqBAIJ, 2605 MatMultTransposeAdd_SeqBAIJ, 2606 0, 2607 0, 2608 0, 2609 /* 10*/ 0, 2610 MatLUFactor_SeqBAIJ, 2611 0, 2612 0, 2613 MatTranspose_SeqBAIJ, 2614 /* 15*/ MatGetInfo_SeqBAIJ, 2615 MatEqual_SeqBAIJ, 2616 MatGetDiagonal_SeqBAIJ, 2617 MatDiagonalScale_SeqBAIJ, 2618 MatNorm_SeqBAIJ, 2619 /* 20*/ 0, 2620 MatAssemblyEnd_SeqBAIJ, 2621 MatSetOption_SeqBAIJ, 2622 MatZeroEntries_SeqBAIJ, 2623 /* 24*/ MatZeroRows_SeqBAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /* 29*/ MatSetUp_SeqBAIJ, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /* 34*/ MatDuplicate_SeqBAIJ, 2634 0, 2635 0, 2636 MatILUFactor_SeqBAIJ, 2637 0, 2638 /* 39*/ MatAXPY_SeqBAIJ, 2639 MatCreateSubMatrices_SeqBAIJ, 2640 MatIncreaseOverlap_SeqBAIJ, 2641 MatGetValues_SeqBAIJ, 2642 MatCopy_SeqBAIJ, 2643 /* 44*/ 0, 2644 MatScale_SeqBAIJ, 2645 MatShift_SeqBAIJ, 2646 0, 2647 MatZeroRowsColumns_SeqBAIJ, 2648 /* 49*/ 0, 2649 MatGetRowIJ_SeqBAIJ, 2650 MatRestoreRowIJ_SeqBAIJ, 2651 MatGetColumnIJ_SeqBAIJ, 2652 MatRestoreColumnIJ_SeqBAIJ, 2653 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2654 0, 2655 0, 2656 0, 2657 MatSetValuesBlocked_SeqBAIJ, 2658 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2659 MatDestroy_SeqBAIJ, 2660 MatView_SeqBAIJ, 2661 0, 2662 0, 2663 /* 64*/ 0, 2664 0, 2665 0, 2666 0, 2667 0, 2668 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2669 0, 2670 MatConvert_Basic, 2671 0, 2672 0, 2673 /* 74*/ 0, 2674 MatFDColoringApply_BAIJ, 2675 0, 2676 0, 2677 0, 2678 /* 79*/ 0, 2679 0, 2680 0, 2681 0, 2682 MatLoad_SeqBAIJ, 2683 /* 84*/ 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /* 89*/ 0, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /* 94*/ 0, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /* 99*/ 0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /*104*/ 0, 2704 MatRealPart_SeqBAIJ, 2705 MatImaginaryPart_SeqBAIJ, 2706 0, 2707 0, 2708 /*109*/ 0, 2709 0, 2710 0, 2711 0, 2712 MatMissingDiagonal_SeqBAIJ, 2713 /*114*/ 0, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*119*/ 0, 2719 0, 2720 MatMultHermitianTranspose_SeqBAIJ, 2721 MatMultHermitianTransposeAdd_SeqBAIJ, 2722 0, 2723 /*124*/ 0, 2724 0, 2725 MatInvertBlockDiagonal_SeqBAIJ, 2726 0, 2727 0, 2728 /*129*/ 0, 2729 0, 2730 0, 2731 0, 2732 0, 2733 /*134*/ 0, 2734 0, 2735 0, 2736 0, 2737 0, 2738 /*139*/ MatSetBlockSizes_Default, 2739 0, 2740 0, 2741 MatFDColoringSetUp_SeqXAIJ, 2742 0, 2743 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2744 MatDestroySubMatrices_SeqBAIJ 2745 }; 2746 2747 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2748 { 2749 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2750 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2755 2756 /* allocate space for values if not already there */ 2757 if (!aij->saved_values) { 2758 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2759 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2760 } 2761 2762 /* copy values over */ 2763 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2768 { 2769 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2770 PetscErrorCode ierr; 2771 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2772 2773 PetscFunctionBegin; 2774 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2775 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2776 2777 /* copy values over */ 2778 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2779 PetscFunctionReturn(0); 2780 } 2781 2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2784 2785 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2786 { 2787 Mat_SeqBAIJ *b; 2788 PetscErrorCode ierr; 2789 PetscInt i,mbs,nbs,bs2; 2790 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2791 2792 PetscFunctionBegin; 2793 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2794 if (nz == MAT_SKIP_ALLOCATION) { 2795 skipallocation = PETSC_TRUE; 2796 nz = 0; 2797 } 2798 2799 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2800 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2801 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2802 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2803 2804 B->preallocated = PETSC_TRUE; 2805 2806 mbs = B->rmap->n/bs; 2807 nbs = B->cmap->n/bs; 2808 bs2 = bs*bs; 2809 2810 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2811 2812 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2813 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2814 if (nnz) { 2815 for (i=0; i<mbs; i++) { 2816 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2817 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2818 } 2819 } 2820 2821 b = (Mat_SeqBAIJ*)B->data; 2822 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2823 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2824 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2825 2826 if (!flg) { 2827 switch (bs) { 2828 case 1: 2829 B->ops->mult = MatMult_SeqBAIJ_1; 2830 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2831 break; 2832 case 2: 2833 B->ops->mult = MatMult_SeqBAIJ_2; 2834 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2835 break; 2836 case 3: 2837 B->ops->mult = MatMult_SeqBAIJ_3; 2838 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2839 break; 2840 case 4: 2841 B->ops->mult = MatMult_SeqBAIJ_4; 2842 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2843 break; 2844 case 5: 2845 B->ops->mult = MatMult_SeqBAIJ_5; 2846 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2847 break; 2848 case 6: 2849 B->ops->mult = MatMult_SeqBAIJ_6; 2850 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2851 break; 2852 case 7: 2853 B->ops->mult = MatMult_SeqBAIJ_7; 2854 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2855 break; 2856 case 11: 2857 B->ops->mult = MatMult_SeqBAIJ_11; 2858 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2859 break; 2860 case 15: 2861 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2862 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2863 break; 2864 default: 2865 B->ops->mult = MatMult_SeqBAIJ_N; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2867 break; 2868 } 2869 } 2870 B->ops->sor = MatSOR_SeqBAIJ; 2871 b->mbs = mbs; 2872 b->nbs = nbs; 2873 if (!skipallocation) { 2874 if (!b->imax) { 2875 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2876 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2877 2878 b->free_imax_ilen = PETSC_TRUE; 2879 } 2880 /* b->ilen will count nonzeros in each block row so far. */ 2881 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2882 if (!nnz) { 2883 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2884 else if (nz < 0) nz = 1; 2885 for (i=0; i<mbs; i++) b->imax[i] = nz; 2886 nz = nz*mbs; 2887 } else { 2888 nz = 0; 2889 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2890 } 2891 2892 /* allocate the matrix space */ 2893 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2894 if (B->structure_only) { 2895 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2896 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2897 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2898 } else { 2899 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2900 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2901 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2902 } 2903 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2904 2905 if (B->structure_only) { 2906 b->singlemalloc = PETSC_FALSE; 2907 b->free_a = PETSC_FALSE; 2908 } else { 2909 b->singlemalloc = PETSC_TRUE; 2910 b->free_a = PETSC_TRUE; 2911 } 2912 b->free_ij = PETSC_TRUE; 2913 2914 b->i[0] = 0; 2915 for (i=1; i<mbs+1; i++) { 2916 b->i[i] = b->i[i-1] + b->imax[i-1]; 2917 } 2918 2919 } else { 2920 b->free_a = PETSC_FALSE; 2921 b->free_ij = PETSC_FALSE; 2922 } 2923 2924 b->bs2 = bs2; 2925 b->mbs = mbs; 2926 b->nz = 0; 2927 b->maxnz = nz; 2928 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2929 B->was_assembled = PETSC_FALSE; 2930 B->assembled = PETSC_FALSE; 2931 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2932 PetscFunctionReturn(0); 2933 } 2934 2935 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2936 { 2937 PetscInt i,m,nz,nz_max=0,*nnz; 2938 PetscScalar *values=0; 2939 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2940 PetscErrorCode ierr; 2941 2942 PetscFunctionBegin; 2943 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2944 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2945 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2946 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2947 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2948 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2949 m = B->rmap->n/bs; 2950 2951 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2952 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2953 for (i=0; i<m; i++) { 2954 nz = ii[i+1]- ii[i]; 2955 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2956 nz_max = PetscMax(nz_max, nz); 2957 nnz[i] = nz; 2958 } 2959 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2960 ierr = PetscFree(nnz);CHKERRQ(ierr); 2961 2962 values = (PetscScalar*)V; 2963 if (!values) { 2964 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2965 } 2966 for (i=0; i<m; i++) { 2967 PetscInt ncols = ii[i+1] - ii[i]; 2968 const PetscInt *icols = jj + ii[i]; 2969 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2970 if (!roworiented) { 2971 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2972 } else { 2973 PetscInt j; 2974 for (j=0; j<ncols; j++) { 2975 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2976 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2977 } 2978 } 2979 } 2980 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2981 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2984 PetscFunctionReturn(0); 2985 } 2986 2987 /*MC 2988 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2989 block sparse compressed row format. 2990 2991 Options Database Keys: 2992 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2993 2994 Level: beginner 2995 2996 .seealso: MatCreateSeqBAIJ() 2997 M*/ 2998 2999 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3000 3001 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3002 { 3003 PetscErrorCode ierr; 3004 PetscMPIInt size; 3005 Mat_SeqBAIJ *b; 3006 3007 PetscFunctionBegin; 3008 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3009 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3010 3011 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3012 B->data = (void*)b; 3013 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3014 3015 b->row = 0; 3016 b->col = 0; 3017 b->icol = 0; 3018 b->reallocs = 0; 3019 b->saved_values = 0; 3020 3021 b->roworiented = PETSC_TRUE; 3022 b->nonew = 0; 3023 b->diag = 0; 3024 B->spptr = 0; 3025 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3026 b->keepnonzeropattern = PETSC_FALSE; 3027 3028 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3030 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3031 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3032 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3033 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3034 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3037 #if defined(PETSC_HAVE_HYPRE) 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3039 #endif 3040 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3041 PetscFunctionReturn(0); 3042 } 3043 3044 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3045 { 3046 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3047 PetscErrorCode ierr; 3048 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3049 3050 PetscFunctionBegin; 3051 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3052 3053 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3054 c->imax = a->imax; 3055 c->ilen = a->ilen; 3056 c->free_imax_ilen = PETSC_FALSE; 3057 } else { 3058 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3059 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3060 for (i=0; i<mbs; i++) { 3061 c->imax[i] = a->imax[i]; 3062 c->ilen[i] = a->ilen[i]; 3063 } 3064 c->free_imax_ilen = PETSC_TRUE; 3065 } 3066 3067 /* allocate the matrix space */ 3068 if (mallocmatspace) { 3069 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3070 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3071 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3072 3073 c->i = a->i; 3074 c->j = a->j; 3075 c->singlemalloc = PETSC_FALSE; 3076 c->free_a = PETSC_TRUE; 3077 c->free_ij = PETSC_FALSE; 3078 c->parent = A; 3079 C->preallocated = PETSC_TRUE; 3080 C->assembled = PETSC_TRUE; 3081 3082 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3083 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3084 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3085 } else { 3086 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3087 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3088 3089 c->singlemalloc = PETSC_TRUE; 3090 c->free_a = PETSC_TRUE; 3091 c->free_ij = PETSC_TRUE; 3092 3093 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3094 if (mbs > 0) { 3095 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3096 if (cpvalues == MAT_COPY_VALUES) { 3097 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3098 } else { 3099 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3100 } 3101 } 3102 C->preallocated = PETSC_TRUE; 3103 C->assembled = PETSC_TRUE; 3104 } 3105 } 3106 3107 c->roworiented = a->roworiented; 3108 c->nonew = a->nonew; 3109 3110 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3111 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3112 3113 c->bs2 = a->bs2; 3114 c->mbs = a->mbs; 3115 c->nbs = a->nbs; 3116 3117 if (a->diag) { 3118 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3119 c->diag = a->diag; 3120 c->free_diag = PETSC_FALSE; 3121 } else { 3122 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3123 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3124 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3125 c->free_diag = PETSC_TRUE; 3126 } 3127 } else c->diag = 0; 3128 3129 c->nz = a->nz; 3130 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3131 c->solve_work = NULL; 3132 c->mult_work = NULL; 3133 c->sor_workt = NULL; 3134 c->sor_work = NULL; 3135 3136 c->compressedrow.use = a->compressedrow.use; 3137 c->compressedrow.nrows = a->compressedrow.nrows; 3138 if (a->compressedrow.use) { 3139 i = a->compressedrow.nrows; 3140 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3141 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3142 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3143 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3144 } else { 3145 c->compressedrow.use = PETSC_FALSE; 3146 c->compressedrow.i = NULL; 3147 c->compressedrow.rindex = NULL; 3148 } 3149 C->nonzerostate = A->nonzerostate; 3150 3151 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3152 PetscFunctionReturn(0); 3153 } 3154 3155 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3156 { 3157 PetscErrorCode ierr; 3158 3159 PetscFunctionBegin; 3160 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3161 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3162 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3163 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3164 PetscFunctionReturn(0); 3165 } 3166 3167 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3168 { 3169 Mat_SeqBAIJ *a; 3170 PetscErrorCode ierr; 3171 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3172 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3173 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3174 PetscInt *masked,nmask,tmp,bs2,ishift; 3175 PetscMPIInt size; 3176 int fd; 3177 PetscScalar *aa; 3178 MPI_Comm comm; 3179 3180 PetscFunctionBegin; 3181 /* force binary viewer to load .info file if it has not yet done so */ 3182 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3183 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3184 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3185 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3186 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3187 if (bs < 0) bs = 1; 3188 bs2 = bs*bs; 3189 3190 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3191 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3192 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3193 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3194 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3195 M = header[1]; N = header[2]; nz = header[3]; 3196 3197 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3198 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3199 3200 /* 3201 This code adds extra rows to make sure the number of rows is 3202 divisible by the blocksize 3203 */ 3204 mbs = M/bs; 3205 extra_rows = bs - M + bs*(mbs); 3206 if (extra_rows == bs) extra_rows = 0; 3207 else mbs++; 3208 if (extra_rows) { 3209 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3210 } 3211 3212 /* Set global sizes if not already set */ 3213 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3214 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3215 } else { /* Check if the matrix global sizes are correct */ 3216 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3217 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3218 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3219 } 3220 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3221 } 3222 3223 /* read in row lengths */ 3224 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3225 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3226 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3227 3228 /* read in column indices */ 3229 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3230 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3231 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3232 3233 /* loop over row lengths determining block row lengths */ 3234 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3235 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3236 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3237 rowcount = 0; 3238 nzcount = 0; 3239 for (i=0; i<mbs; i++) { 3240 nmask = 0; 3241 for (j=0; j<bs; j++) { 3242 kmax = rowlengths[rowcount]; 3243 for (k=0; k<kmax; k++) { 3244 tmp = jj[nzcount++]/bs; 3245 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3246 } 3247 rowcount++; 3248 } 3249 browlengths[i] += nmask; 3250 /* zero out the mask elements we set */ 3251 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3252 } 3253 3254 /* Do preallocation */ 3255 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3256 a = (Mat_SeqBAIJ*)newmat->data; 3257 3258 /* set matrix "i" values */ 3259 a->i[0] = 0; 3260 for (i=1; i<= mbs; i++) { 3261 a->i[i] = a->i[i-1] + browlengths[i-1]; 3262 a->ilen[i-1] = browlengths[i-1]; 3263 } 3264 a->nz = 0; 3265 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3266 3267 /* read in nonzero values */ 3268 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3269 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3270 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3271 3272 /* set "a" and "j" values into matrix */ 3273 nzcount = 0; jcount = 0; 3274 for (i=0; i<mbs; i++) { 3275 nzcountb = nzcount; 3276 nmask = 0; 3277 for (j=0; j<bs; j++) { 3278 kmax = rowlengths[i*bs+j]; 3279 for (k=0; k<kmax; k++) { 3280 tmp = jj[nzcount++]/bs; 3281 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3282 } 3283 } 3284 /* sort the masked values */ 3285 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3286 3287 /* set "j" values into matrix */ 3288 maskcount = 1; 3289 for (j=0; j<nmask; j++) { 3290 a->j[jcount++] = masked[j]; 3291 mask[masked[j]] = maskcount++; 3292 } 3293 /* set "a" values into matrix */ 3294 ishift = bs2*a->i[i]; 3295 for (j=0; j<bs; j++) { 3296 kmax = rowlengths[i*bs+j]; 3297 for (k=0; k<kmax; k++) { 3298 tmp = jj[nzcountb]/bs; 3299 block = mask[tmp] - 1; 3300 point = jj[nzcountb] - bs*tmp; 3301 idx = ishift + bs2*block + j + bs*point; 3302 a->a[idx] = (MatScalar)aa[nzcountb++]; 3303 } 3304 } 3305 /* zero out the mask elements we set */ 3306 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3307 } 3308 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3309 3310 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3311 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3312 ierr = PetscFree(aa);CHKERRQ(ierr); 3313 ierr = PetscFree(jj);CHKERRQ(ierr); 3314 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3315 3316 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3317 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } 3320 3321 /*@C 3322 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3323 compressed row) format. For good matrix assembly performance the 3324 user should preallocate the matrix storage by setting the parameter nz 3325 (or the array nnz). By setting these parameters accurately, performance 3326 during matrix assembly can be increased by more than a factor of 50. 3327 3328 Collective on MPI_Comm 3329 3330 Input Parameters: 3331 + comm - MPI communicator, set to PETSC_COMM_SELF 3332 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3333 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3334 . m - number of rows 3335 . n - number of columns 3336 . nz - number of nonzero blocks per block row (same for all rows) 3337 - nnz - array containing the number of nonzero blocks in the various block rows 3338 (possibly different for each block row) or NULL 3339 3340 Output Parameter: 3341 . A - the matrix 3342 3343 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3344 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3345 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3346 3347 Options Database Keys: 3348 . -mat_no_unroll - uses code that does not unroll the loops in the 3349 block calculations (much slower) 3350 . -mat_block_size - size of the blocks to use 3351 3352 Level: intermediate 3353 3354 Notes: 3355 The number of rows and columns must be divisible by blocksize. 3356 3357 If the nnz parameter is given then the nz parameter is ignored 3358 3359 A nonzero block is any block that as 1 or more nonzeros in it 3360 3361 The block AIJ format is fully compatible with standard Fortran 77 3362 storage. That is, the stored row and column indices can begin at 3363 either one (as in Fortran) or zero. See the users' manual for details. 3364 3365 Specify the preallocated storage with either nz or nnz (not both). 3366 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3367 allocation. See Users-Manual: ch_mat for details. 3368 matrices. 3369 3370 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3371 @*/ 3372 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3373 { 3374 PetscErrorCode ierr; 3375 3376 PetscFunctionBegin; 3377 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3378 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3379 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3380 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3381 PetscFunctionReturn(0); 3382 } 3383 3384 /*@C 3385 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3386 per row in the matrix. For good matrix assembly performance the 3387 user should preallocate the matrix storage by setting the parameter nz 3388 (or the array nnz). By setting these parameters accurately, performance 3389 during matrix assembly can be increased by more than a factor of 50. 3390 3391 Collective on MPI_Comm 3392 3393 Input Parameters: 3394 + B - the matrix 3395 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3396 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3397 . nz - number of block nonzeros per block row (same for all rows) 3398 - nnz - array containing the number of block nonzeros in the various block rows 3399 (possibly different for each block row) or NULL 3400 3401 Options Database Keys: 3402 . -mat_no_unroll - uses code that does not unroll the loops in the 3403 block calculations (much slower) 3404 . -mat_block_size - size of the blocks to use 3405 3406 Level: intermediate 3407 3408 Notes: 3409 If the nnz parameter is given then the nz parameter is ignored 3410 3411 You can call MatGetInfo() to get information on how effective the preallocation was; 3412 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3413 You can also run with the option -info and look for messages with the string 3414 malloc in them to see if additional memory allocation was needed. 3415 3416 The block AIJ format is fully compatible with standard Fortran 77 3417 storage. That is, the stored row and column indices can begin at 3418 either one (as in Fortran) or zero. See the users' manual for details. 3419 3420 Specify the preallocated storage with either nz or nnz (not both). 3421 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3422 allocation. See Users-Manual: ch_mat for details. 3423 3424 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3425 @*/ 3426 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3427 { 3428 PetscErrorCode ierr; 3429 3430 PetscFunctionBegin; 3431 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3432 PetscValidType(B,1); 3433 PetscValidLogicalCollectiveInt(B,bs,2); 3434 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3435 PetscFunctionReturn(0); 3436 } 3437 3438 /*@C 3439 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3440 (the default sequential PETSc format). 3441 3442 Collective on MPI_Comm 3443 3444 Input Parameters: 3445 + B - the matrix 3446 . i - the indices into j for the start of each local row (starts with zero) 3447 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3448 - v - optional values in the matrix 3449 3450 Level: developer 3451 3452 Notes: 3453 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3454 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3455 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3456 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3457 block column and the second index is over columns within a block. 3458 3459 .keywords: matrix, aij, compressed row, sparse 3460 3461 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3462 @*/ 3463 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3464 { 3465 PetscErrorCode ierr; 3466 3467 PetscFunctionBegin; 3468 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3469 PetscValidType(B,1); 3470 PetscValidLogicalCollectiveInt(B,bs,2); 3471 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3472 PetscFunctionReturn(0); 3473 } 3474 3475 3476 /*@ 3477 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3478 3479 Collective on MPI_Comm 3480 3481 Input Parameters: 3482 + comm - must be an MPI communicator of size 1 3483 . bs - size of block 3484 . m - number of rows 3485 . n - number of columns 3486 . i - row indices 3487 . j - column indices 3488 - a - matrix values 3489 3490 Output Parameter: 3491 . mat - the matrix 3492 3493 Level: advanced 3494 3495 Notes: 3496 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3497 once the matrix is destroyed 3498 3499 You cannot set new nonzero locations into this matrix, that will generate an error. 3500 3501 The i and j indices are 0 based 3502 3503 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3504 3505 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3506 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3507 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3508 with column-major ordering within blocks. 3509 3510 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3511 3512 @*/ 3513 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3514 { 3515 PetscErrorCode ierr; 3516 PetscInt ii; 3517 Mat_SeqBAIJ *baij; 3518 3519 PetscFunctionBegin; 3520 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3521 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3522 3523 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3524 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3525 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3526 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3527 baij = (Mat_SeqBAIJ*)(*mat)->data; 3528 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3529 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3530 3531 baij->i = i; 3532 baij->j = j; 3533 baij->a = a; 3534 3535 baij->singlemalloc = PETSC_FALSE; 3536 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3537 baij->free_a = PETSC_FALSE; 3538 baij->free_ij = PETSC_FALSE; 3539 3540 for (ii=0; ii<m; ii++) { 3541 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3542 #if defined(PETSC_USE_DEBUG) 3543 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3544 #endif 3545 } 3546 #if defined(PETSC_USE_DEBUG) 3547 for (ii=0; ii<baij->i[m]; ii++) { 3548 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3549 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3550 } 3551 #endif 3552 3553 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3554 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3555 PetscFunctionReturn(0); 3556 } 3557 3558 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3559 { 3560 PetscErrorCode ierr; 3561 PetscMPIInt size; 3562 3563 PetscFunctionBegin; 3564 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3565 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3566 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3567 } else { 3568 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3569 } 3570 PetscFunctionReturn(0); 3571 } 3572