1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatGetColumnNorms_SeqBAIJ(Mat A,NormType type,PetscReal *norms) 21 { 22 PetscErrorCode ierr; 23 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 24 PetscInt N,i; 25 PetscInt ib,jb,bs = A->rmap->bs; 26 MatScalar *a_val = a_aij->a; 27 28 PetscFunctionBegin; 29 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 30 for (i=0; i<N; i++) norms[i] = 0.0; 31 if (type == NORM_2) { 32 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 33 for (jb=0; jb<bs; jb++) { 34 for (ib=0; ib<bs; ib++) { 35 norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 36 a_val++; 37 } 38 } 39 } 40 } else if (type == NORM_1) { 41 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 42 for (jb=0; jb<bs; jb++) { 43 for (ib=0; ib<bs; ib++) { 44 norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 45 a_val++; 46 } 47 } 48 } 49 } else if (type == NORM_INFINITY) { 50 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 51 for (jb=0; jb<bs; jb++) { 52 for (ib=0; ib<bs; ib++) { 53 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 54 norms[col] = PetscMax(PetscAbsScalar(*a_val), norms[col]); 55 a_val++; 56 } 57 } 58 } 59 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 60 if (type == NORM_2) { 61 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 62 } 63 PetscFunctionReturn(0); 64 } 65 66 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 67 { 68 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 69 PetscErrorCode ierr; 70 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 71 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 72 PetscReal shift = 0.0; 73 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 74 75 PetscFunctionBegin; 76 allowzeropivot = PetscNot(A->erroriffailure); 77 78 if (a->idiagvalid) { 79 if (values) *values = a->idiag; 80 PetscFunctionReturn(0); 81 } 82 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 83 diag_offset = a->diag; 84 if (!a->idiag) { 85 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 86 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 87 } 88 diag = a->idiag; 89 if (values) *values = a->idiag; 90 /* factor and invert each block */ 91 switch (bs) { 92 case 1: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 1*diag_offset[i]; 95 diag[0] = odiag[0]; 96 97 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 98 if (allowzeropivot) { 99 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 101 A->factorerror_zeropivot_row = i; 102 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 103 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 104 } 105 106 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 107 diag += 1; 108 } 109 break; 110 case 2: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 4*diag_offset[i]; 113 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 114 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 115 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 116 diag += 4; 117 } 118 break; 119 case 3: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 9*diag_offset[i]; 122 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 123 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 124 diag[8] = odiag[8]; 125 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 126 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 diag += 9; 128 } 129 break; 130 case 4: 131 for (i=0; i<mbs; i++) { 132 odiag = v + 16*diag_offset[i]; 133 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 134 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 135 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 136 diag += 16; 137 } 138 break; 139 case 5: 140 for (i=0; i<mbs; i++) { 141 odiag = v + 25*diag_offset[i]; 142 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 143 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 144 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145 diag += 25; 146 } 147 break; 148 case 6: 149 for (i=0; i<mbs; i++) { 150 odiag = v + 36*diag_offset[i]; 151 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 152 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 153 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 154 diag += 36; 155 } 156 break; 157 case 7: 158 for (i=0; i<mbs; i++) { 159 odiag = v + 49*diag_offset[i]; 160 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 161 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 49; 164 } 165 break; 166 default: 167 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 168 for (i=0; i<mbs; i++) { 169 odiag = v + bs2*diag_offset[i]; 170 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 171 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 172 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 173 diag += bs2; 174 } 175 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 176 } 177 a->idiagvalid = PETSC_TRUE; 178 PetscFunctionReturn(0); 179 } 180 181 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 182 { 183 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 184 PetscScalar *x,*work,*w,*workt,*t; 185 const MatScalar *v,*aa = a->a, *idiag; 186 const PetscScalar *b,*xb; 187 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 188 PetscErrorCode ierr; 189 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 190 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 191 192 PetscFunctionBegin; 193 its = its*lits; 194 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 195 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 196 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 197 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 198 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 199 200 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 201 202 if (!m) PetscFunctionReturn(0); 203 diag = a->diag; 204 idiag = a->idiag; 205 k = PetscMax(A->rmap->n,A->cmap->n); 206 if (!a->mult_work) { 207 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 208 } 209 if (!a->sor_workt) { 210 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 211 } 212 if (!a->sor_work) { 213 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 214 } 215 work = a->mult_work; 216 t = a->sor_workt; 217 w = a->sor_work; 218 219 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 220 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 221 222 if (flag & SOR_ZERO_INITIAL_GUESS) { 223 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 224 switch (bs) { 225 case 1: 226 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 227 t[0] = b[0]; 228 i2 = 1; 229 idiag += 1; 230 for (i=1; i<m; i++) { 231 v = aa + ai[i]; 232 vi = aj + ai[i]; 233 nz = diag[i] - ai[i]; 234 s[0] = b[i2]; 235 for (j=0; j<nz; j++) { 236 xw[0] = x[vi[j]]; 237 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 238 } 239 t[i2] = s[0]; 240 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 241 x[i2] = xw[0]; 242 idiag += 1; 243 i2 += 1; 244 } 245 break; 246 case 2: 247 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 248 t[0] = b[0]; t[1] = b[1]; 249 i2 = 2; 250 idiag += 4; 251 for (i=1; i<m; i++) { 252 v = aa + 4*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s[0] = b[i2]; s[1] = b[i2+1]; 256 for (j=0; j<nz; j++) { 257 idx = 2*vi[j]; 258 it = 4*j; 259 xw[0] = x[idx]; xw[1] = x[1+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 261 } 262 t[i2] = s[0]; t[i2+1] = s[1]; 263 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 264 x[i2] = xw[0]; x[i2+1] = xw[1]; 265 idiag += 4; 266 i2 += 2; 267 } 268 break; 269 case 3: 270 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 272 i2 = 3; 273 idiag += 9; 274 for (i=1; i<m; i++) { 275 v = aa + 9*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 279 while (nz--) { 280 idx = 3*(*vi++); 281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 282 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 283 v += 9; 284 } 285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 286 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 288 idiag += 9; 289 i2 += 3; 290 } 291 break; 292 case 4: 293 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 295 i2 = 4; 296 idiag += 16; 297 for (i=1; i<m; i++) { 298 v = aa + 16*ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 302 while (nz--) { 303 idx = 4*(*vi++); 304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 305 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 306 v += 16; 307 } 308 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 309 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 310 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 311 idiag += 16; 312 i2 += 4; 313 } 314 break; 315 case 5: 316 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 317 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 318 i2 = 5; 319 idiag += 25; 320 for (i=1; i<m; i++) { 321 v = aa + 25*ai[i]; 322 vi = aj + ai[i]; 323 nz = diag[i] - ai[i]; 324 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 325 while (nz--) { 326 idx = 5*(*vi++); 327 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 328 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 329 v += 25; 330 } 331 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 332 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 333 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 334 idiag += 25; 335 i2 += 5; 336 } 337 break; 338 case 6: 339 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 340 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 341 i2 = 6; 342 idiag += 36; 343 for (i=1; i<m; i++) { 344 v = aa + 36*ai[i]; 345 vi = aj + ai[i]; 346 nz = diag[i] - ai[i]; 347 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 348 while (nz--) { 349 idx = 6*(*vi++); 350 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 351 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 352 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 353 v += 36; 354 } 355 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 356 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 357 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 358 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 359 idiag += 36; 360 i2 += 6; 361 } 362 break; 363 case 7: 364 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 365 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 366 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 367 i2 = 7; 368 idiag += 49; 369 for (i=1; i<m; i++) { 370 v = aa + 49*ai[i]; 371 vi = aj + ai[i]; 372 nz = diag[i] - ai[i]; 373 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 374 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 375 while (nz--) { 376 idx = 7*(*vi++); 377 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 378 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 379 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 380 v += 49; 381 } 382 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 383 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 384 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 385 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 386 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 387 idiag += 49; 388 i2 += 7; 389 } 390 break; 391 default: 392 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 393 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 394 i2 = bs; 395 idiag += bs2; 396 for (i=1; i<m; i++) { 397 v = aa + bs2*ai[i]; 398 vi = aj + ai[i]; 399 nz = diag[i] - ai[i]; 400 401 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 402 /* copy all rows of x that are needed into contiguous space */ 403 workt = work; 404 for (j=0; j<nz; j++) { 405 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 406 workt += bs; 407 } 408 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 409 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 410 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 411 412 idiag += bs2; 413 i2 += bs; 414 } 415 break; 416 } 417 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 418 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 419 xb = t; 420 } 421 else xb = b; 422 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 423 idiag = a->idiag+bs2*(a->mbs-1); 424 i2 = bs * (m-1); 425 switch (bs) { 426 case 1: 427 s[0] = xb[i2]; 428 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 429 x[i2] = xw[0]; 430 i2 -= 1; 431 for (i=m-2; i>=0; i--) { 432 v = aa + (diag[i]+1); 433 vi = aj + diag[i] + 1; 434 nz = ai[i+1] - diag[i] - 1; 435 s[0] = xb[i2]; 436 for (j=0; j<nz; j++) { 437 xw[0] = x[vi[j]]; 438 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 439 } 440 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 441 x[i2] = xw[0]; 442 idiag -= 1; 443 i2 -= 1; 444 } 445 break; 446 case 2: 447 s[0] = xb[i2]; s[1] = xb[i2+1]; 448 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 449 x[i2] = xw[0]; x[i2+1] = xw[1]; 450 i2 -= 2; 451 idiag -= 4; 452 for (i=m-2; i>=0; i--) { 453 v = aa + 4*(diag[i] + 1); 454 vi = aj + diag[i] + 1; 455 nz = ai[i+1] - diag[i] - 1; 456 s[0] = xb[i2]; s[1] = xb[i2+1]; 457 for (j=0; j<nz; j++) { 458 idx = 2*vi[j]; 459 it = 4*j; 460 xw[0] = x[idx]; xw[1] = x[1+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 462 } 463 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; 465 idiag -= 4; 466 i2 -= 2; 467 } 468 break; 469 case 3: 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 471 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 473 i2 -= 3; 474 idiag -= 9; 475 for (i=m-2; i>=0; i--) { 476 v = aa + 9*(diag[i]+1); 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 480 while (nz--) { 481 idx = 3*(*vi++); 482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 484 v += 9; 485 } 486 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 488 idiag -= 9; 489 i2 -= 3; 490 } 491 break; 492 case 4: 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 494 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 496 i2 -= 4; 497 idiag -= 16; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 16*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 503 while (nz--) { 504 idx = 4*(*vi++); 505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 506 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 507 v += 16; 508 } 509 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 511 idiag -= 16; 512 i2 -= 4; 513 } 514 break; 515 case 5: 516 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 517 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 518 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 519 i2 -= 5; 520 idiag -= 25; 521 for (i=m-2; i>=0; i--) { 522 v = aa + 25*(diag[i]+1); 523 vi = aj + diag[i] + 1; 524 nz = ai[i+1] - diag[i] - 1; 525 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 526 while (nz--) { 527 idx = 5*(*vi++); 528 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 529 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 530 v += 25; 531 } 532 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 533 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 534 idiag -= 25; 535 i2 -= 5; 536 } 537 break; 538 case 6: 539 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 540 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 541 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 542 i2 -= 6; 543 idiag -= 36; 544 for (i=m-2; i>=0; i--) { 545 v = aa + 36*(diag[i]+1); 546 vi = aj + diag[i] + 1; 547 nz = ai[i+1] - diag[i] - 1; 548 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 549 while (nz--) { 550 idx = 6*(*vi++); 551 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 552 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 553 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 554 v += 36; 555 } 556 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 557 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 558 idiag -= 36; 559 i2 -= 6; 560 } 561 break; 562 case 7: 563 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 564 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 565 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 566 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 567 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 568 i2 -= 7; 569 idiag -= 49; 570 for (i=m-2; i>=0; i--) { 571 v = aa + 49*(diag[i]+1); 572 vi = aj + diag[i] + 1; 573 nz = ai[i+1] - diag[i] - 1; 574 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 575 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 576 while (nz--) { 577 idx = 7*(*vi++); 578 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 579 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 580 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 581 v += 49; 582 } 583 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 584 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 585 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 586 idiag -= 49; 587 i2 -= 7; 588 } 589 break; 590 default: 591 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 592 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 593 i2 -= bs; 594 idiag -= bs2; 595 for (i=m-2; i>=0; i--) { 596 v = aa + bs2*(diag[i]+1); 597 vi = aj + diag[i] + 1; 598 nz = ai[i+1] - diag[i] - 1; 599 600 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 601 /* copy all rows of x that are needed into contiguous space */ 602 workt = work; 603 for (j=0; j<nz; j++) { 604 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 605 workt += bs; 606 } 607 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 608 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 609 610 idiag -= bs2; 611 i2 -= bs; 612 } 613 break; 614 } 615 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 616 } 617 its--; 618 } 619 while (its--) { 620 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 621 idiag = a->idiag; 622 i2 = 0; 623 switch (bs) { 624 case 1: 625 for (i=0; i<m; i++) { 626 v = aa + ai[i]; 627 vi = aj + ai[i]; 628 nz = ai[i+1] - ai[i]; 629 s[0] = b[i2]; 630 for (j=0; j<nz; j++) { 631 xw[0] = x[vi[j]]; 632 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 633 } 634 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 635 x[i2] += xw[0]; 636 idiag += 1; 637 i2 += 1; 638 } 639 break; 640 case 2: 641 for (i=0; i<m; i++) { 642 v = aa + 4*ai[i]; 643 vi = aj + ai[i]; 644 nz = ai[i+1] - ai[i]; 645 s[0] = b[i2]; s[1] = b[i2+1]; 646 for (j=0; j<nz; j++) { 647 idx = 2*vi[j]; 648 it = 4*j; 649 xw[0] = x[idx]; xw[1] = x[1+idx]; 650 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 651 } 652 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 653 x[i2] += xw[0]; x[i2+1] += xw[1]; 654 idiag += 4; 655 i2 += 2; 656 } 657 break; 658 case 3: 659 for (i=0; i<m; i++) { 660 v = aa + 9*ai[i]; 661 vi = aj + ai[i]; 662 nz = ai[i+1] - ai[i]; 663 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 664 while (nz--) { 665 idx = 3*(*vi++); 666 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 668 v += 9; 669 } 670 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 671 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672 idiag += 9; 673 i2 += 3; 674 } 675 break; 676 case 4: 677 for (i=0; i<m; i++) { 678 v = aa + 16*ai[i]; 679 vi = aj + ai[i]; 680 nz = ai[i+1] - ai[i]; 681 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 682 while (nz--) { 683 idx = 4*(*vi++); 684 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 685 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 686 v += 16; 687 } 688 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 689 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 690 idiag += 16; 691 i2 += 4; 692 } 693 break; 694 case 5: 695 for (i=0; i<m; i++) { 696 v = aa + 25*ai[i]; 697 vi = aj + ai[i]; 698 nz = ai[i+1] - ai[i]; 699 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 700 while (nz--) { 701 idx = 5*(*vi++); 702 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 703 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 704 v += 25; 705 } 706 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 707 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 708 idiag += 25; 709 i2 += 5; 710 } 711 break; 712 case 6: 713 for (i=0; i<m; i++) { 714 v = aa + 36*ai[i]; 715 vi = aj + ai[i]; 716 nz = ai[i+1] - ai[i]; 717 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 718 while (nz--) { 719 idx = 6*(*vi++); 720 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 721 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 722 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 723 v += 36; 724 } 725 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 726 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 727 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 728 idiag += 36; 729 i2 += 6; 730 } 731 break; 732 case 7: 733 for (i=0; i<m; i++) { 734 v = aa + 49*ai[i]; 735 vi = aj + ai[i]; 736 nz = ai[i+1] - ai[i]; 737 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 738 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 739 while (nz--) { 740 idx = 7*(*vi++); 741 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 742 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 743 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 744 v += 49; 745 } 746 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 747 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 748 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 749 idiag += 49; 750 i2 += 7; 751 } 752 break; 753 default: 754 for (i=0; i<m; i++) { 755 v = aa + bs2*ai[i]; 756 vi = aj + ai[i]; 757 nz = ai[i+1] - ai[i]; 758 759 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 760 /* copy all rows of x that are needed into contiguous space */ 761 workt = work; 762 for (j=0; j<nz; j++) { 763 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 764 workt += bs; 765 } 766 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 767 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 768 769 idiag += bs2; 770 i2 += bs; 771 } 772 break; 773 } 774 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 775 } 776 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 777 idiag = a->idiag+bs2*(a->mbs-1); 778 i2 = bs * (m-1); 779 switch (bs) { 780 case 1: 781 for (i=m-1; i>=0; i--) { 782 v = aa + ai[i]; 783 vi = aj + ai[i]; 784 nz = ai[i+1] - ai[i]; 785 s[0] = b[i2]; 786 for (j=0; j<nz; j++) { 787 xw[0] = x[vi[j]]; 788 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 789 } 790 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 791 x[i2] += xw[0]; 792 idiag -= 1; 793 i2 -= 1; 794 } 795 break; 796 case 2: 797 for (i=m-1; i>=0; i--) { 798 v = aa + 4*ai[i]; 799 vi = aj + ai[i]; 800 nz = ai[i+1] - ai[i]; 801 s[0] = b[i2]; s[1] = b[i2+1]; 802 for (j=0; j<nz; j++) { 803 idx = 2*vi[j]; 804 it = 4*j; 805 xw[0] = x[idx]; xw[1] = x[1+idx]; 806 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 807 } 808 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 809 x[i2] += xw[0]; x[i2+1] += xw[1]; 810 idiag -= 4; 811 i2 -= 2; 812 } 813 break; 814 case 3: 815 for (i=m-1; i>=0; i--) { 816 v = aa + 9*ai[i]; 817 vi = aj + ai[i]; 818 nz = ai[i+1] - ai[i]; 819 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 820 while (nz--) { 821 idx = 3*(*vi++); 822 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 824 v += 9; 825 } 826 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 827 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828 idiag -= 9; 829 i2 -= 3; 830 } 831 break; 832 case 4: 833 for (i=m-1; i>=0; i--) { 834 v = aa + 16*ai[i]; 835 vi = aj + ai[i]; 836 nz = ai[i+1] - ai[i]; 837 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 838 while (nz--) { 839 idx = 4*(*vi++); 840 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 841 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 842 v += 16; 843 } 844 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 845 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 846 idiag -= 16; 847 i2 -= 4; 848 } 849 break; 850 case 5: 851 for (i=m-1; i>=0; i--) { 852 v = aa + 25*ai[i]; 853 vi = aj + ai[i]; 854 nz = ai[i+1] - ai[i]; 855 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 856 while (nz--) { 857 idx = 5*(*vi++); 858 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 859 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 860 v += 25; 861 } 862 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 863 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 864 idiag -= 25; 865 i2 -= 5; 866 } 867 break; 868 case 6: 869 for (i=m-1; i>=0; i--) { 870 v = aa + 36*ai[i]; 871 vi = aj + ai[i]; 872 nz = ai[i+1] - ai[i]; 873 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 874 while (nz--) { 875 idx = 6*(*vi++); 876 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 877 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 878 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 879 v += 36; 880 } 881 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 882 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 883 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 884 idiag -= 36; 885 i2 -= 6; 886 } 887 break; 888 case 7: 889 for (i=m-1; i>=0; i--) { 890 v = aa + 49*ai[i]; 891 vi = aj + ai[i]; 892 nz = ai[i+1] - ai[i]; 893 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 894 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 895 while (nz--) { 896 idx = 7*(*vi++); 897 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 898 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 900 v += 49; 901 } 902 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 903 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 904 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 905 idiag -= 49; 906 i2 -= 7; 907 } 908 break; 909 default: 910 for (i=m-1; i>=0; i--) { 911 v = aa + bs2*ai[i]; 912 vi = aj + ai[i]; 913 nz = ai[i+1] - ai[i]; 914 915 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 916 /* copy all rows of x that are needed into contiguous space */ 917 workt = work; 918 for (j=0; j<nz; j++) { 919 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 920 workt += bs; 921 } 922 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 923 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 924 925 idiag -= bs2; 926 i2 -= bs; 927 } 928 break; 929 } 930 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 931 } 932 } 933 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 934 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 935 PetscFunctionReturn(0); 936 } 937 938 /* 939 Special version for direct calls from Fortran (Used in PETSc-fun3d) 940 */ 941 #if defined(PETSC_HAVE_FORTRAN_CAPS) 942 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 943 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 944 #define matsetvaluesblocked4_ matsetvaluesblocked4 945 #endif 946 947 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 948 { 949 Mat A = *AA; 950 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 951 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 952 PetscInt *ai =a->i,*ailen=a->ilen; 953 PetscInt *aj =a->j,stepval,lastcol = -1; 954 const PetscScalar *value = v; 955 MatScalar *ap,*aa = a->a,*bap; 956 PetscErrorCode ierr; 957 958 PetscFunctionBegin; 959 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 960 stepval = (n-1)*4; 961 for (k=0; k<m; k++) { /* loop over added rows */ 962 row = im[k]; 963 rp = aj + ai[row]; 964 ap = aa + 16*ai[row]; 965 nrow = ailen[row]; 966 low = 0; 967 high = nrow; 968 for (l=0; l<n; l++) { /* loop over added columns */ 969 col = in[l]; 970 if (col <= lastcol) low = 0; 971 else high = nrow; 972 lastcol = col; 973 value = v + k*(stepval+4 + l)*4; 974 while (high-low > 7) { 975 t = (low+high)/2; 976 if (rp[t] > col) high = t; 977 else low = t; 978 } 979 for (i=low; i<high; i++) { 980 if (rp[i] > col) break; 981 if (rp[i] == col) { 982 bap = ap + 16*i; 983 for (ii=0; ii<4; ii++,value+=stepval) { 984 for (jj=ii; jj<16; jj+=4) { 985 bap[jj] += *value++; 986 } 987 } 988 goto noinsert2; 989 } 990 } 991 N = nrow++ - 1; 992 high++; /* added new column index thus must search to one higher than before */ 993 /* shift up all the later entries in this row */ 994 for (ii=N; ii>=i; ii--) { 995 rp[ii+1] = rp[ii]; 996 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 997 } 998 if (N >= i) { 999 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1000 } 1001 rp[i] = col; 1002 bap = ap + 16*i; 1003 for (ii=0; ii<4; ii++,value+=stepval) { 1004 for (jj=ii; jj<16; jj+=4) { 1005 bap[jj] = *value++; 1006 } 1007 } 1008 noinsert2:; 1009 low = i; 1010 } 1011 ailen[row] = nrow; 1012 } 1013 PetscFunctionReturnVoid(); 1014 } 1015 1016 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1017 #define matsetvalues4_ MATSETVALUES4 1018 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1019 #define matsetvalues4_ matsetvalues4 1020 #endif 1021 1022 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1023 { 1024 Mat A = *AA; 1025 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1026 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1027 PetscInt *ai=a->i,*ailen=a->ilen; 1028 PetscInt *aj=a->j,brow,bcol; 1029 PetscInt ridx,cidx,lastcol = -1; 1030 MatScalar *ap,value,*aa=a->a,*bap; 1031 PetscErrorCode ierr; 1032 1033 PetscFunctionBegin; 1034 for (k=0; k<m; k++) { /* loop over added rows */ 1035 row = im[k]; brow = row/4; 1036 rp = aj + ai[brow]; 1037 ap = aa + 16*ai[brow]; 1038 nrow = ailen[brow]; 1039 low = 0; 1040 high = nrow; 1041 for (l=0; l<n; l++) { /* loop over added columns */ 1042 col = in[l]; bcol = col/4; 1043 ridx = row % 4; cidx = col % 4; 1044 value = v[l + k*n]; 1045 if (col <= lastcol) low = 0; 1046 else high = nrow; 1047 lastcol = col; 1048 while (high-low > 7) { 1049 t = (low+high)/2; 1050 if (rp[t] > bcol) high = t; 1051 else low = t; 1052 } 1053 for (i=low; i<high; i++) { 1054 if (rp[i] > bcol) break; 1055 if (rp[i] == bcol) { 1056 bap = ap + 16*i + 4*cidx + ridx; 1057 *bap += value; 1058 goto noinsert1; 1059 } 1060 } 1061 N = nrow++ - 1; 1062 high++; /* added new column thus must search to one higher than before */ 1063 /* shift up all the later entries in this row */ 1064 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1065 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1066 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1067 rp[i] = bcol; 1068 ap[16*i + 4*cidx + ridx] = value; 1069 noinsert1:; 1070 low = i; 1071 } 1072 ailen[brow] = nrow; 1073 } 1074 PetscFunctionReturnVoid(); 1075 } 1076 1077 /* 1078 Checks for missing diagonals 1079 */ 1080 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1081 { 1082 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt *diag,*ii = a->i,i; 1085 1086 PetscFunctionBegin; 1087 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1088 *missing = PETSC_FALSE; 1089 if (A->rmap->n > 0 && !ii) { 1090 *missing = PETSC_TRUE; 1091 if (d) *d = 0; 1092 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1093 } else { 1094 PetscInt n; 1095 n = PetscMin(a->mbs, a->nbs); 1096 diag = a->diag; 1097 for (i=0; i<n; i++) { 1098 if (diag[i] >= ii[i+1]) { 1099 *missing = PETSC_TRUE; 1100 if (d) *d = i; 1101 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1102 break; 1103 } 1104 } 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1110 { 1111 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 PetscInt i,j,m = a->mbs; 1114 1115 PetscFunctionBegin; 1116 if (!a->diag) { 1117 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1118 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1119 a->free_diag = PETSC_TRUE; 1120 } 1121 for (i=0; i<m; i++) { 1122 a->diag[i] = a->i[i+1]; 1123 for (j=a->i[i]; j<a->i[i+1]; j++) { 1124 if (a->j[j] == i) { 1125 a->diag[i] = j; 1126 break; 1127 } 1128 } 1129 } 1130 PetscFunctionReturn(0); 1131 } 1132 1133 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1134 { 1135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1136 PetscErrorCode ierr; 1137 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1138 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1139 1140 PetscFunctionBegin; 1141 *nn = n; 1142 if (!ia) PetscFunctionReturn(0); 1143 if (symmetric) { 1144 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1145 nz = tia[n]; 1146 } else { 1147 tia = a->i; tja = a->j; 1148 } 1149 1150 if (!blockcompressed && bs > 1) { 1151 (*nn) *= bs; 1152 /* malloc & create the natural set of indices */ 1153 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1154 if (n) { 1155 (*ia)[0] = oshift; 1156 for (j=1; j<bs; j++) { 1157 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1158 } 1159 } 1160 1161 for (i=1; i<n; i++) { 1162 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1163 for (j=1; j<bs; j++) { 1164 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1165 } 1166 } 1167 if (n) { 1168 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1169 } 1170 1171 if (inja) { 1172 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1173 cnt = 0; 1174 for (i=0; i<n; i++) { 1175 for (j=0; j<bs; j++) { 1176 for (k=tia[i]; k<tia[i+1]; k++) { 1177 for (l=0; l<bs; l++) { 1178 (*ja)[cnt++] = bs*tja[k] + l; 1179 } 1180 } 1181 } 1182 } 1183 } 1184 1185 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1186 ierr = PetscFree(tia);CHKERRQ(ierr); 1187 ierr = PetscFree(tja);CHKERRQ(ierr); 1188 } 1189 } else if (oshift == 1) { 1190 if (symmetric) { 1191 nz = tia[A->rmap->n/bs]; 1192 /* add 1 to i and j indices */ 1193 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1194 *ia = tia; 1195 if (ja) { 1196 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1197 *ja = tja; 1198 } 1199 } else { 1200 nz = a->i[A->rmap->n/bs]; 1201 /* malloc space and add 1 to i and j indices */ 1202 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1203 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1204 if (ja) { 1205 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1206 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1207 } 1208 } 1209 } else { 1210 *ia = tia; 1211 if (ja) *ja = tja; 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1217 { 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 if (!ia) PetscFunctionReturn(0); 1222 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1223 ierr = PetscFree(*ia);CHKERRQ(ierr); 1224 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1230 { 1231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 #if defined(PETSC_USE_LOG) 1236 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1237 #endif 1238 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1239 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1240 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1241 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1242 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1243 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1244 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1245 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1246 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1247 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1248 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1249 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1250 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1251 1252 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1253 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1254 ierr = PetscFree(A->data);CHKERRQ(ierr); 1255 1256 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1257 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1258 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1259 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1260 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1261 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1262 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1263 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1264 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1265 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1266 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1268 #if defined(PETSC_HAVE_HYPRE) 1269 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1270 #endif 1271 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1276 { 1277 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1278 PetscErrorCode ierr; 1279 1280 PetscFunctionBegin; 1281 switch (op) { 1282 case MAT_ROW_ORIENTED: 1283 a->roworiented = flg; 1284 break; 1285 case MAT_KEEP_NONZERO_PATTERN: 1286 a->keepnonzeropattern = flg; 1287 break; 1288 case MAT_NEW_NONZERO_LOCATIONS: 1289 a->nonew = (flg ? 0 : 1); 1290 break; 1291 case MAT_NEW_NONZERO_LOCATION_ERR: 1292 a->nonew = (flg ? -1 : 0); 1293 break; 1294 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1295 a->nonew = (flg ? -2 : 0); 1296 break; 1297 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1298 a->nounused = (flg ? -1 : 0); 1299 break; 1300 case MAT_FORCE_DIAGONAL_ENTRIES: 1301 case MAT_IGNORE_OFF_PROC_ENTRIES: 1302 case MAT_USE_HASH_TABLE: 1303 case MAT_SORTED_FULL: 1304 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1305 break; 1306 case MAT_SPD: 1307 case MAT_SYMMETRIC: 1308 case MAT_STRUCTURALLY_SYMMETRIC: 1309 case MAT_HERMITIAN: 1310 case MAT_SYMMETRY_ETERNAL: 1311 case MAT_SUBMAT_SINGLEIS: 1312 case MAT_STRUCTURE_ONLY: 1313 /* These options are handled directly by MatSetOption() */ 1314 break; 1315 default: 1316 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1317 } 1318 PetscFunctionReturn(0); 1319 } 1320 1321 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1322 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1323 { 1324 PetscErrorCode ierr; 1325 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1326 MatScalar *aa_i; 1327 PetscScalar *v_i; 1328 1329 PetscFunctionBegin; 1330 bs = A->rmap->bs; 1331 bs2 = bs*bs; 1332 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1333 1334 bn = row/bs; /* Block number */ 1335 bp = row % bs; /* Block Position */ 1336 M = ai[bn+1] - ai[bn]; 1337 *nz = bs*M; 1338 1339 if (v) { 1340 *v = NULL; 1341 if (*nz) { 1342 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1343 for (i=0; i<M; i++) { /* for each block in the block row */ 1344 v_i = *v + i*bs; 1345 aa_i = aa + bs2*(ai[bn] + i); 1346 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1347 } 1348 } 1349 } 1350 1351 if (idx) { 1352 *idx = NULL; 1353 if (*nz) { 1354 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1355 for (i=0; i<M; i++) { /* for each block in the block row */ 1356 idx_i = *idx + i*bs; 1357 itmp = bs*aj[ai[bn] + i]; 1358 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1359 } 1360 } 1361 } 1362 PetscFunctionReturn(0); 1363 } 1364 1365 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1366 { 1367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1376 { 1377 PetscErrorCode ierr; 1378 1379 PetscFunctionBegin; 1380 if (nz) *nz = 0; 1381 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1382 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1383 PetscFunctionReturn(0); 1384 } 1385 1386 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1387 { 1388 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1389 Mat C; 1390 PetscErrorCode ierr; 1391 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1392 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1393 MatScalar *ata,*aa=a->a; 1394 1395 PetscFunctionBegin; 1396 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1397 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1398 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1399 1400 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1401 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1402 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1403 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1404 1405 at = (Mat_SeqBAIJ*)C->data; 1406 ati = at->i; 1407 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1408 } else { 1409 C = *B; 1410 at = (Mat_SeqBAIJ*)C->data; 1411 ati = at->i; 1412 } 1413 1414 atj = at->j; 1415 ata = at->a; 1416 1417 /* Copy ati into atfill so we have locations of the next free space in atj */ 1418 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1419 1420 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1421 for (i=0; i<mbs; i++) { 1422 anzj = ai[i+1] - ai[i]; 1423 for (j=0; j<anzj; j++) { 1424 atj[atfill[*aj]] = i; 1425 for (kr=0; kr<bs; kr++) { 1426 for (k=0; k<bs; k++) { 1427 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1428 } 1429 } 1430 atfill[*aj++] += 1; 1431 } 1432 } 1433 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1434 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1435 1436 /* Clean up temporary space and complete requests. */ 1437 ierr = PetscFree(atfill);CHKERRQ(ierr); 1438 1439 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1440 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1441 *B = C; 1442 } else { 1443 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1444 } 1445 PetscFunctionReturn(0); 1446 } 1447 1448 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1449 { 1450 PetscErrorCode ierr; 1451 Mat Btrans; 1452 1453 PetscFunctionBegin; 1454 *f = PETSC_FALSE; 1455 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1456 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1457 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } 1460 1461 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1462 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1463 { 1464 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1465 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1466 PetscInt *rowlens,*colidxs; 1467 PetscScalar *matvals; 1468 PetscErrorCode ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1472 1473 M = mat->rmap->N; 1474 N = mat->cmap->N; 1475 m = mat->rmap->n; 1476 bs = mat->rmap->bs; 1477 nz = bs*bs*A->nz; 1478 1479 /* write matrix header */ 1480 header[0] = MAT_FILE_CLASSID; 1481 header[1] = M; header[2] = N; header[3] = nz; 1482 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1483 1484 /* store row lengths */ 1485 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1486 for (cnt=0, i=0; i<A->mbs; i++) 1487 for (j=0; j<bs; j++) 1488 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1489 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1490 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1491 1492 /* store column indices */ 1493 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1494 for (cnt=0, i=0; i<A->mbs; i++) 1495 for (k=0; k<bs; k++) 1496 for (j=A->i[i]; j<A->i[i+1]; j++) 1497 for (l=0; l<bs; l++) 1498 colidxs[cnt++] = bs*A->j[j] + l; 1499 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1500 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1501 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1502 1503 /* store nonzero values */ 1504 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1505 for (cnt=0, i=0; i<A->mbs; i++) 1506 for (k=0; k<bs; k++) 1507 for (j=A->i[i]; j<A->i[i+1]; j++) 1508 for (l=0; l<bs; l++) 1509 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1510 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1511 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1512 ierr = PetscFree(matvals);CHKERRQ(ierr); 1513 1514 /* write block size option to the viewer's .info file */ 1515 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1516 PetscFunctionReturn(0); 1517 } 1518 1519 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1520 { 1521 PetscErrorCode ierr; 1522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1523 PetscInt i,bs = A->rmap->bs,k; 1524 1525 PetscFunctionBegin; 1526 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1527 for (i=0; i<a->mbs; i++) { 1528 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1529 for (k=a->i[i]; k<a->i[i+1]; k++) { 1530 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1531 } 1532 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1533 } 1534 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1539 { 1540 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1541 PetscErrorCode ierr; 1542 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1543 PetscViewerFormat format; 1544 1545 PetscFunctionBegin; 1546 if (A->structure_only) { 1547 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1548 PetscFunctionReturn(0); 1549 } 1550 1551 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1552 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1553 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1554 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1555 const char *matname; 1556 Mat aij; 1557 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1558 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1559 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1560 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1561 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1562 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1563 PetscFunctionReturn(0); 1564 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1565 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1566 for (i=0; i<a->mbs; i++) { 1567 for (j=0; j<bs; j++) { 1568 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1569 for (k=a->i[i]; k<a->i[i+1]; k++) { 1570 for (l=0; l<bs; l++) { 1571 #if defined(PETSC_USE_COMPLEX) 1572 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1574 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1575 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1576 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1577 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1578 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1579 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1580 } 1581 #else 1582 if (a->a[bs2*k + l*bs + j] != 0.0) { 1583 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1584 } 1585 #endif 1586 } 1587 } 1588 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1589 } 1590 } 1591 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1592 } else { 1593 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1594 for (i=0; i<a->mbs; i++) { 1595 for (j=0; j<bs; j++) { 1596 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1597 for (k=a->i[i]; k<a->i[i+1]; k++) { 1598 for (l=0; l<bs; l++) { 1599 #if defined(PETSC_USE_COMPLEX) 1600 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1601 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1602 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1603 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1604 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1605 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1606 } else { 1607 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1608 } 1609 #else 1610 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1611 #endif 1612 } 1613 } 1614 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1615 } 1616 } 1617 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1618 } 1619 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #include <petscdraw.h> 1624 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1625 { 1626 Mat A = (Mat) Aa; 1627 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1628 PetscErrorCode ierr; 1629 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1630 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1631 MatScalar *aa; 1632 PetscViewer viewer; 1633 PetscViewerFormat format; 1634 1635 PetscFunctionBegin; 1636 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1637 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1638 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1639 1640 /* loop over matrix elements drawing boxes */ 1641 1642 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1643 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1644 /* Blue for negative, Cyan for zero and Red for positive */ 1645 color = PETSC_DRAW_BLUE; 1646 for (i=0,row=0; i<mbs; i++,row+=bs) { 1647 for (j=a->i[i]; j<a->i[i+1]; j++) { 1648 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1649 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1650 aa = a->a + j*bs2; 1651 for (k=0; k<bs; k++) { 1652 for (l=0; l<bs; l++) { 1653 if (PetscRealPart(*aa++) >= 0.) continue; 1654 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1655 } 1656 } 1657 } 1658 } 1659 color = PETSC_DRAW_CYAN; 1660 for (i=0,row=0; i<mbs; i++,row+=bs) { 1661 for (j=a->i[i]; j<a->i[i+1]; j++) { 1662 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1663 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1664 aa = a->a + j*bs2; 1665 for (k=0; k<bs; k++) { 1666 for (l=0; l<bs; l++) { 1667 if (PetscRealPart(*aa++) != 0.) continue; 1668 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1669 } 1670 } 1671 } 1672 } 1673 color = PETSC_DRAW_RED; 1674 for (i=0,row=0; i<mbs; i++,row+=bs) { 1675 for (j=a->i[i]; j<a->i[i+1]; j++) { 1676 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1677 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1678 aa = a->a + j*bs2; 1679 for (k=0; k<bs; k++) { 1680 for (l=0; l<bs; l++) { 1681 if (PetscRealPart(*aa++) <= 0.) continue; 1682 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1683 } 1684 } 1685 } 1686 } 1687 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1688 } else { 1689 /* use contour shading to indicate magnitude of values */ 1690 /* first determine max of all nonzero values */ 1691 PetscReal minv = 0.0, maxv = 0.0; 1692 PetscDraw popup; 1693 1694 for (i=0; i<a->nz*a->bs2; i++) { 1695 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1696 } 1697 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1698 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1699 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1700 1701 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1702 for (i=0,row=0; i<mbs; i++,row+=bs) { 1703 for (j=a->i[i]; j<a->i[i+1]; j++) { 1704 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1705 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1706 aa = a->a + j*bs2; 1707 for (k=0; k<bs; k++) { 1708 for (l=0; l<bs; l++) { 1709 MatScalar v = *aa++; 1710 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1711 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1712 } 1713 } 1714 } 1715 } 1716 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1722 { 1723 PetscErrorCode ierr; 1724 PetscReal xl,yl,xr,yr,w,h; 1725 PetscDraw draw; 1726 PetscBool isnull; 1727 1728 PetscFunctionBegin; 1729 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1730 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1731 if (isnull) PetscFunctionReturn(0); 1732 1733 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1734 xr += w; yr += h; xl = -w; yl = -h; 1735 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1736 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1737 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1738 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1739 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1744 { 1745 PetscErrorCode ierr; 1746 PetscBool iascii,isbinary,isdraw; 1747 1748 PetscFunctionBegin; 1749 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1750 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1752 if (iascii) { 1753 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1754 } else if (isbinary) { 1755 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1756 } else if (isdraw) { 1757 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1758 } else { 1759 Mat B; 1760 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1761 ierr = MatView(B,viewer);CHKERRQ(ierr); 1762 ierr = MatDestroy(&B);CHKERRQ(ierr); 1763 } 1764 PetscFunctionReturn(0); 1765 } 1766 1767 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1768 { 1769 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1770 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1771 PetscInt *ai = a->i,*ailen = a->ilen; 1772 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1773 MatScalar *ap,*aa = a->a; 1774 1775 PetscFunctionBegin; 1776 for (k=0; k<m; k++) { /* loop over rows */ 1777 row = im[k]; brow = row/bs; 1778 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1779 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1780 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1781 nrow = ailen[brow]; 1782 for (l=0; l<n; l++) { /* loop over columns */ 1783 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1784 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1785 col = in[l]; 1786 bcol = col/bs; 1787 cidx = col%bs; 1788 ridx = row%bs; 1789 high = nrow; 1790 low = 0; /* assume unsorted */ 1791 while (high-low > 5) { 1792 t = (low+high)/2; 1793 if (rp[t] > bcol) high = t; 1794 else low = t; 1795 } 1796 for (i=low; i<high; i++) { 1797 if (rp[i] > bcol) break; 1798 if (rp[i] == bcol) { 1799 *v++ = ap[bs2*i+bs*cidx+ridx]; 1800 goto finished; 1801 } 1802 } 1803 *v++ = 0.0; 1804 finished:; 1805 } 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 1810 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1811 { 1812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1813 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1814 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1815 PetscErrorCode ierr; 1816 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1817 PetscBool roworiented=a->roworiented; 1818 const PetscScalar *value = v; 1819 MatScalar *ap=NULL,*aa = a->a,*bap; 1820 1821 PetscFunctionBegin; 1822 if (roworiented) { 1823 stepval = (n-1)*bs; 1824 } else { 1825 stepval = (m-1)*bs; 1826 } 1827 for (k=0; k<m; k++) { /* loop over added rows */ 1828 row = im[k]; 1829 if (row < 0) continue; 1830 if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1831 rp = aj + ai[row]; 1832 if (!A->structure_only) ap = aa + bs2*ai[row]; 1833 rmax = imax[row]; 1834 nrow = ailen[row]; 1835 low = 0; 1836 high = nrow; 1837 for (l=0; l<n; l++) { /* loop over added columns */ 1838 if (in[l] < 0) continue; 1839 if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1840 col = in[l]; 1841 if (!A->structure_only) { 1842 if (roworiented) { 1843 value = v + (k*(stepval+bs) + l)*bs; 1844 } else { 1845 value = v + (l*(stepval+bs) + k)*bs; 1846 } 1847 } 1848 if (col <= lastcol) low = 0; 1849 else high = nrow; 1850 lastcol = col; 1851 while (high-low > 7) { 1852 t = (low+high)/2; 1853 if (rp[t] > col) high = t; 1854 else low = t; 1855 } 1856 for (i=low; i<high; i++) { 1857 if (rp[i] > col) break; 1858 if (rp[i] == col) { 1859 if (A->structure_only) goto noinsert2; 1860 bap = ap + bs2*i; 1861 if (roworiented) { 1862 if (is == ADD_VALUES) { 1863 for (ii=0; ii<bs; ii++,value+=stepval) { 1864 for (jj=ii; jj<bs2; jj+=bs) { 1865 bap[jj] += *value++; 1866 } 1867 } 1868 } else { 1869 for (ii=0; ii<bs; ii++,value+=stepval) { 1870 for (jj=ii; jj<bs2; jj+=bs) { 1871 bap[jj] = *value++; 1872 } 1873 } 1874 } 1875 } else { 1876 if (is == ADD_VALUES) { 1877 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1878 for (jj=0; jj<bs; jj++) { 1879 bap[jj] += value[jj]; 1880 } 1881 bap += bs; 1882 } 1883 } else { 1884 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1885 for (jj=0; jj<bs; jj++) { 1886 bap[jj] = value[jj]; 1887 } 1888 bap += bs; 1889 } 1890 } 1891 } 1892 goto noinsert2; 1893 } 1894 } 1895 if (nonew == 1) goto noinsert2; 1896 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1897 if (A->structure_only) { 1898 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1899 } else { 1900 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1901 } 1902 N = nrow++ - 1; high++; 1903 /* shift up all the later entries in this row */ 1904 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1905 rp[i] = col; 1906 if (!A->structure_only) { 1907 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1908 bap = ap + bs2*i; 1909 if (roworiented) { 1910 for (ii=0; ii<bs; ii++,value+=stepval) { 1911 for (jj=ii; jj<bs2; jj+=bs) { 1912 bap[jj] = *value++; 1913 } 1914 } 1915 } else { 1916 for (ii=0; ii<bs; ii++,value+=stepval) { 1917 for (jj=0; jj<bs; jj++) { 1918 *bap++ = *value++; 1919 } 1920 } 1921 } 1922 } 1923 noinsert2:; 1924 low = i; 1925 } 1926 ailen[row] = nrow; 1927 } 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1932 { 1933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1934 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1935 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1936 PetscErrorCode ierr; 1937 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1938 MatScalar *aa = a->a,*ap; 1939 PetscReal ratio=0.6; 1940 1941 PetscFunctionBegin; 1942 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1943 1944 if (m) rmax = ailen[0]; 1945 for (i=1; i<mbs; i++) { 1946 /* move each row back by the amount of empty slots (fshift) before it*/ 1947 fshift += imax[i-1] - ailen[i-1]; 1948 rmax = PetscMax(rmax,ailen[i]); 1949 if (fshift) { 1950 ip = aj + ai[i]; 1951 ap = aa + bs2*ai[i]; 1952 N = ailen[i]; 1953 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1954 if (!A->structure_only) { 1955 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1956 } 1957 } 1958 ai[i] = ai[i-1] + ailen[i-1]; 1959 } 1960 if (mbs) { 1961 fshift += imax[mbs-1] - ailen[mbs-1]; 1962 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1963 } 1964 1965 /* reset ilen and imax for each row */ 1966 a->nonzerorowcnt = 0; 1967 if (A->structure_only) { 1968 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1969 } else { /* !A->structure_only */ 1970 for (i=0; i<mbs; i++) { 1971 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1972 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1973 } 1974 } 1975 a->nz = ai[mbs]; 1976 1977 /* diagonals may have moved, so kill the diagonal pointers */ 1978 a->idiagvalid = PETSC_FALSE; 1979 if (fshift && a->diag) { 1980 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1981 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1982 a->diag = NULL; 1983 } 1984 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1985 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1986 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1987 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1988 1989 A->info.mallocs += a->reallocs; 1990 a->reallocs = 0; 1991 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1992 a->rmax = rmax; 1993 1994 if (!A->structure_only) { 1995 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1996 } 1997 PetscFunctionReturn(0); 1998 } 1999 2000 /* 2001 This function returns an array of flags which indicate the locations of contiguous 2002 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2003 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2004 Assume: sizes should be long enough to hold all the values. 2005 */ 2006 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2007 { 2008 PetscInt i,j,k,row; 2009 PetscBool flg; 2010 2011 PetscFunctionBegin; 2012 for (i=0,j=0; i<n; j++) { 2013 row = idx[i]; 2014 if (row%bs!=0) { /* Not the beginning of a block */ 2015 sizes[j] = 1; 2016 i++; 2017 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2018 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2019 i++; 2020 } else { /* Begining of the block, so check if the complete block exists */ 2021 flg = PETSC_TRUE; 2022 for (k=1; k<bs; k++) { 2023 if (row+k != idx[i+k]) { /* break in the block */ 2024 flg = PETSC_FALSE; 2025 break; 2026 } 2027 } 2028 if (flg) { /* No break in the bs */ 2029 sizes[j] = bs; 2030 i += bs; 2031 } else { 2032 sizes[j] = 1; 2033 i++; 2034 } 2035 } 2036 } 2037 *bs_max = j; 2038 PetscFunctionReturn(0); 2039 } 2040 2041 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2042 { 2043 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2044 PetscErrorCode ierr; 2045 PetscInt i,j,k,count,*rows; 2046 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2047 PetscScalar zero = 0.0; 2048 MatScalar *aa; 2049 const PetscScalar *xx; 2050 PetscScalar *bb; 2051 2052 PetscFunctionBegin; 2053 /* fix right hand side if needed */ 2054 if (x && b) { 2055 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2056 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2057 for (i=0; i<is_n; i++) { 2058 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2059 } 2060 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2061 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2062 } 2063 2064 /* Make a copy of the IS and sort it */ 2065 /* allocate memory for rows,sizes */ 2066 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2067 2068 /* copy IS values to rows, and sort them */ 2069 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2070 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2071 2072 if (baij->keepnonzeropattern) { 2073 for (i=0; i<is_n; i++) sizes[i] = 1; 2074 bs_max = is_n; 2075 } else { 2076 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2077 A->nonzerostate++; 2078 } 2079 2080 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2081 row = rows[j]; 2082 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2083 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2084 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2085 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2086 if (diag != (PetscScalar)0.0) { 2087 if (baij->ilen[row/bs] > 0) { 2088 baij->ilen[row/bs] = 1; 2089 baij->j[baij->i[row/bs]] = row/bs; 2090 2091 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2092 } 2093 /* Now insert all the diagonal values for this bs */ 2094 for (k=0; k<bs; k++) { 2095 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2096 } 2097 } else { /* (diag == 0.0) */ 2098 baij->ilen[row/bs] = 0; 2099 } /* end (diag == 0.0) */ 2100 } else { /* (sizes[i] != bs) */ 2101 if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2102 for (k=0; k<count; k++) { 2103 aa[0] = zero; 2104 aa += bs; 2105 } 2106 if (diag != (PetscScalar)0.0) { 2107 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2108 } 2109 } 2110 } 2111 2112 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2113 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 PetscFunctionReturn(0); 2115 } 2116 2117 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2118 { 2119 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2120 PetscErrorCode ierr; 2121 PetscInt i,j,k,count; 2122 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2123 PetscScalar zero = 0.0; 2124 MatScalar *aa; 2125 const PetscScalar *xx; 2126 PetscScalar *bb; 2127 PetscBool *zeroed,vecs = PETSC_FALSE; 2128 2129 PetscFunctionBegin; 2130 /* fix right hand side if needed */ 2131 if (x && b) { 2132 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2133 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2134 vecs = PETSC_TRUE; 2135 } 2136 2137 /* zero the columns */ 2138 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2139 for (i=0; i<is_n; i++) { 2140 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2141 zeroed[is_idx[i]] = PETSC_TRUE; 2142 } 2143 for (i=0; i<A->rmap->N; i++) { 2144 if (!zeroed[i]) { 2145 row = i/bs; 2146 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2147 for (k=0; k<bs; k++) { 2148 col = bs*baij->j[j] + k; 2149 if (zeroed[col]) { 2150 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2151 if (vecs) bb[i] -= aa[0]*xx[col]; 2152 aa[0] = 0.0; 2153 } 2154 } 2155 } 2156 } else if (vecs) bb[i] = diag*xx[i]; 2157 } 2158 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2159 if (vecs) { 2160 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2161 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2162 } 2163 2164 /* zero the rows */ 2165 for (i=0; i<is_n; i++) { 2166 row = is_idx[i]; 2167 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2168 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2169 for (k=0; k<count; k++) { 2170 aa[0] = zero; 2171 aa += bs; 2172 } 2173 if (diag != (PetscScalar)0.0) { 2174 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2175 } 2176 } 2177 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 PetscFunctionReturn(0); 2179 } 2180 2181 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2182 { 2183 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2184 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2185 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2186 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2187 PetscErrorCode ierr; 2188 PetscInt ridx,cidx,bs2=a->bs2; 2189 PetscBool roworiented=a->roworiented; 2190 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2191 2192 PetscFunctionBegin; 2193 for (k=0; k<m; k++) { /* loop over added rows */ 2194 row = im[k]; 2195 brow = row/bs; 2196 if (row < 0) continue; 2197 if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2198 rp = aj + ai[brow]; 2199 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2200 rmax = imax[brow]; 2201 nrow = ailen[brow]; 2202 low = 0; 2203 high = nrow; 2204 for (l=0; l<n; l++) { /* loop over added columns */ 2205 if (in[l] < 0) continue; 2206 if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2207 col = in[l]; bcol = col/bs; 2208 ridx = row % bs; cidx = col % bs; 2209 if (!A->structure_only) { 2210 if (roworiented) { 2211 value = v[l + k*n]; 2212 } else { 2213 value = v[k + l*m]; 2214 } 2215 } 2216 if (col <= lastcol) low = 0; else high = nrow; 2217 lastcol = col; 2218 while (high-low > 7) { 2219 t = (low+high)/2; 2220 if (rp[t] > bcol) high = t; 2221 else low = t; 2222 } 2223 for (i=low; i<high; i++) { 2224 if (rp[i] > bcol) break; 2225 if (rp[i] == bcol) { 2226 bap = ap + bs2*i + bs*cidx + ridx; 2227 if (!A->structure_only) { 2228 if (is == ADD_VALUES) *bap += value; 2229 else *bap = value; 2230 } 2231 goto noinsert1; 2232 } 2233 } 2234 if (nonew == 1) goto noinsert1; 2235 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2236 if (A->structure_only) { 2237 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2238 } else { 2239 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2240 } 2241 N = nrow++ - 1; high++; 2242 /* shift up all the later entries in this row */ 2243 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2244 rp[i] = bcol; 2245 if (!A->structure_only) { 2246 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2247 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2248 ap[bs2*i + bs*cidx + ridx] = value; 2249 } 2250 a->nz++; 2251 A->nonzerostate++; 2252 noinsert1:; 2253 low = i; 2254 } 2255 ailen[brow] = nrow; 2256 } 2257 PetscFunctionReturn(0); 2258 } 2259 2260 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2261 { 2262 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2263 Mat outA; 2264 PetscErrorCode ierr; 2265 PetscBool row_identity,col_identity; 2266 2267 PetscFunctionBegin; 2268 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2269 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2270 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2271 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2272 2273 outA = inA; 2274 inA->factortype = MAT_FACTOR_LU; 2275 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2276 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2277 2278 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2279 2280 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2281 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2282 a->row = row; 2283 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2284 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2285 a->col = col; 2286 2287 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2288 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2289 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2290 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2291 2292 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2293 if (!a->solve_work) { 2294 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2295 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2296 } 2297 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2302 { 2303 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2304 PetscInt i,nz,mbs; 2305 2306 PetscFunctionBegin; 2307 nz = baij->maxnz; 2308 mbs = baij->mbs; 2309 for (i=0; i<nz; i++) { 2310 baij->j[i] = indices[i]; 2311 } 2312 baij->nz = nz; 2313 for (i=0; i<mbs; i++) { 2314 baij->ilen[i] = baij->imax[i]; 2315 } 2316 PetscFunctionReturn(0); 2317 } 2318 2319 /*@ 2320 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2321 in the matrix. 2322 2323 Input Parameters: 2324 + mat - the SeqBAIJ matrix 2325 - indices - the column indices 2326 2327 Level: advanced 2328 2329 Notes: 2330 This can be called if you have precomputed the nonzero structure of the 2331 matrix and want to provide it to the matrix object to improve the performance 2332 of the MatSetValues() operation. 2333 2334 You MUST have set the correct numbers of nonzeros per row in the call to 2335 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2336 2337 MUST be called before any calls to MatSetValues(); 2338 2339 @*/ 2340 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2341 { 2342 PetscErrorCode ierr; 2343 2344 PetscFunctionBegin; 2345 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2346 PetscValidPointer(indices,2); 2347 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2348 PetscFunctionReturn(0); 2349 } 2350 2351 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2352 { 2353 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2354 PetscErrorCode ierr; 2355 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2356 PetscReal atmp; 2357 PetscScalar *x,zero = 0.0; 2358 MatScalar *aa; 2359 PetscInt ncols,brow,krow,kcol; 2360 2361 PetscFunctionBegin; 2362 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2363 bs = A->rmap->bs; 2364 aa = a->a; 2365 ai = a->i; 2366 aj = a->j; 2367 mbs = a->mbs; 2368 2369 ierr = VecSet(v,zero);CHKERRQ(ierr); 2370 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2371 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2372 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2373 for (i=0; i<mbs; i++) { 2374 ncols = ai[1] - ai[0]; ai++; 2375 brow = bs*i; 2376 for (j=0; j<ncols; j++) { 2377 for (kcol=0; kcol<bs; kcol++) { 2378 for (krow=0; krow<bs; krow++) { 2379 atmp = PetscAbsScalar(*aa);aa++; 2380 row = brow + krow; /* row index */ 2381 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2382 } 2383 } 2384 aj++; 2385 } 2386 } 2387 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2392 { 2393 PetscErrorCode ierr; 2394 2395 PetscFunctionBegin; 2396 /* If the two matrices have the same copy implementation, use fast copy. */ 2397 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2398 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2399 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2400 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2401 2402 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2403 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2404 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2405 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2406 } else { 2407 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2408 } 2409 PetscFunctionReturn(0); 2410 } 2411 2412 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2413 { 2414 PetscErrorCode ierr; 2415 2416 PetscFunctionBegin; 2417 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2422 { 2423 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2424 2425 PetscFunctionBegin; 2426 *array = a->a; 2427 PetscFunctionReturn(0); 2428 } 2429 2430 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2431 { 2432 PetscFunctionBegin; 2433 *array = NULL; 2434 PetscFunctionReturn(0); 2435 } 2436 2437 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2438 { 2439 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2440 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2441 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2442 PetscErrorCode ierr; 2443 2444 PetscFunctionBegin; 2445 /* Set the number of nonzeros in the new matrix */ 2446 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2451 { 2452 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2453 PetscErrorCode ierr; 2454 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2455 PetscBLASInt one=1; 2456 2457 PetscFunctionBegin; 2458 if (str == SAME_NONZERO_PATTERN) { 2459 PetscScalar alpha = a; 2460 PetscBLASInt bnz; 2461 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2462 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2463 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2464 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2465 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2466 } else { 2467 Mat B; 2468 PetscInt *nnz; 2469 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2470 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2471 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2472 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2473 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2474 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2475 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2476 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2477 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2478 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2479 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2480 ierr = PetscFree(nnz);CHKERRQ(ierr); 2481 } 2482 PetscFunctionReturn(0); 2483 } 2484 2485 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2486 { 2487 #if defined(PETSC_USE_COMPLEX) 2488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2489 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2490 MatScalar *aa = a->a; 2491 2492 PetscFunctionBegin; 2493 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2494 #else 2495 PetscFunctionBegin; 2496 #endif 2497 PetscFunctionReturn(0); 2498 } 2499 2500 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2501 { 2502 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2503 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2504 MatScalar *aa = a->a; 2505 2506 PetscFunctionBegin; 2507 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2508 PetscFunctionReturn(0); 2509 } 2510 2511 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2512 { 2513 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2514 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2515 MatScalar *aa = a->a; 2516 2517 PetscFunctionBegin; 2518 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 /* 2523 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2524 */ 2525 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2526 { 2527 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2528 PetscErrorCode ierr; 2529 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2530 PetscInt nz = a->i[m],row,*jj,mr,col; 2531 2532 PetscFunctionBegin; 2533 *nn = n; 2534 if (!ia) PetscFunctionReturn(0); 2535 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2536 else { 2537 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2538 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2539 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2540 jj = a->j; 2541 for (i=0; i<nz; i++) { 2542 collengths[jj[i]]++; 2543 } 2544 cia[0] = oshift; 2545 for (i=0; i<n; i++) { 2546 cia[i+1] = cia[i] + collengths[i]; 2547 } 2548 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2549 jj = a->j; 2550 for (row=0; row<m; row++) { 2551 mr = a->i[row+1] - a->i[row]; 2552 for (i=0; i<mr; i++) { 2553 col = *jj++; 2554 2555 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2556 } 2557 } 2558 ierr = PetscFree(collengths);CHKERRQ(ierr); 2559 *ia = cia; *ja = cja; 2560 } 2561 PetscFunctionReturn(0); 2562 } 2563 2564 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2565 { 2566 PetscErrorCode ierr; 2567 2568 PetscFunctionBegin; 2569 if (!ia) PetscFunctionReturn(0); 2570 ierr = PetscFree(*ia);CHKERRQ(ierr); 2571 ierr = PetscFree(*ja);CHKERRQ(ierr); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 /* 2576 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2577 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2578 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2579 */ 2580 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2581 { 2582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2583 PetscErrorCode ierr; 2584 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2585 PetscInt nz = a->i[m],row,*jj,mr,col; 2586 PetscInt *cspidx; 2587 2588 PetscFunctionBegin; 2589 *nn = n; 2590 if (!ia) PetscFunctionReturn(0); 2591 2592 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2593 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2594 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2595 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2596 jj = a->j; 2597 for (i=0; i<nz; i++) { 2598 collengths[jj[i]]++; 2599 } 2600 cia[0] = oshift; 2601 for (i=0; i<n; i++) { 2602 cia[i+1] = cia[i] + collengths[i]; 2603 } 2604 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2605 jj = a->j; 2606 for (row=0; row<m; row++) { 2607 mr = a->i[row+1] - a->i[row]; 2608 for (i=0; i<mr; i++) { 2609 col = *jj++; 2610 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2611 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2612 } 2613 } 2614 ierr = PetscFree(collengths);CHKERRQ(ierr); 2615 *ia = cia; 2616 *ja = cja; 2617 *spidx = cspidx; 2618 PetscFunctionReturn(0); 2619 } 2620 2621 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2622 { 2623 PetscErrorCode ierr; 2624 2625 PetscFunctionBegin; 2626 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2627 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2628 PetscFunctionReturn(0); 2629 } 2630 2631 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2632 { 2633 PetscErrorCode ierr; 2634 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2635 2636 PetscFunctionBegin; 2637 if (!Y->preallocated || !aij->nz) { 2638 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2639 } 2640 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2641 PetscFunctionReturn(0); 2642 } 2643 2644 /* -------------------------------------------------------------------*/ 2645 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2646 MatGetRow_SeqBAIJ, 2647 MatRestoreRow_SeqBAIJ, 2648 MatMult_SeqBAIJ_N, 2649 /* 4*/ MatMultAdd_SeqBAIJ_N, 2650 MatMultTranspose_SeqBAIJ, 2651 MatMultTransposeAdd_SeqBAIJ, 2652 NULL, 2653 NULL, 2654 NULL, 2655 /* 10*/ NULL, 2656 MatLUFactor_SeqBAIJ, 2657 NULL, 2658 NULL, 2659 MatTranspose_SeqBAIJ, 2660 /* 15*/ MatGetInfo_SeqBAIJ, 2661 MatEqual_SeqBAIJ, 2662 MatGetDiagonal_SeqBAIJ, 2663 MatDiagonalScale_SeqBAIJ, 2664 MatNorm_SeqBAIJ, 2665 /* 20*/ NULL, 2666 MatAssemblyEnd_SeqBAIJ, 2667 MatSetOption_SeqBAIJ, 2668 MatZeroEntries_SeqBAIJ, 2669 /* 24*/ MatZeroRows_SeqBAIJ, 2670 NULL, 2671 NULL, 2672 NULL, 2673 NULL, 2674 /* 29*/ MatSetUp_SeqBAIJ, 2675 NULL, 2676 NULL, 2677 NULL, 2678 NULL, 2679 /* 34*/ MatDuplicate_SeqBAIJ, 2680 NULL, 2681 NULL, 2682 MatILUFactor_SeqBAIJ, 2683 NULL, 2684 /* 39*/ MatAXPY_SeqBAIJ, 2685 MatCreateSubMatrices_SeqBAIJ, 2686 MatIncreaseOverlap_SeqBAIJ, 2687 MatGetValues_SeqBAIJ, 2688 MatCopy_SeqBAIJ, 2689 /* 44*/ NULL, 2690 MatScale_SeqBAIJ, 2691 MatShift_SeqBAIJ, 2692 NULL, 2693 MatZeroRowsColumns_SeqBAIJ, 2694 /* 49*/ NULL, 2695 MatGetRowIJ_SeqBAIJ, 2696 MatRestoreRowIJ_SeqBAIJ, 2697 MatGetColumnIJ_SeqBAIJ, 2698 MatRestoreColumnIJ_SeqBAIJ, 2699 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2700 NULL, 2701 NULL, 2702 NULL, 2703 MatSetValuesBlocked_SeqBAIJ, 2704 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2705 MatDestroy_SeqBAIJ, 2706 MatView_SeqBAIJ, 2707 NULL, 2708 NULL, 2709 /* 64*/ NULL, 2710 NULL, 2711 NULL, 2712 NULL, 2713 NULL, 2714 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2715 NULL, 2716 MatConvert_Basic, 2717 NULL, 2718 NULL, 2719 /* 74*/ NULL, 2720 MatFDColoringApply_BAIJ, 2721 NULL, 2722 NULL, 2723 NULL, 2724 /* 79*/ NULL, 2725 NULL, 2726 NULL, 2727 NULL, 2728 MatLoad_SeqBAIJ, 2729 /* 84*/ NULL, 2730 NULL, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /* 89*/ NULL, 2735 NULL, 2736 NULL, 2737 NULL, 2738 NULL, 2739 /* 94*/ NULL, 2740 NULL, 2741 NULL, 2742 NULL, 2743 NULL, 2744 /* 99*/ NULL, 2745 NULL, 2746 NULL, 2747 MatConjugate_SeqBAIJ, 2748 NULL, 2749 /*104*/ NULL, 2750 MatRealPart_SeqBAIJ, 2751 MatImaginaryPart_SeqBAIJ, 2752 NULL, 2753 NULL, 2754 /*109*/ NULL, 2755 NULL, 2756 NULL, 2757 NULL, 2758 MatMissingDiagonal_SeqBAIJ, 2759 /*114*/ NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 NULL, 2764 /*119*/ NULL, 2765 NULL, 2766 MatMultHermitianTranspose_SeqBAIJ, 2767 MatMultHermitianTransposeAdd_SeqBAIJ, 2768 NULL, 2769 /*124*/ NULL, 2770 MatGetColumnNorms_SeqBAIJ, 2771 MatInvertBlockDiagonal_SeqBAIJ, 2772 NULL, 2773 NULL, 2774 /*129*/ NULL, 2775 NULL, 2776 NULL, 2777 NULL, 2778 NULL, 2779 /*134*/ NULL, 2780 NULL, 2781 NULL, 2782 NULL, 2783 NULL, 2784 /*139*/ MatSetBlockSizes_Default, 2785 NULL, 2786 NULL, 2787 MatFDColoringSetUp_SeqXAIJ, 2788 NULL, 2789 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2790 MatDestroySubMatrices_SeqBAIJ 2791 }; 2792 2793 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2794 { 2795 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2796 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2797 PetscErrorCode ierr; 2798 2799 PetscFunctionBegin; 2800 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2801 2802 /* allocate space for values if not already there */ 2803 if (!aij->saved_values) { 2804 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2805 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2806 } 2807 2808 /* copy values over */ 2809 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2810 PetscFunctionReturn(0); 2811 } 2812 2813 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2814 { 2815 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2816 PetscErrorCode ierr; 2817 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2818 2819 PetscFunctionBegin; 2820 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2821 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2822 2823 /* copy values over */ 2824 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2829 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2830 2831 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2832 { 2833 Mat_SeqBAIJ *b; 2834 PetscErrorCode ierr; 2835 PetscInt i,mbs,nbs,bs2; 2836 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2837 2838 PetscFunctionBegin; 2839 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2840 if (nz == MAT_SKIP_ALLOCATION) { 2841 skipallocation = PETSC_TRUE; 2842 nz = 0; 2843 } 2844 2845 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2846 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2847 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2848 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2849 2850 B->preallocated = PETSC_TRUE; 2851 2852 mbs = B->rmap->n/bs; 2853 nbs = B->cmap->n/bs; 2854 bs2 = bs*bs; 2855 2856 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2857 2858 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2859 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2860 if (nnz) { 2861 for (i=0; i<mbs; i++) { 2862 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2863 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2864 } 2865 } 2866 2867 b = (Mat_SeqBAIJ*)B->data; 2868 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2869 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2870 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2871 2872 if (!flg) { 2873 switch (bs) { 2874 case 1: 2875 B->ops->mult = MatMult_SeqBAIJ_1; 2876 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2877 break; 2878 case 2: 2879 B->ops->mult = MatMult_SeqBAIJ_2; 2880 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2881 break; 2882 case 3: 2883 B->ops->mult = MatMult_SeqBAIJ_3; 2884 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2885 break; 2886 case 4: 2887 B->ops->mult = MatMult_SeqBAIJ_4; 2888 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2889 break; 2890 case 5: 2891 B->ops->mult = MatMult_SeqBAIJ_5; 2892 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2893 break; 2894 case 6: 2895 B->ops->mult = MatMult_SeqBAIJ_6; 2896 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2897 break; 2898 case 7: 2899 B->ops->mult = MatMult_SeqBAIJ_7; 2900 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2901 break; 2902 case 9: 2903 { 2904 PetscInt version = 1; 2905 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2906 switch (version) { 2907 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2908 case 1: 2909 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2910 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2911 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2912 break; 2913 #endif 2914 default: 2915 B->ops->mult = MatMult_SeqBAIJ_N; 2916 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2917 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2918 break; 2919 } 2920 break; 2921 } 2922 case 11: 2923 B->ops->mult = MatMult_SeqBAIJ_11; 2924 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2925 break; 2926 case 12: 2927 { 2928 PetscInt version = 1; 2929 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2930 switch (version) { 2931 case 1: 2932 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2933 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2934 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2935 break; 2936 case 2: 2937 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2938 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2939 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2940 break; 2941 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2942 case 3: 2943 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2944 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2945 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2946 break; 2947 #endif 2948 default: 2949 B->ops->mult = MatMult_SeqBAIJ_N; 2950 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2951 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2952 break; 2953 } 2954 break; 2955 } 2956 case 15: 2957 { 2958 PetscInt version = 1; 2959 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2960 switch (version) { 2961 case 1: 2962 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2963 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2964 break; 2965 case 2: 2966 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2967 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2968 break; 2969 case 3: 2970 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2971 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2972 break; 2973 case 4: 2974 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2975 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2976 break; 2977 default: 2978 B->ops->mult = MatMult_SeqBAIJ_N; 2979 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2980 break; 2981 } 2982 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2983 break; 2984 } 2985 default: 2986 B->ops->mult = MatMult_SeqBAIJ_N; 2987 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2988 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2989 break; 2990 } 2991 } 2992 B->ops->sor = MatSOR_SeqBAIJ; 2993 b->mbs = mbs; 2994 b->nbs = nbs; 2995 if (!skipallocation) { 2996 if (!b->imax) { 2997 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2998 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2999 3000 b->free_imax_ilen = PETSC_TRUE; 3001 } 3002 /* b->ilen will count nonzeros in each block row so far. */ 3003 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3004 if (!nnz) { 3005 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3006 else if (nz < 0) nz = 1; 3007 nz = PetscMin(nz,nbs); 3008 for (i=0; i<mbs; i++) b->imax[i] = nz; 3009 ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 3010 } else { 3011 PetscInt64 nz64 = 0; 3012 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 3013 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 3014 } 3015 3016 /* allocate the matrix space */ 3017 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3018 if (B->structure_only) { 3019 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 3020 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 3021 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 3022 } else { 3023 PetscInt nzbs2 = 0; 3024 ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr); 3025 ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 3026 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3027 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 3028 } 3029 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 3030 3031 if (B->structure_only) { 3032 b->singlemalloc = PETSC_FALSE; 3033 b->free_a = PETSC_FALSE; 3034 } else { 3035 b->singlemalloc = PETSC_TRUE; 3036 b->free_a = PETSC_TRUE; 3037 } 3038 b->free_ij = PETSC_TRUE; 3039 3040 b->i[0] = 0; 3041 for (i=1; i<mbs+1; i++) { 3042 b->i[i] = b->i[i-1] + b->imax[i-1]; 3043 } 3044 3045 } else { 3046 b->free_a = PETSC_FALSE; 3047 b->free_ij = PETSC_FALSE; 3048 } 3049 3050 b->bs2 = bs2; 3051 b->mbs = mbs; 3052 b->nz = 0; 3053 b->maxnz = nz; 3054 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3055 B->was_assembled = PETSC_FALSE; 3056 B->assembled = PETSC_FALSE; 3057 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3058 PetscFunctionReturn(0); 3059 } 3060 3061 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3062 { 3063 PetscInt i,m,nz,nz_max=0,*nnz; 3064 PetscScalar *values=NULL; 3065 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3066 PetscErrorCode ierr; 3067 3068 PetscFunctionBegin; 3069 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3070 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3071 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3072 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3073 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3074 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3075 m = B->rmap->n/bs; 3076 3077 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3078 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3079 for (i=0; i<m; i++) { 3080 nz = ii[i+1]- ii[i]; 3081 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3082 nz_max = PetscMax(nz_max, nz); 3083 nnz[i] = nz; 3084 } 3085 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3086 ierr = PetscFree(nnz);CHKERRQ(ierr); 3087 3088 values = (PetscScalar*)V; 3089 if (!values) { 3090 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3091 } 3092 for (i=0; i<m; i++) { 3093 PetscInt ncols = ii[i+1] - ii[i]; 3094 const PetscInt *icols = jj + ii[i]; 3095 if (bs == 1 || !roworiented) { 3096 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3097 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3098 } else { 3099 PetscInt j; 3100 for (j=0; j<ncols; j++) { 3101 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3102 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3103 } 3104 } 3105 } 3106 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3107 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3108 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3110 PetscFunctionReturn(0); 3111 } 3112 3113 /*@C 3114 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3115 3116 Not Collective 3117 3118 Input Parameter: 3119 . mat - a MATSEQBAIJ matrix 3120 3121 Output Parameter: 3122 . array - pointer to the data 3123 3124 Level: intermediate 3125 3126 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3127 @*/ 3128 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3129 { 3130 PetscErrorCode ierr; 3131 3132 PetscFunctionBegin; 3133 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3134 PetscFunctionReturn(0); 3135 } 3136 3137 /*@C 3138 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3139 3140 Not Collective 3141 3142 Input Parameters: 3143 + mat - a MATSEQBAIJ matrix 3144 - array - pointer to the data 3145 3146 Level: intermediate 3147 3148 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3149 @*/ 3150 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3151 { 3152 PetscErrorCode ierr; 3153 3154 PetscFunctionBegin; 3155 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3156 PetscFunctionReturn(0); 3157 } 3158 3159 /*MC 3160 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3161 block sparse compressed row format. 3162 3163 Options Database Keys: 3164 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3165 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3166 3167 Level: beginner 3168 3169 Notes: 3170 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3171 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3172 3173 Run with -info to see what version of the matrix-vector product is being used 3174 3175 .seealso: MatCreateSeqBAIJ() 3176 M*/ 3177 3178 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3179 3180 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3181 { 3182 PetscErrorCode ierr; 3183 PetscMPIInt size; 3184 Mat_SeqBAIJ *b; 3185 3186 PetscFunctionBegin; 3187 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3188 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3189 3190 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3191 B->data = (void*)b; 3192 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3193 3194 b->row = NULL; 3195 b->col = NULL; 3196 b->icol = NULL; 3197 b->reallocs = 0; 3198 b->saved_values = NULL; 3199 3200 b->roworiented = PETSC_TRUE; 3201 b->nonew = 0; 3202 b->diag = NULL; 3203 B->spptr = NULL; 3204 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3205 b->keepnonzeropattern = PETSC_FALSE; 3206 3207 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3208 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3209 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3210 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3211 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3212 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3213 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3214 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3215 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3216 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3217 #if defined(PETSC_HAVE_HYPRE) 3218 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3219 #endif 3220 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3221 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3222 PetscFunctionReturn(0); 3223 } 3224 3225 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3226 { 3227 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3228 PetscErrorCode ierr; 3229 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3230 3231 PetscFunctionBegin; 3232 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3233 3234 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3235 c->imax = a->imax; 3236 c->ilen = a->ilen; 3237 c->free_imax_ilen = PETSC_FALSE; 3238 } else { 3239 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3240 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3241 for (i=0; i<mbs; i++) { 3242 c->imax[i] = a->imax[i]; 3243 c->ilen[i] = a->ilen[i]; 3244 } 3245 c->free_imax_ilen = PETSC_TRUE; 3246 } 3247 3248 /* allocate the matrix space */ 3249 if (mallocmatspace) { 3250 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3251 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3252 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3253 3254 c->i = a->i; 3255 c->j = a->j; 3256 c->singlemalloc = PETSC_FALSE; 3257 c->free_a = PETSC_TRUE; 3258 c->free_ij = PETSC_FALSE; 3259 c->parent = A; 3260 C->preallocated = PETSC_TRUE; 3261 C->assembled = PETSC_TRUE; 3262 3263 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3264 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3265 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3266 } else { 3267 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3268 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3269 3270 c->singlemalloc = PETSC_TRUE; 3271 c->free_a = PETSC_TRUE; 3272 c->free_ij = PETSC_TRUE; 3273 3274 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3275 if (mbs > 0) { 3276 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3277 if (cpvalues == MAT_COPY_VALUES) { 3278 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3279 } else { 3280 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3281 } 3282 } 3283 C->preallocated = PETSC_TRUE; 3284 C->assembled = PETSC_TRUE; 3285 } 3286 } 3287 3288 c->roworiented = a->roworiented; 3289 c->nonew = a->nonew; 3290 3291 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3292 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3293 3294 c->bs2 = a->bs2; 3295 c->mbs = a->mbs; 3296 c->nbs = a->nbs; 3297 3298 if (a->diag) { 3299 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3300 c->diag = a->diag; 3301 c->free_diag = PETSC_FALSE; 3302 } else { 3303 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3304 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3305 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3306 c->free_diag = PETSC_TRUE; 3307 } 3308 } else c->diag = NULL; 3309 3310 c->nz = a->nz; 3311 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3312 c->solve_work = NULL; 3313 c->mult_work = NULL; 3314 c->sor_workt = NULL; 3315 c->sor_work = NULL; 3316 3317 c->compressedrow.use = a->compressedrow.use; 3318 c->compressedrow.nrows = a->compressedrow.nrows; 3319 if (a->compressedrow.use) { 3320 i = a->compressedrow.nrows; 3321 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3322 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3323 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3324 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3325 } else { 3326 c->compressedrow.use = PETSC_FALSE; 3327 c->compressedrow.i = NULL; 3328 c->compressedrow.rindex = NULL; 3329 } 3330 C->nonzerostate = A->nonzerostate; 3331 3332 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3333 PetscFunctionReturn(0); 3334 } 3335 3336 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3337 { 3338 PetscErrorCode ierr; 3339 3340 PetscFunctionBegin; 3341 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3342 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3343 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3344 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3345 PetscFunctionReturn(0); 3346 } 3347 3348 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3349 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3350 { 3351 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3352 PetscInt *rowidxs,*colidxs; 3353 PetscScalar *matvals; 3354 PetscErrorCode ierr; 3355 3356 PetscFunctionBegin; 3357 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3358 3359 /* read matrix header */ 3360 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3361 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3362 M = header[1]; N = header[2]; nz = header[3]; 3363 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3364 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3365 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3366 3367 /* set block sizes from the viewer's .info file */ 3368 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3369 /* set local and global sizes if not set already */ 3370 if (mat->rmap->n < 0) mat->rmap->n = M; 3371 if (mat->cmap->n < 0) mat->cmap->n = N; 3372 if (mat->rmap->N < 0) mat->rmap->N = M; 3373 if (mat->cmap->N < 0) mat->cmap->N = N; 3374 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3375 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3376 3377 /* check if the matrix sizes are correct */ 3378 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3379 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3380 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3381 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3382 mbs = m/bs; nbs = n/bs; 3383 3384 /* read in row lengths, column indices and nonzero values */ 3385 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3386 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3387 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3388 sum = rowidxs[m]; 3389 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3390 3391 /* read in column indices and nonzero values */ 3392 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3393 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3394 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3395 3396 { /* preallocate matrix storage */ 3397 PetscBT bt; /* helper bit set to count nonzeros */ 3398 PetscInt *nnz; 3399 PetscBool sbaij; 3400 3401 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3402 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3403 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3404 for (i=0; i<mbs; i++) { 3405 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3406 for (k=0; k<bs; k++) { 3407 PetscInt row = bs*i + k; 3408 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3409 PetscInt col = colidxs[j]; 3410 if (!sbaij || col >= row) 3411 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3412 } 3413 } 3414 } 3415 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3416 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3417 ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3418 ierr = PetscFree(nnz);CHKERRQ(ierr); 3419 } 3420 3421 /* store matrix values */ 3422 for (i=0; i<m; i++) { 3423 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3424 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3425 } 3426 3427 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3428 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3429 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3430 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3431 PetscFunctionReturn(0); 3432 } 3433 3434 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3435 { 3436 PetscErrorCode ierr; 3437 PetscBool isbinary; 3438 3439 PetscFunctionBegin; 3440 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3441 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3442 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3443 PetscFunctionReturn(0); 3444 } 3445 3446 /*@C 3447 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3448 compressed row) format. For good matrix assembly performance the 3449 user should preallocate the matrix storage by setting the parameter nz 3450 (or the array nnz). By setting these parameters accurately, performance 3451 during matrix assembly can be increased by more than a factor of 50. 3452 3453 Collective 3454 3455 Input Parameters: 3456 + comm - MPI communicator, set to PETSC_COMM_SELF 3457 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3458 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3459 . m - number of rows 3460 . n - number of columns 3461 . nz - number of nonzero blocks per block row (same for all rows) 3462 - nnz - array containing the number of nonzero blocks in the various block rows 3463 (possibly different for each block row) or NULL 3464 3465 Output Parameter: 3466 . A - the matrix 3467 3468 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3469 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3470 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3471 3472 Options Database Keys: 3473 + -mat_no_unroll - uses code that does not unroll the loops in the 3474 block calculations (much slower) 3475 - -mat_block_size - size of the blocks to use 3476 3477 Level: intermediate 3478 3479 Notes: 3480 The number of rows and columns must be divisible by blocksize. 3481 3482 If the nnz parameter is given then the nz parameter is ignored 3483 3484 A nonzero block is any block that as 1 or more nonzeros in it 3485 3486 The block AIJ format is fully compatible with standard Fortran 77 3487 storage. That is, the stored row and column indices can begin at 3488 either one (as in Fortran) or zero. See the users' manual for details. 3489 3490 Specify the preallocated storage with either nz or nnz (not both). 3491 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3492 allocation. See Users-Manual: ch_mat for details. 3493 matrices. 3494 3495 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3496 @*/ 3497 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3498 { 3499 PetscErrorCode ierr; 3500 3501 PetscFunctionBegin; 3502 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3503 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3504 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3505 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3506 PetscFunctionReturn(0); 3507 } 3508 3509 /*@C 3510 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3511 per row in the matrix. For good matrix assembly performance the 3512 user should preallocate the matrix storage by setting the parameter nz 3513 (or the array nnz). By setting these parameters accurately, performance 3514 during matrix assembly can be increased by more than a factor of 50. 3515 3516 Collective 3517 3518 Input Parameters: 3519 + B - the matrix 3520 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3521 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3522 . nz - number of block nonzeros per block row (same for all rows) 3523 - nnz - array containing the number of block nonzeros in the various block rows 3524 (possibly different for each block row) or NULL 3525 3526 Options Database Keys: 3527 + -mat_no_unroll - uses code that does not unroll the loops in the 3528 block calculations (much slower) 3529 - -mat_block_size - size of the blocks to use 3530 3531 Level: intermediate 3532 3533 Notes: 3534 If the nnz parameter is given then the nz parameter is ignored 3535 3536 You can call MatGetInfo() to get information on how effective the preallocation was; 3537 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3538 You can also run with the option -info and look for messages with the string 3539 malloc in them to see if additional memory allocation was needed. 3540 3541 The block AIJ format is fully compatible with standard Fortran 77 3542 storage. That is, the stored row and column indices can begin at 3543 either one (as in Fortran) or zero. See the users' manual for details. 3544 3545 Specify the preallocated storage with either nz or nnz (not both). 3546 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3547 allocation. See Users-Manual: ch_mat for details. 3548 3549 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3550 @*/ 3551 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3552 { 3553 PetscErrorCode ierr; 3554 3555 PetscFunctionBegin; 3556 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3557 PetscValidType(B,1); 3558 PetscValidLogicalCollectiveInt(B,bs,2); 3559 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3560 PetscFunctionReturn(0); 3561 } 3562 3563 /*@C 3564 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3565 3566 Collective 3567 3568 Input Parameters: 3569 + B - the matrix 3570 . i - the indices into j for the start of each local row (starts with zero) 3571 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3572 - v - optional values in the matrix 3573 3574 Level: advanced 3575 3576 Notes: 3577 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3578 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3579 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3580 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3581 block column and the second index is over columns within a block. 3582 3583 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3584 3585 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3586 @*/ 3587 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3588 { 3589 PetscErrorCode ierr; 3590 3591 PetscFunctionBegin; 3592 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3593 PetscValidType(B,1); 3594 PetscValidLogicalCollectiveInt(B,bs,2); 3595 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3596 PetscFunctionReturn(0); 3597 } 3598 3599 /*@ 3600 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3601 3602 Collective 3603 3604 Input Parameters: 3605 + comm - must be an MPI communicator of size 1 3606 . bs - size of block 3607 . m - number of rows 3608 . n - number of columns 3609 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3610 . j - column indices 3611 - a - matrix values 3612 3613 Output Parameter: 3614 . mat - the matrix 3615 3616 Level: advanced 3617 3618 Notes: 3619 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3620 once the matrix is destroyed 3621 3622 You cannot set new nonzero locations into this matrix, that will generate an error. 3623 3624 The i and j indices are 0 based 3625 3626 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3627 3628 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3629 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3630 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3631 with column-major ordering within blocks. 3632 3633 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3634 3635 @*/ 3636 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3637 { 3638 PetscErrorCode ierr; 3639 PetscInt ii; 3640 Mat_SeqBAIJ *baij; 3641 3642 PetscFunctionBegin; 3643 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3644 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3645 3646 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3647 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3648 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3649 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3650 baij = (Mat_SeqBAIJ*)(*mat)->data; 3651 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3652 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3653 3654 baij->i = i; 3655 baij->j = j; 3656 baij->a = a; 3657 3658 baij->singlemalloc = PETSC_FALSE; 3659 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3660 baij->free_a = PETSC_FALSE; 3661 baij->free_ij = PETSC_FALSE; 3662 3663 for (ii=0; ii<m; ii++) { 3664 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3665 if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3666 } 3667 if (PetscDefined(USE_DEBUG)) { 3668 for (ii=0; ii<baij->i[m]; ii++) { 3669 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3670 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3671 } 3672 } 3673 3674 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3675 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3676 PetscFunctionReturn(0); 3677 } 3678 3679 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3680 { 3681 PetscErrorCode ierr; 3682 PetscMPIInt size; 3683 3684 PetscFunctionBegin; 3685 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3686 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3687 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3688 } else { 3689 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3690 } 3691 PetscFunctionReturn(0); 3692 } 3693