1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatGetColumnNorms_SeqBAIJ(Mat A,NormType type,PetscReal *norms) 21 { 22 PetscErrorCode ierr; 23 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 24 PetscInt N,i; 25 PetscInt ib,jb,bs = A->rmap->bs; 26 MatScalar *a_val = a_aij->a; 27 28 PetscFunctionBegin; 29 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 30 for (i=0; i<N; i++) norms[i] = 0.0; 31 if (type == NORM_2) { 32 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 33 for (jb=0; jb<bs; jb++) { 34 for (ib=0; ib<bs; ib++) { 35 norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 36 a_val++; 37 } 38 } 39 } 40 } else if (type == NORM_1) { 41 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 42 for (jb=0; jb<bs; jb++) { 43 for (ib=0; ib<bs; ib++) { 44 norms[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 45 a_val++; 46 } 47 } 48 } 49 } else if (type == NORM_INFINITY) { 50 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 51 for (jb=0; jb<bs; jb++) { 52 for (ib=0; ib<bs; ib++) { 53 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 54 norms[col] = PetscMax(PetscAbsScalar(*a_val), norms[col]); 55 a_val++; 56 } 57 } 58 } 59 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 60 if (type == NORM_2) { 61 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 62 } 63 PetscFunctionReturn(0); 64 } 65 66 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 67 { 68 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 69 PetscErrorCode ierr; 70 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 71 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 72 PetscReal shift = 0.0; 73 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 74 75 PetscFunctionBegin; 76 allowzeropivot = PetscNot(A->erroriffailure); 77 78 if (a->idiagvalid) { 79 if (values) *values = a->idiag; 80 PetscFunctionReturn(0); 81 } 82 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 83 diag_offset = a->diag; 84 if (!a->idiag) { 85 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 86 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 87 } 88 diag = a->idiag; 89 if (values) *values = a->idiag; 90 /* factor and invert each block */ 91 switch (bs) { 92 case 1: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 1*diag_offset[i]; 95 diag[0] = odiag[0]; 96 97 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 98 if (allowzeropivot) { 99 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 101 A->factorerror_zeropivot_row = i; 102 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 103 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 104 } 105 106 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 107 diag += 1; 108 } 109 break; 110 case 2: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 4*diag_offset[i]; 113 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 114 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 115 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 116 diag += 4; 117 } 118 break; 119 case 3: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 9*diag_offset[i]; 122 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 123 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 124 diag[8] = odiag[8]; 125 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 126 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 diag += 9; 128 } 129 break; 130 case 4: 131 for (i=0; i<mbs; i++) { 132 odiag = v + 16*diag_offset[i]; 133 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 134 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 135 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 136 diag += 16; 137 } 138 break; 139 case 5: 140 for (i=0; i<mbs; i++) { 141 odiag = v + 25*diag_offset[i]; 142 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 143 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 144 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145 diag += 25; 146 } 147 break; 148 case 6: 149 for (i=0; i<mbs; i++) { 150 odiag = v + 36*diag_offset[i]; 151 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 152 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 153 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 154 diag += 36; 155 } 156 break; 157 case 7: 158 for (i=0; i<mbs; i++) { 159 odiag = v + 49*diag_offset[i]; 160 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 161 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 49; 164 } 165 break; 166 default: 167 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 168 for (i=0; i<mbs; i++) { 169 odiag = v + bs2*diag_offset[i]; 170 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 171 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 172 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 173 diag += bs2; 174 } 175 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 176 } 177 a->idiagvalid = PETSC_TRUE; 178 PetscFunctionReturn(0); 179 } 180 181 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 182 { 183 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 184 PetscScalar *x,*work,*w,*workt,*t; 185 const MatScalar *v,*aa = a->a, *idiag; 186 const PetscScalar *b,*xb; 187 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 188 PetscErrorCode ierr; 189 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 190 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 191 192 PetscFunctionBegin; 193 its = its*lits; 194 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 195 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 196 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 197 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 198 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 199 200 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 201 202 if (!m) PetscFunctionReturn(0); 203 diag = a->diag; 204 idiag = a->idiag; 205 k = PetscMax(A->rmap->n,A->cmap->n); 206 if (!a->mult_work) { 207 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 208 } 209 if (!a->sor_workt) { 210 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 211 } 212 if (!a->sor_work) { 213 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 214 } 215 work = a->mult_work; 216 t = a->sor_workt; 217 w = a->sor_work; 218 219 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 220 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 221 222 if (flag & SOR_ZERO_INITIAL_GUESS) { 223 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 224 switch (bs) { 225 case 1: 226 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 227 t[0] = b[0]; 228 i2 = 1; 229 idiag += 1; 230 for (i=1; i<m; i++) { 231 v = aa + ai[i]; 232 vi = aj + ai[i]; 233 nz = diag[i] - ai[i]; 234 s[0] = b[i2]; 235 for (j=0; j<nz; j++) { 236 xw[0] = x[vi[j]]; 237 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 238 } 239 t[i2] = s[0]; 240 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 241 x[i2] = xw[0]; 242 idiag += 1; 243 i2 += 1; 244 } 245 break; 246 case 2: 247 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 248 t[0] = b[0]; t[1] = b[1]; 249 i2 = 2; 250 idiag += 4; 251 for (i=1; i<m; i++) { 252 v = aa + 4*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s[0] = b[i2]; s[1] = b[i2+1]; 256 for (j=0; j<nz; j++) { 257 idx = 2*vi[j]; 258 it = 4*j; 259 xw[0] = x[idx]; xw[1] = x[1+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 261 } 262 t[i2] = s[0]; t[i2+1] = s[1]; 263 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 264 x[i2] = xw[0]; x[i2+1] = xw[1]; 265 idiag += 4; 266 i2 += 2; 267 } 268 break; 269 case 3: 270 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 272 i2 = 3; 273 idiag += 9; 274 for (i=1; i<m; i++) { 275 v = aa + 9*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 279 while (nz--) { 280 idx = 3*(*vi++); 281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 282 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 283 v += 9; 284 } 285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 286 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 288 idiag += 9; 289 i2 += 3; 290 } 291 break; 292 case 4: 293 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 295 i2 = 4; 296 idiag += 16; 297 for (i=1; i<m; i++) { 298 v = aa + 16*ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 302 while (nz--) { 303 idx = 4*(*vi++); 304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 305 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 306 v += 16; 307 } 308 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 309 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 310 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 311 idiag += 16; 312 i2 += 4; 313 } 314 break; 315 case 5: 316 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 317 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 318 i2 = 5; 319 idiag += 25; 320 for (i=1; i<m; i++) { 321 v = aa + 25*ai[i]; 322 vi = aj + ai[i]; 323 nz = diag[i] - ai[i]; 324 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 325 while (nz--) { 326 idx = 5*(*vi++); 327 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 328 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 329 v += 25; 330 } 331 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 332 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 333 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 334 idiag += 25; 335 i2 += 5; 336 } 337 break; 338 case 6: 339 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 340 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 341 i2 = 6; 342 idiag += 36; 343 for (i=1; i<m; i++) { 344 v = aa + 36*ai[i]; 345 vi = aj + ai[i]; 346 nz = diag[i] - ai[i]; 347 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 348 while (nz--) { 349 idx = 6*(*vi++); 350 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 351 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 352 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 353 v += 36; 354 } 355 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 356 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 357 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 358 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 359 idiag += 36; 360 i2 += 6; 361 } 362 break; 363 case 7: 364 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 365 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 366 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 367 i2 = 7; 368 idiag += 49; 369 for (i=1; i<m; i++) { 370 v = aa + 49*ai[i]; 371 vi = aj + ai[i]; 372 nz = diag[i] - ai[i]; 373 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 374 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 375 while (nz--) { 376 idx = 7*(*vi++); 377 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 378 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 379 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 380 v += 49; 381 } 382 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 383 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 384 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 385 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 386 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 387 idiag += 49; 388 i2 += 7; 389 } 390 break; 391 default: 392 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 393 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 394 i2 = bs; 395 idiag += bs2; 396 for (i=1; i<m; i++) { 397 v = aa + bs2*ai[i]; 398 vi = aj + ai[i]; 399 nz = diag[i] - ai[i]; 400 401 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 402 /* copy all rows of x that are needed into contiguous space */ 403 workt = work; 404 for (j=0; j<nz; j++) { 405 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 406 workt += bs; 407 } 408 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 409 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 410 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 411 412 idiag += bs2; 413 i2 += bs; 414 } 415 break; 416 } 417 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 418 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 419 xb = t; 420 } 421 else xb = b; 422 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 423 idiag = a->idiag+bs2*(a->mbs-1); 424 i2 = bs * (m-1); 425 switch (bs) { 426 case 1: 427 s[0] = xb[i2]; 428 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 429 x[i2] = xw[0]; 430 i2 -= 1; 431 for (i=m-2; i>=0; i--) { 432 v = aa + (diag[i]+1); 433 vi = aj + diag[i] + 1; 434 nz = ai[i+1] - diag[i] - 1; 435 s[0] = xb[i2]; 436 for (j=0; j<nz; j++) { 437 xw[0] = x[vi[j]]; 438 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 439 } 440 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 441 x[i2] = xw[0]; 442 idiag -= 1; 443 i2 -= 1; 444 } 445 break; 446 case 2: 447 s[0] = xb[i2]; s[1] = xb[i2+1]; 448 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 449 x[i2] = xw[0]; x[i2+1] = xw[1]; 450 i2 -= 2; 451 idiag -= 4; 452 for (i=m-2; i>=0; i--) { 453 v = aa + 4*(diag[i] + 1); 454 vi = aj + diag[i] + 1; 455 nz = ai[i+1] - diag[i] - 1; 456 s[0] = xb[i2]; s[1] = xb[i2+1]; 457 for (j=0; j<nz; j++) { 458 idx = 2*vi[j]; 459 it = 4*j; 460 xw[0] = x[idx]; xw[1] = x[1+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 462 } 463 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; 465 idiag -= 4; 466 i2 -= 2; 467 } 468 break; 469 case 3: 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 471 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 473 i2 -= 3; 474 idiag -= 9; 475 for (i=m-2; i>=0; i--) { 476 v = aa + 9*(diag[i]+1); 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 480 while (nz--) { 481 idx = 3*(*vi++); 482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 484 v += 9; 485 } 486 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 488 idiag -= 9; 489 i2 -= 3; 490 } 491 break; 492 case 4: 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 494 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 496 i2 -= 4; 497 idiag -= 16; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 16*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 503 while (nz--) { 504 idx = 4*(*vi++); 505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 506 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 507 v += 16; 508 } 509 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 511 idiag -= 16; 512 i2 -= 4; 513 } 514 break; 515 case 5: 516 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 517 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 518 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 519 i2 -= 5; 520 idiag -= 25; 521 for (i=m-2; i>=0; i--) { 522 v = aa + 25*(diag[i]+1); 523 vi = aj + diag[i] + 1; 524 nz = ai[i+1] - diag[i] - 1; 525 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 526 while (nz--) { 527 idx = 5*(*vi++); 528 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 529 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 530 v += 25; 531 } 532 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 533 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 534 idiag -= 25; 535 i2 -= 5; 536 } 537 break; 538 case 6: 539 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 540 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 541 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 542 i2 -= 6; 543 idiag -= 36; 544 for (i=m-2; i>=0; i--) { 545 v = aa + 36*(diag[i]+1); 546 vi = aj + diag[i] + 1; 547 nz = ai[i+1] - diag[i] - 1; 548 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 549 while (nz--) { 550 idx = 6*(*vi++); 551 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 552 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 553 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 554 v += 36; 555 } 556 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 557 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 558 idiag -= 36; 559 i2 -= 6; 560 } 561 break; 562 case 7: 563 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 564 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 565 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 566 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 567 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 568 i2 -= 7; 569 idiag -= 49; 570 for (i=m-2; i>=0; i--) { 571 v = aa + 49*(diag[i]+1); 572 vi = aj + diag[i] + 1; 573 nz = ai[i+1] - diag[i] - 1; 574 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 575 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 576 while (nz--) { 577 idx = 7*(*vi++); 578 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 579 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 580 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 581 v += 49; 582 } 583 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 584 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 585 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 586 idiag -= 49; 587 i2 -= 7; 588 } 589 break; 590 default: 591 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 592 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 593 i2 -= bs; 594 idiag -= bs2; 595 for (i=m-2; i>=0; i--) { 596 v = aa + bs2*(diag[i]+1); 597 vi = aj + diag[i] + 1; 598 nz = ai[i+1] - diag[i] - 1; 599 600 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 601 /* copy all rows of x that are needed into contiguous space */ 602 workt = work; 603 for (j=0; j<nz; j++) { 604 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 605 workt += bs; 606 } 607 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 608 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 609 610 idiag -= bs2; 611 i2 -= bs; 612 } 613 break; 614 } 615 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 616 } 617 its--; 618 } 619 while (its--) { 620 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 621 idiag = a->idiag; 622 i2 = 0; 623 switch (bs) { 624 case 1: 625 for (i=0; i<m; i++) { 626 v = aa + ai[i]; 627 vi = aj + ai[i]; 628 nz = ai[i+1] - ai[i]; 629 s[0] = b[i2]; 630 for (j=0; j<nz; j++) { 631 xw[0] = x[vi[j]]; 632 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 633 } 634 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 635 x[i2] += xw[0]; 636 idiag += 1; 637 i2 += 1; 638 } 639 break; 640 case 2: 641 for (i=0; i<m; i++) { 642 v = aa + 4*ai[i]; 643 vi = aj + ai[i]; 644 nz = ai[i+1] - ai[i]; 645 s[0] = b[i2]; s[1] = b[i2+1]; 646 for (j=0; j<nz; j++) { 647 idx = 2*vi[j]; 648 it = 4*j; 649 xw[0] = x[idx]; xw[1] = x[1+idx]; 650 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 651 } 652 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 653 x[i2] += xw[0]; x[i2+1] += xw[1]; 654 idiag += 4; 655 i2 += 2; 656 } 657 break; 658 case 3: 659 for (i=0; i<m; i++) { 660 v = aa + 9*ai[i]; 661 vi = aj + ai[i]; 662 nz = ai[i+1] - ai[i]; 663 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 664 while (nz--) { 665 idx = 3*(*vi++); 666 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 668 v += 9; 669 } 670 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 671 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672 idiag += 9; 673 i2 += 3; 674 } 675 break; 676 case 4: 677 for (i=0; i<m; i++) { 678 v = aa + 16*ai[i]; 679 vi = aj + ai[i]; 680 nz = ai[i+1] - ai[i]; 681 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 682 while (nz--) { 683 idx = 4*(*vi++); 684 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 685 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 686 v += 16; 687 } 688 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 689 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 690 idiag += 16; 691 i2 += 4; 692 } 693 break; 694 case 5: 695 for (i=0; i<m; i++) { 696 v = aa + 25*ai[i]; 697 vi = aj + ai[i]; 698 nz = ai[i+1] - ai[i]; 699 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 700 while (nz--) { 701 idx = 5*(*vi++); 702 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 703 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 704 v += 25; 705 } 706 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 707 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 708 idiag += 25; 709 i2 += 5; 710 } 711 break; 712 case 6: 713 for (i=0; i<m; i++) { 714 v = aa + 36*ai[i]; 715 vi = aj + ai[i]; 716 nz = ai[i+1] - ai[i]; 717 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 718 while (nz--) { 719 idx = 6*(*vi++); 720 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 721 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 722 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 723 v += 36; 724 } 725 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 726 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 727 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 728 idiag += 36; 729 i2 += 6; 730 } 731 break; 732 case 7: 733 for (i=0; i<m; i++) { 734 v = aa + 49*ai[i]; 735 vi = aj + ai[i]; 736 nz = ai[i+1] - ai[i]; 737 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 738 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 739 while (nz--) { 740 idx = 7*(*vi++); 741 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 742 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 743 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 744 v += 49; 745 } 746 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 747 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 748 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 749 idiag += 49; 750 i2 += 7; 751 } 752 break; 753 default: 754 for (i=0; i<m; i++) { 755 v = aa + bs2*ai[i]; 756 vi = aj + ai[i]; 757 nz = ai[i+1] - ai[i]; 758 759 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 760 /* copy all rows of x that are needed into contiguous space */ 761 workt = work; 762 for (j=0; j<nz; j++) { 763 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 764 workt += bs; 765 } 766 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 767 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 768 769 idiag += bs2; 770 i2 += bs; 771 } 772 break; 773 } 774 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 775 } 776 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 777 idiag = a->idiag+bs2*(a->mbs-1); 778 i2 = bs * (m-1); 779 switch (bs) { 780 case 1: 781 for (i=m-1; i>=0; i--) { 782 v = aa + ai[i]; 783 vi = aj + ai[i]; 784 nz = ai[i+1] - ai[i]; 785 s[0] = b[i2]; 786 for (j=0; j<nz; j++) { 787 xw[0] = x[vi[j]]; 788 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 789 } 790 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 791 x[i2] += xw[0]; 792 idiag -= 1; 793 i2 -= 1; 794 } 795 break; 796 case 2: 797 for (i=m-1; i>=0; i--) { 798 v = aa + 4*ai[i]; 799 vi = aj + ai[i]; 800 nz = ai[i+1] - ai[i]; 801 s[0] = b[i2]; s[1] = b[i2+1]; 802 for (j=0; j<nz; j++) { 803 idx = 2*vi[j]; 804 it = 4*j; 805 xw[0] = x[idx]; xw[1] = x[1+idx]; 806 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 807 } 808 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 809 x[i2] += xw[0]; x[i2+1] += xw[1]; 810 idiag -= 4; 811 i2 -= 2; 812 } 813 break; 814 case 3: 815 for (i=m-1; i>=0; i--) { 816 v = aa + 9*ai[i]; 817 vi = aj + ai[i]; 818 nz = ai[i+1] - ai[i]; 819 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 820 while (nz--) { 821 idx = 3*(*vi++); 822 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 824 v += 9; 825 } 826 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 827 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828 idiag -= 9; 829 i2 -= 3; 830 } 831 break; 832 case 4: 833 for (i=m-1; i>=0; i--) { 834 v = aa + 16*ai[i]; 835 vi = aj + ai[i]; 836 nz = ai[i+1] - ai[i]; 837 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 838 while (nz--) { 839 idx = 4*(*vi++); 840 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 841 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 842 v += 16; 843 } 844 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 845 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 846 idiag -= 16; 847 i2 -= 4; 848 } 849 break; 850 case 5: 851 for (i=m-1; i>=0; i--) { 852 v = aa + 25*ai[i]; 853 vi = aj + ai[i]; 854 nz = ai[i+1] - ai[i]; 855 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 856 while (nz--) { 857 idx = 5*(*vi++); 858 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 859 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 860 v += 25; 861 } 862 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 863 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 864 idiag -= 25; 865 i2 -= 5; 866 } 867 break; 868 case 6: 869 for (i=m-1; i>=0; i--) { 870 v = aa + 36*ai[i]; 871 vi = aj + ai[i]; 872 nz = ai[i+1] - ai[i]; 873 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 874 while (nz--) { 875 idx = 6*(*vi++); 876 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 877 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 878 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 879 v += 36; 880 } 881 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 882 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 883 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 884 idiag -= 36; 885 i2 -= 6; 886 } 887 break; 888 case 7: 889 for (i=m-1; i>=0; i--) { 890 v = aa + 49*ai[i]; 891 vi = aj + ai[i]; 892 nz = ai[i+1] - ai[i]; 893 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 894 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 895 while (nz--) { 896 idx = 7*(*vi++); 897 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 898 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 900 v += 49; 901 } 902 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 903 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 904 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 905 idiag -= 49; 906 i2 -= 7; 907 } 908 break; 909 default: 910 for (i=m-1; i>=0; i--) { 911 v = aa + bs2*ai[i]; 912 vi = aj + ai[i]; 913 nz = ai[i+1] - ai[i]; 914 915 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 916 /* copy all rows of x that are needed into contiguous space */ 917 workt = work; 918 for (j=0; j<nz; j++) { 919 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 920 workt += bs; 921 } 922 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 923 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 924 925 idiag -= bs2; 926 i2 -= bs; 927 } 928 break; 929 } 930 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 931 } 932 } 933 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 934 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 935 PetscFunctionReturn(0); 936 } 937 938 /* 939 Special version for direct calls from Fortran (Used in PETSc-fun3d) 940 */ 941 #if defined(PETSC_HAVE_FORTRAN_CAPS) 942 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 943 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 944 #define matsetvaluesblocked4_ matsetvaluesblocked4 945 #endif 946 947 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 948 { 949 Mat A = *AA; 950 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 951 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 952 PetscInt *ai =a->i,*ailen=a->ilen; 953 PetscInt *aj =a->j,stepval,lastcol = -1; 954 const PetscScalar *value = v; 955 MatScalar *ap,*aa = a->a,*bap; 956 PetscErrorCode ierr; 957 958 PetscFunctionBegin; 959 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 960 stepval = (n-1)*4; 961 for (k=0; k<m; k++) { /* loop over added rows */ 962 row = im[k]; 963 rp = aj + ai[row]; 964 ap = aa + 16*ai[row]; 965 nrow = ailen[row]; 966 low = 0; 967 high = nrow; 968 for (l=0; l<n; l++) { /* loop over added columns */ 969 col = in[l]; 970 if (col <= lastcol) low = 0; 971 else high = nrow; 972 lastcol = col; 973 value = v + k*(stepval+4 + l)*4; 974 while (high-low > 7) { 975 t = (low+high)/2; 976 if (rp[t] > col) high = t; 977 else low = t; 978 } 979 for (i=low; i<high; i++) { 980 if (rp[i] > col) break; 981 if (rp[i] == col) { 982 bap = ap + 16*i; 983 for (ii=0; ii<4; ii++,value+=stepval) { 984 for (jj=ii; jj<16; jj+=4) { 985 bap[jj] += *value++; 986 } 987 } 988 goto noinsert2; 989 } 990 } 991 N = nrow++ - 1; 992 high++; /* added new column index thus must search to one higher than before */ 993 /* shift up all the later entries in this row */ 994 for (ii=N; ii>=i; ii--) { 995 rp[ii+1] = rp[ii]; 996 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 997 } 998 if (N >= i) { 999 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1000 } 1001 rp[i] = col; 1002 bap = ap + 16*i; 1003 for (ii=0; ii<4; ii++,value+=stepval) { 1004 for (jj=ii; jj<16; jj+=4) { 1005 bap[jj] = *value++; 1006 } 1007 } 1008 noinsert2:; 1009 low = i; 1010 } 1011 ailen[row] = nrow; 1012 } 1013 PetscFunctionReturnVoid(); 1014 } 1015 1016 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1017 #define matsetvalues4_ MATSETVALUES4 1018 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1019 #define matsetvalues4_ matsetvalues4 1020 #endif 1021 1022 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1023 { 1024 Mat A = *AA; 1025 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1026 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1027 PetscInt *ai=a->i,*ailen=a->ilen; 1028 PetscInt *aj=a->j,brow,bcol; 1029 PetscInt ridx,cidx,lastcol = -1; 1030 MatScalar *ap,value,*aa=a->a,*bap; 1031 PetscErrorCode ierr; 1032 1033 PetscFunctionBegin; 1034 for (k=0; k<m; k++) { /* loop over added rows */ 1035 row = im[k]; brow = row/4; 1036 rp = aj + ai[brow]; 1037 ap = aa + 16*ai[brow]; 1038 nrow = ailen[brow]; 1039 low = 0; 1040 high = nrow; 1041 for (l=0; l<n; l++) { /* loop over added columns */ 1042 col = in[l]; bcol = col/4; 1043 ridx = row % 4; cidx = col % 4; 1044 value = v[l + k*n]; 1045 if (col <= lastcol) low = 0; 1046 else high = nrow; 1047 lastcol = col; 1048 while (high-low > 7) { 1049 t = (low+high)/2; 1050 if (rp[t] > bcol) high = t; 1051 else low = t; 1052 } 1053 for (i=low; i<high; i++) { 1054 if (rp[i] > bcol) break; 1055 if (rp[i] == bcol) { 1056 bap = ap + 16*i + 4*cidx + ridx; 1057 *bap += value; 1058 goto noinsert1; 1059 } 1060 } 1061 N = nrow++ - 1; 1062 high++; /* added new column thus must search to one higher than before */ 1063 /* shift up all the later entries in this row */ 1064 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1065 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1066 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1067 rp[i] = bcol; 1068 ap[16*i + 4*cidx + ridx] = value; 1069 noinsert1:; 1070 low = i; 1071 } 1072 ailen[brow] = nrow; 1073 } 1074 PetscFunctionReturnVoid(); 1075 } 1076 1077 /* 1078 Checks for missing diagonals 1079 */ 1080 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1081 { 1082 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt *diag,*ii = a->i,i; 1085 1086 PetscFunctionBegin; 1087 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1088 *missing = PETSC_FALSE; 1089 if (A->rmap->n > 0 && !ii) { 1090 *missing = PETSC_TRUE; 1091 if (d) *d = 0; 1092 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1093 } else { 1094 PetscInt n; 1095 n = PetscMin(a->mbs, a->nbs); 1096 diag = a->diag; 1097 for (i=0; i<n; i++) { 1098 if (diag[i] >= ii[i+1]) { 1099 *missing = PETSC_TRUE; 1100 if (d) *d = i; 1101 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1102 break; 1103 } 1104 } 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1110 { 1111 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 PetscInt i,j,m = a->mbs; 1114 1115 PetscFunctionBegin; 1116 if (!a->diag) { 1117 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1118 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1119 a->free_diag = PETSC_TRUE; 1120 } 1121 for (i=0; i<m; i++) { 1122 a->diag[i] = a->i[i+1]; 1123 for (j=a->i[i]; j<a->i[i+1]; j++) { 1124 if (a->j[j] == i) { 1125 a->diag[i] = j; 1126 break; 1127 } 1128 } 1129 } 1130 PetscFunctionReturn(0); 1131 } 1132 1133 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1134 { 1135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1136 PetscErrorCode ierr; 1137 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1138 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1139 1140 PetscFunctionBegin; 1141 *nn = n; 1142 if (!ia) PetscFunctionReturn(0); 1143 if (symmetric) { 1144 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1145 nz = tia[n]; 1146 } else { 1147 tia = a->i; tja = a->j; 1148 } 1149 1150 if (!blockcompressed && bs > 1) { 1151 (*nn) *= bs; 1152 /* malloc & create the natural set of indices */ 1153 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1154 if (n) { 1155 (*ia)[0] = oshift; 1156 for (j=1; j<bs; j++) { 1157 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1158 } 1159 } 1160 1161 for (i=1; i<n; i++) { 1162 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1163 for (j=1; j<bs; j++) { 1164 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1165 } 1166 } 1167 if (n) { 1168 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1169 } 1170 1171 if (inja) { 1172 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1173 cnt = 0; 1174 for (i=0; i<n; i++) { 1175 for (j=0; j<bs; j++) { 1176 for (k=tia[i]; k<tia[i+1]; k++) { 1177 for (l=0; l<bs; l++) { 1178 (*ja)[cnt++] = bs*tja[k] + l; 1179 } 1180 } 1181 } 1182 } 1183 } 1184 1185 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1186 ierr = PetscFree(tia);CHKERRQ(ierr); 1187 ierr = PetscFree(tja);CHKERRQ(ierr); 1188 } 1189 } else if (oshift == 1) { 1190 if (symmetric) { 1191 nz = tia[A->rmap->n/bs]; 1192 /* add 1 to i and j indices */ 1193 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1194 *ia = tia; 1195 if (ja) { 1196 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1197 *ja = tja; 1198 } 1199 } else { 1200 nz = a->i[A->rmap->n/bs]; 1201 /* malloc space and add 1 to i and j indices */ 1202 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1203 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1204 if (ja) { 1205 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1206 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1207 } 1208 } 1209 } else { 1210 *ia = tia; 1211 if (ja) *ja = tja; 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1217 { 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 if (!ia) PetscFunctionReturn(0); 1222 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1223 ierr = PetscFree(*ia);CHKERRQ(ierr); 1224 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1230 { 1231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 #if defined(PETSC_USE_LOG) 1236 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1237 #endif 1238 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1239 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1240 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1241 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1242 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1243 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1244 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1245 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1246 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1247 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1248 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1249 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1250 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1251 1252 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1253 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1254 ierr = PetscFree(A->data);CHKERRQ(ierr); 1255 1256 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1257 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1258 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1259 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1260 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1261 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1262 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1263 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1264 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1265 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1266 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1269 #if defined(PETSC_HAVE_HYPRE) 1270 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1271 #endif 1272 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1277 { 1278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1279 PetscErrorCode ierr; 1280 1281 PetscFunctionBegin; 1282 switch (op) { 1283 case MAT_ROW_ORIENTED: 1284 a->roworiented = flg; 1285 break; 1286 case MAT_KEEP_NONZERO_PATTERN: 1287 a->keepnonzeropattern = flg; 1288 break; 1289 case MAT_NEW_NONZERO_LOCATIONS: 1290 a->nonew = (flg ? 0 : 1); 1291 break; 1292 case MAT_NEW_NONZERO_LOCATION_ERR: 1293 a->nonew = (flg ? -1 : 0); 1294 break; 1295 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1296 a->nonew = (flg ? -2 : 0); 1297 break; 1298 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1299 a->nounused = (flg ? -1 : 0); 1300 break; 1301 case MAT_FORCE_DIAGONAL_ENTRIES: 1302 case MAT_IGNORE_OFF_PROC_ENTRIES: 1303 case MAT_USE_HASH_TABLE: 1304 case MAT_SORTED_FULL: 1305 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1306 break; 1307 case MAT_SPD: 1308 case MAT_SYMMETRIC: 1309 case MAT_STRUCTURALLY_SYMMETRIC: 1310 case MAT_HERMITIAN: 1311 case MAT_SYMMETRY_ETERNAL: 1312 case MAT_SUBMAT_SINGLEIS: 1313 case MAT_STRUCTURE_ONLY: 1314 /* These options are handled directly by MatSetOption() */ 1315 break; 1316 default: 1317 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1323 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1324 { 1325 PetscErrorCode ierr; 1326 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1327 MatScalar *aa_i; 1328 PetscScalar *v_i; 1329 1330 PetscFunctionBegin; 1331 bs = A->rmap->bs; 1332 bs2 = bs*bs; 1333 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1334 1335 bn = row/bs; /* Block number */ 1336 bp = row % bs; /* Block Position */ 1337 M = ai[bn+1] - ai[bn]; 1338 *nz = bs*M; 1339 1340 if (v) { 1341 *v = NULL; 1342 if (*nz) { 1343 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1344 for (i=0; i<M; i++) { /* for each block in the block row */ 1345 v_i = *v + i*bs; 1346 aa_i = aa + bs2*(ai[bn] + i); 1347 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1348 } 1349 } 1350 } 1351 1352 if (idx) { 1353 *idx = NULL; 1354 if (*nz) { 1355 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1356 for (i=0; i<M; i++) { /* for each block in the block row */ 1357 idx_i = *idx + i*bs; 1358 itmp = bs*aj[ai[bn] + i]; 1359 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1360 } 1361 } 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1367 { 1368 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1369 PetscErrorCode ierr; 1370 1371 PetscFunctionBegin; 1372 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1377 { 1378 PetscErrorCode ierr; 1379 1380 PetscFunctionBegin; 1381 if (nz) *nz = 0; 1382 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1383 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1384 PetscFunctionReturn(0); 1385 } 1386 1387 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1388 { 1389 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1390 Mat C; 1391 PetscErrorCode ierr; 1392 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1393 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1394 MatScalar *ata,*aa=a->a; 1395 1396 PetscFunctionBegin; 1397 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1398 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1399 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1400 1401 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1402 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1403 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1404 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1405 1406 at = (Mat_SeqBAIJ*)C->data; 1407 ati = at->i; 1408 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1409 } else { 1410 C = *B; 1411 at = (Mat_SeqBAIJ*)C->data; 1412 ati = at->i; 1413 } 1414 1415 atj = at->j; 1416 ata = at->a; 1417 1418 /* Copy ati into atfill so we have locations of the next free space in atj */ 1419 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1420 1421 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1422 for (i=0; i<mbs; i++) { 1423 anzj = ai[i+1] - ai[i]; 1424 for (j=0; j<anzj; j++) { 1425 atj[atfill[*aj]] = i; 1426 for (kr=0; kr<bs; kr++) { 1427 for (k=0; k<bs; k++) { 1428 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1429 } 1430 } 1431 atfill[*aj++] += 1; 1432 } 1433 } 1434 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1435 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 1437 /* Clean up temporary space and complete requests. */ 1438 ierr = PetscFree(atfill);CHKERRQ(ierr); 1439 1440 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1441 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1442 *B = C; 1443 } else { 1444 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1445 } 1446 PetscFunctionReturn(0); 1447 } 1448 1449 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1450 { 1451 PetscErrorCode ierr; 1452 Mat Btrans; 1453 1454 PetscFunctionBegin; 1455 *f = PETSC_FALSE; 1456 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1457 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1458 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } 1461 1462 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1463 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1464 { 1465 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1466 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1467 PetscInt *rowlens,*colidxs; 1468 PetscScalar *matvals; 1469 PetscErrorCode ierr; 1470 1471 PetscFunctionBegin; 1472 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1473 1474 M = mat->rmap->N; 1475 N = mat->cmap->N; 1476 m = mat->rmap->n; 1477 bs = mat->rmap->bs; 1478 nz = bs*bs*A->nz; 1479 1480 /* write matrix header */ 1481 header[0] = MAT_FILE_CLASSID; 1482 header[1] = M; header[2] = N; header[3] = nz; 1483 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1484 1485 /* store row lengths */ 1486 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1487 for (cnt=0, i=0; i<A->mbs; i++) 1488 for (j=0; j<bs; j++) 1489 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1490 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1491 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1492 1493 /* store column indices */ 1494 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1495 for (cnt=0, i=0; i<A->mbs; i++) 1496 for (k=0; k<bs; k++) 1497 for (j=A->i[i]; j<A->i[i+1]; j++) 1498 for (l=0; l<bs; l++) 1499 colidxs[cnt++] = bs*A->j[j] + l; 1500 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1501 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1502 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1503 1504 /* store nonzero values */ 1505 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1506 for (cnt=0, i=0; i<A->mbs; i++) 1507 for (k=0; k<bs; k++) 1508 for (j=A->i[i]; j<A->i[i+1]; j++) 1509 for (l=0; l<bs; l++) 1510 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1511 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1512 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1513 ierr = PetscFree(matvals);CHKERRQ(ierr); 1514 1515 /* write block size option to the viewer's .info file */ 1516 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1521 { 1522 PetscErrorCode ierr; 1523 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1524 PetscInt i,bs = A->rmap->bs,k; 1525 1526 PetscFunctionBegin; 1527 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1528 for (i=0; i<a->mbs; i++) { 1529 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1530 for (k=a->i[i]; k<a->i[i+1]; k++) { 1531 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1532 } 1533 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1534 } 1535 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1540 { 1541 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1542 PetscErrorCode ierr; 1543 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1544 PetscViewerFormat format; 1545 1546 PetscFunctionBegin; 1547 if (A->structure_only) { 1548 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1549 PetscFunctionReturn(0); 1550 } 1551 1552 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1553 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1554 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1555 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1556 const char *matname; 1557 Mat aij; 1558 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1559 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1560 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1561 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1562 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1563 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1564 PetscFunctionReturn(0); 1565 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1566 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1567 for (i=0; i<a->mbs; i++) { 1568 for (j=0; j<bs; j++) { 1569 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1570 for (k=a->i[i]; k<a->i[i+1]; k++) { 1571 for (l=0; l<bs; l++) { 1572 #if defined(PETSC_USE_COMPLEX) 1573 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1574 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1575 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1576 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1577 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1578 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1579 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1580 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1581 } 1582 #else 1583 if (a->a[bs2*k + l*bs + j] != 0.0) { 1584 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1585 } 1586 #endif 1587 } 1588 } 1589 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1590 } 1591 } 1592 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1593 } else { 1594 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1595 for (i=0; i<a->mbs; i++) { 1596 for (j=0; j<bs; j++) { 1597 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1598 for (k=a->i[i]; k<a->i[i+1]; k++) { 1599 for (l=0; l<bs; l++) { 1600 #if defined(PETSC_USE_COMPLEX) 1601 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1602 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1603 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1604 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1605 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1606 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1607 } else { 1608 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1609 } 1610 #else 1611 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1612 #endif 1613 } 1614 } 1615 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1616 } 1617 } 1618 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1619 } 1620 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1621 PetscFunctionReturn(0); 1622 } 1623 1624 #include <petscdraw.h> 1625 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1626 { 1627 Mat A = (Mat) Aa; 1628 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1629 PetscErrorCode ierr; 1630 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1631 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1632 MatScalar *aa; 1633 PetscViewer viewer; 1634 PetscViewerFormat format; 1635 1636 PetscFunctionBegin; 1637 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1638 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1639 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1640 1641 /* loop over matrix elements drawing boxes */ 1642 1643 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1644 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1645 /* Blue for negative, Cyan for zero and Red for positive */ 1646 color = PETSC_DRAW_BLUE; 1647 for (i=0,row=0; i<mbs; i++,row+=bs) { 1648 for (j=a->i[i]; j<a->i[i+1]; j++) { 1649 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1650 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1651 aa = a->a + j*bs2; 1652 for (k=0; k<bs; k++) { 1653 for (l=0; l<bs; l++) { 1654 if (PetscRealPart(*aa++) >= 0.) continue; 1655 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1656 } 1657 } 1658 } 1659 } 1660 color = PETSC_DRAW_CYAN; 1661 for (i=0,row=0; i<mbs; i++,row+=bs) { 1662 for (j=a->i[i]; j<a->i[i+1]; j++) { 1663 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1664 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1665 aa = a->a + j*bs2; 1666 for (k=0; k<bs; k++) { 1667 for (l=0; l<bs; l++) { 1668 if (PetscRealPart(*aa++) != 0.) continue; 1669 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1670 } 1671 } 1672 } 1673 } 1674 color = PETSC_DRAW_RED; 1675 for (i=0,row=0; i<mbs; i++,row+=bs) { 1676 for (j=a->i[i]; j<a->i[i+1]; j++) { 1677 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1678 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1679 aa = a->a + j*bs2; 1680 for (k=0; k<bs; k++) { 1681 for (l=0; l<bs; l++) { 1682 if (PetscRealPart(*aa++) <= 0.) continue; 1683 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1684 } 1685 } 1686 } 1687 } 1688 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1689 } else { 1690 /* use contour shading to indicate magnitude of values */ 1691 /* first determine max of all nonzero values */ 1692 PetscReal minv = 0.0, maxv = 0.0; 1693 PetscDraw popup; 1694 1695 for (i=0; i<a->nz*a->bs2; i++) { 1696 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1697 } 1698 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1699 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1700 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1701 1702 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1703 for (i=0,row=0; i<mbs; i++,row+=bs) { 1704 for (j=a->i[i]; j<a->i[i+1]; j++) { 1705 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1706 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1707 aa = a->a + j*bs2; 1708 for (k=0; k<bs; k++) { 1709 for (l=0; l<bs; l++) { 1710 MatScalar v = *aa++; 1711 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1712 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1713 } 1714 } 1715 } 1716 } 1717 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1718 } 1719 PetscFunctionReturn(0); 1720 } 1721 1722 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1723 { 1724 PetscErrorCode ierr; 1725 PetscReal xl,yl,xr,yr,w,h; 1726 PetscDraw draw; 1727 PetscBool isnull; 1728 1729 PetscFunctionBegin; 1730 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1731 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1732 if (isnull) PetscFunctionReturn(0); 1733 1734 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1735 xr += w; yr += h; xl = -w; yl = -h; 1736 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1737 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1738 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1739 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1740 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1745 { 1746 PetscErrorCode ierr; 1747 PetscBool iascii,isbinary,isdraw; 1748 1749 PetscFunctionBegin; 1750 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1752 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1753 if (iascii) { 1754 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1755 } else if (isbinary) { 1756 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1757 } else if (isdraw) { 1758 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1759 } else { 1760 Mat B; 1761 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1762 ierr = MatView(B,viewer);CHKERRQ(ierr); 1763 ierr = MatDestroy(&B);CHKERRQ(ierr); 1764 } 1765 PetscFunctionReturn(0); 1766 } 1767 1768 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1769 { 1770 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1771 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1772 PetscInt *ai = a->i,*ailen = a->ilen; 1773 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1774 MatScalar *ap,*aa = a->a; 1775 1776 PetscFunctionBegin; 1777 for (k=0; k<m; k++) { /* loop over rows */ 1778 row = im[k]; brow = row/bs; 1779 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1780 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1781 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1782 nrow = ailen[brow]; 1783 for (l=0; l<n; l++) { /* loop over columns */ 1784 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1785 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1786 col = in[l]; 1787 bcol = col/bs; 1788 cidx = col%bs; 1789 ridx = row%bs; 1790 high = nrow; 1791 low = 0; /* assume unsorted */ 1792 while (high-low > 5) { 1793 t = (low+high)/2; 1794 if (rp[t] > bcol) high = t; 1795 else low = t; 1796 } 1797 for (i=low; i<high; i++) { 1798 if (rp[i] > bcol) break; 1799 if (rp[i] == bcol) { 1800 *v++ = ap[bs2*i+bs*cidx+ridx]; 1801 goto finished; 1802 } 1803 } 1804 *v++ = 0.0; 1805 finished:; 1806 } 1807 } 1808 PetscFunctionReturn(0); 1809 } 1810 1811 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1812 { 1813 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1814 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1815 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1816 PetscErrorCode ierr; 1817 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1818 PetscBool roworiented=a->roworiented; 1819 const PetscScalar *value = v; 1820 MatScalar *ap=NULL,*aa = a->a,*bap; 1821 1822 PetscFunctionBegin; 1823 if (roworiented) { 1824 stepval = (n-1)*bs; 1825 } else { 1826 stepval = (m-1)*bs; 1827 } 1828 for (k=0; k<m; k++) { /* loop over added rows */ 1829 row = im[k]; 1830 if (row < 0) continue; 1831 if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1832 rp = aj + ai[row]; 1833 if (!A->structure_only) ap = aa + bs2*ai[row]; 1834 rmax = imax[row]; 1835 nrow = ailen[row]; 1836 low = 0; 1837 high = nrow; 1838 for (l=0; l<n; l++) { /* loop over added columns */ 1839 if (in[l] < 0) continue; 1840 if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1841 col = in[l]; 1842 if (!A->structure_only) { 1843 if (roworiented) { 1844 value = v + (k*(stepval+bs) + l)*bs; 1845 } else { 1846 value = v + (l*(stepval+bs) + k)*bs; 1847 } 1848 } 1849 if (col <= lastcol) low = 0; 1850 else high = nrow; 1851 lastcol = col; 1852 while (high-low > 7) { 1853 t = (low+high)/2; 1854 if (rp[t] > col) high = t; 1855 else low = t; 1856 } 1857 for (i=low; i<high; i++) { 1858 if (rp[i] > col) break; 1859 if (rp[i] == col) { 1860 if (A->structure_only) goto noinsert2; 1861 bap = ap + bs2*i; 1862 if (roworiented) { 1863 if (is == ADD_VALUES) { 1864 for (ii=0; ii<bs; ii++,value+=stepval) { 1865 for (jj=ii; jj<bs2; jj+=bs) { 1866 bap[jj] += *value++; 1867 } 1868 } 1869 } else { 1870 for (ii=0; ii<bs; ii++,value+=stepval) { 1871 for (jj=ii; jj<bs2; jj+=bs) { 1872 bap[jj] = *value++; 1873 } 1874 } 1875 } 1876 } else { 1877 if (is == ADD_VALUES) { 1878 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1879 for (jj=0; jj<bs; jj++) { 1880 bap[jj] += value[jj]; 1881 } 1882 bap += bs; 1883 } 1884 } else { 1885 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1886 for (jj=0; jj<bs; jj++) { 1887 bap[jj] = value[jj]; 1888 } 1889 bap += bs; 1890 } 1891 } 1892 } 1893 goto noinsert2; 1894 } 1895 } 1896 if (nonew == 1) goto noinsert2; 1897 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1898 if (A->structure_only) { 1899 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1900 } else { 1901 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1902 } 1903 N = nrow++ - 1; high++; 1904 /* shift up all the later entries in this row */ 1905 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1906 rp[i] = col; 1907 if (!A->structure_only) { 1908 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1909 bap = ap + bs2*i; 1910 if (roworiented) { 1911 for (ii=0; ii<bs; ii++,value+=stepval) { 1912 for (jj=ii; jj<bs2; jj+=bs) { 1913 bap[jj] = *value++; 1914 } 1915 } 1916 } else { 1917 for (ii=0; ii<bs; ii++,value+=stepval) { 1918 for (jj=0; jj<bs; jj++) { 1919 *bap++ = *value++; 1920 } 1921 } 1922 } 1923 } 1924 noinsert2:; 1925 low = i; 1926 } 1927 ailen[row] = nrow; 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1933 { 1934 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1935 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1936 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1937 PetscErrorCode ierr; 1938 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1939 MatScalar *aa = a->a,*ap; 1940 PetscReal ratio=0.6; 1941 1942 PetscFunctionBegin; 1943 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1944 1945 if (m) rmax = ailen[0]; 1946 for (i=1; i<mbs; i++) { 1947 /* move each row back by the amount of empty slots (fshift) before it*/ 1948 fshift += imax[i-1] - ailen[i-1]; 1949 rmax = PetscMax(rmax,ailen[i]); 1950 if (fshift) { 1951 ip = aj + ai[i]; 1952 ap = aa + bs2*ai[i]; 1953 N = ailen[i]; 1954 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1955 if (!A->structure_only) { 1956 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1957 } 1958 } 1959 ai[i] = ai[i-1] + ailen[i-1]; 1960 } 1961 if (mbs) { 1962 fshift += imax[mbs-1] - ailen[mbs-1]; 1963 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1964 } 1965 1966 /* reset ilen and imax for each row */ 1967 a->nonzerorowcnt = 0; 1968 if (A->structure_only) { 1969 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1970 } else { /* !A->structure_only */ 1971 for (i=0; i<mbs; i++) { 1972 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1973 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1974 } 1975 } 1976 a->nz = ai[mbs]; 1977 1978 /* diagonals may have moved, so kill the diagonal pointers */ 1979 a->idiagvalid = PETSC_FALSE; 1980 if (fshift && a->diag) { 1981 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1982 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1983 a->diag = NULL; 1984 } 1985 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1986 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1987 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1988 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1989 1990 A->info.mallocs += a->reallocs; 1991 a->reallocs = 0; 1992 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1993 a->rmax = rmax; 1994 1995 if (!A->structure_only) { 1996 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1997 } 1998 PetscFunctionReturn(0); 1999 } 2000 2001 /* 2002 This function returns an array of flags which indicate the locations of contiguous 2003 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2004 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2005 Assume: sizes should be long enough to hold all the values. 2006 */ 2007 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2008 { 2009 PetscInt i,j,k,row; 2010 PetscBool flg; 2011 2012 PetscFunctionBegin; 2013 for (i=0,j=0; i<n; j++) { 2014 row = idx[i]; 2015 if (row%bs!=0) { /* Not the begining of a block */ 2016 sizes[j] = 1; 2017 i++; 2018 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2019 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2020 i++; 2021 } else { /* Begining of the block, so check if the complete block exists */ 2022 flg = PETSC_TRUE; 2023 for (k=1; k<bs; k++) { 2024 if (row+k != idx[i+k]) { /* break in the block */ 2025 flg = PETSC_FALSE; 2026 break; 2027 } 2028 } 2029 if (flg) { /* No break in the bs */ 2030 sizes[j] = bs; 2031 i += bs; 2032 } else { 2033 sizes[j] = 1; 2034 i++; 2035 } 2036 } 2037 } 2038 *bs_max = j; 2039 PetscFunctionReturn(0); 2040 } 2041 2042 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2043 { 2044 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2045 PetscErrorCode ierr; 2046 PetscInt i,j,k,count,*rows; 2047 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2048 PetscScalar zero = 0.0; 2049 MatScalar *aa; 2050 const PetscScalar *xx; 2051 PetscScalar *bb; 2052 2053 PetscFunctionBegin; 2054 /* fix right hand side if needed */ 2055 if (x && b) { 2056 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2057 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2058 for (i=0; i<is_n; i++) { 2059 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2060 } 2061 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2062 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2063 } 2064 2065 /* Make a copy of the IS and sort it */ 2066 /* allocate memory for rows,sizes */ 2067 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2068 2069 /* copy IS values to rows, and sort them */ 2070 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2071 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2072 2073 if (baij->keepnonzeropattern) { 2074 for (i=0; i<is_n; i++) sizes[i] = 1; 2075 bs_max = is_n; 2076 } else { 2077 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2078 A->nonzerostate++; 2079 } 2080 2081 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2082 row = rows[j]; 2083 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2084 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2085 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2086 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2087 if (diag != (PetscScalar)0.0) { 2088 if (baij->ilen[row/bs] > 0) { 2089 baij->ilen[row/bs] = 1; 2090 baij->j[baij->i[row/bs]] = row/bs; 2091 2092 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2093 } 2094 /* Now insert all the diagonal values for this bs */ 2095 for (k=0; k<bs; k++) { 2096 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2097 } 2098 } else { /* (diag == 0.0) */ 2099 baij->ilen[row/bs] = 0; 2100 } /* end (diag == 0.0) */ 2101 } else { /* (sizes[i] != bs) */ 2102 if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2103 for (k=0; k<count; k++) { 2104 aa[0] = zero; 2105 aa += bs; 2106 } 2107 if (diag != (PetscScalar)0.0) { 2108 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2109 } 2110 } 2111 } 2112 2113 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2114 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2119 { 2120 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 PetscInt i,j,k,count; 2123 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2124 PetscScalar zero = 0.0; 2125 MatScalar *aa; 2126 const PetscScalar *xx; 2127 PetscScalar *bb; 2128 PetscBool *zeroed,vecs = PETSC_FALSE; 2129 2130 PetscFunctionBegin; 2131 /* fix right hand side if needed */ 2132 if (x && b) { 2133 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2134 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2135 vecs = PETSC_TRUE; 2136 } 2137 2138 /* zero the columns */ 2139 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2140 for (i=0; i<is_n; i++) { 2141 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2142 zeroed[is_idx[i]] = PETSC_TRUE; 2143 } 2144 for (i=0; i<A->rmap->N; i++) { 2145 if (!zeroed[i]) { 2146 row = i/bs; 2147 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2148 for (k=0; k<bs; k++) { 2149 col = bs*baij->j[j] + k; 2150 if (zeroed[col]) { 2151 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2152 if (vecs) bb[i] -= aa[0]*xx[col]; 2153 aa[0] = 0.0; 2154 } 2155 } 2156 } 2157 } else if (vecs) bb[i] = diag*xx[i]; 2158 } 2159 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2160 if (vecs) { 2161 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2162 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2163 } 2164 2165 /* zero the rows */ 2166 for (i=0; i<is_n; i++) { 2167 row = is_idx[i]; 2168 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2169 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2170 for (k=0; k<count; k++) { 2171 aa[0] = zero; 2172 aa += bs; 2173 } 2174 if (diag != (PetscScalar)0.0) { 2175 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2176 } 2177 } 2178 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2179 PetscFunctionReturn(0); 2180 } 2181 2182 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2183 { 2184 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2185 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2186 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2187 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2188 PetscErrorCode ierr; 2189 PetscInt ridx,cidx,bs2=a->bs2; 2190 PetscBool roworiented=a->roworiented; 2191 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2192 2193 PetscFunctionBegin; 2194 for (k=0; k<m; k++) { /* loop over added rows */ 2195 row = im[k]; 2196 brow = row/bs; 2197 if (row < 0) continue; 2198 if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2199 rp = aj + ai[brow]; 2200 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2201 rmax = imax[brow]; 2202 nrow = ailen[brow]; 2203 low = 0; 2204 high = nrow; 2205 for (l=0; l<n; l++) { /* loop over added columns */ 2206 if (in[l] < 0) continue; 2207 if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2208 col = in[l]; bcol = col/bs; 2209 ridx = row % bs; cidx = col % bs; 2210 if (!A->structure_only) { 2211 if (roworiented) { 2212 value = v[l + k*n]; 2213 } else { 2214 value = v[k + l*m]; 2215 } 2216 } 2217 if (col <= lastcol) low = 0; else high = nrow; 2218 lastcol = col; 2219 while (high-low > 7) { 2220 t = (low+high)/2; 2221 if (rp[t] > bcol) high = t; 2222 else low = t; 2223 } 2224 for (i=low; i<high; i++) { 2225 if (rp[i] > bcol) break; 2226 if (rp[i] == bcol) { 2227 bap = ap + bs2*i + bs*cidx + ridx; 2228 if (!A->structure_only) { 2229 if (is == ADD_VALUES) *bap += value; 2230 else *bap = value; 2231 } 2232 goto noinsert1; 2233 } 2234 } 2235 if (nonew == 1) goto noinsert1; 2236 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2237 if (A->structure_only) { 2238 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2239 } else { 2240 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2241 } 2242 N = nrow++ - 1; high++; 2243 /* shift up all the later entries in this row */ 2244 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2245 rp[i] = bcol; 2246 if (!A->structure_only) { 2247 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2248 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2249 ap[bs2*i + bs*cidx + ridx] = value; 2250 } 2251 a->nz++; 2252 A->nonzerostate++; 2253 noinsert1:; 2254 low = i; 2255 } 2256 ailen[brow] = nrow; 2257 } 2258 PetscFunctionReturn(0); 2259 } 2260 2261 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2262 { 2263 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2264 Mat outA; 2265 PetscErrorCode ierr; 2266 PetscBool row_identity,col_identity; 2267 2268 PetscFunctionBegin; 2269 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2270 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2271 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2272 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2273 2274 outA = inA; 2275 inA->factortype = MAT_FACTOR_LU; 2276 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2277 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2278 2279 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2280 2281 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2282 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2283 a->row = row; 2284 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2285 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2286 a->col = col; 2287 2288 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2289 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2290 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2291 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2292 2293 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2294 if (!a->solve_work) { 2295 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2296 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2297 } 2298 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2303 { 2304 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2305 PetscInt i,nz,mbs; 2306 2307 PetscFunctionBegin; 2308 nz = baij->maxnz; 2309 mbs = baij->mbs; 2310 for (i=0; i<nz; i++) { 2311 baij->j[i] = indices[i]; 2312 } 2313 baij->nz = nz; 2314 for (i=0; i<mbs; i++) { 2315 baij->ilen[i] = baij->imax[i]; 2316 } 2317 PetscFunctionReturn(0); 2318 } 2319 2320 /*@ 2321 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2322 in the matrix. 2323 2324 Input Parameters: 2325 + mat - the SeqBAIJ matrix 2326 - indices - the column indices 2327 2328 Level: advanced 2329 2330 Notes: 2331 This can be called if you have precomputed the nonzero structure of the 2332 matrix and want to provide it to the matrix object to improve the performance 2333 of the MatSetValues() operation. 2334 2335 You MUST have set the correct numbers of nonzeros per row in the call to 2336 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2337 2338 MUST be called before any calls to MatSetValues(); 2339 2340 @*/ 2341 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2342 { 2343 PetscErrorCode ierr; 2344 2345 PetscFunctionBegin; 2346 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2347 PetscValidPointer(indices,2); 2348 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2353 { 2354 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2355 PetscErrorCode ierr; 2356 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2357 PetscReal atmp; 2358 PetscScalar *x,zero = 0.0; 2359 MatScalar *aa; 2360 PetscInt ncols,brow,krow,kcol; 2361 2362 PetscFunctionBegin; 2363 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2364 bs = A->rmap->bs; 2365 aa = a->a; 2366 ai = a->i; 2367 aj = a->j; 2368 mbs = a->mbs; 2369 2370 ierr = VecSet(v,zero);CHKERRQ(ierr); 2371 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2372 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2373 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2374 for (i=0; i<mbs; i++) { 2375 ncols = ai[1] - ai[0]; ai++; 2376 brow = bs*i; 2377 for (j=0; j<ncols; j++) { 2378 for (kcol=0; kcol<bs; kcol++) { 2379 for (krow=0; krow<bs; krow++) { 2380 atmp = PetscAbsScalar(*aa);aa++; 2381 row = brow + krow; /* row index */ 2382 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2383 } 2384 } 2385 aj++; 2386 } 2387 } 2388 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2393 { 2394 PetscErrorCode ierr; 2395 2396 PetscFunctionBegin; 2397 /* If the two matrices have the same copy implementation, use fast copy. */ 2398 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2399 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2400 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2401 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2402 2403 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2404 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2405 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2406 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2407 } else { 2408 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2409 } 2410 PetscFunctionReturn(0); 2411 } 2412 2413 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2414 { 2415 PetscErrorCode ierr; 2416 2417 PetscFunctionBegin; 2418 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2419 PetscFunctionReturn(0); 2420 } 2421 2422 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2423 { 2424 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2425 2426 PetscFunctionBegin; 2427 *array = a->a; 2428 PetscFunctionReturn(0); 2429 } 2430 2431 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2432 { 2433 PetscFunctionBegin; 2434 *array = NULL; 2435 PetscFunctionReturn(0); 2436 } 2437 2438 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2439 { 2440 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2441 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2442 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2443 PetscErrorCode ierr; 2444 2445 PetscFunctionBegin; 2446 /* Set the number of nonzeros in the new matrix */ 2447 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2448 PetscFunctionReturn(0); 2449 } 2450 2451 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2452 { 2453 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2454 PetscErrorCode ierr; 2455 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2456 PetscBLASInt one=1; 2457 2458 PetscFunctionBegin; 2459 if (str == SAME_NONZERO_PATTERN) { 2460 PetscScalar alpha = a; 2461 PetscBLASInt bnz; 2462 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2463 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2464 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2465 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2466 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2467 } else { 2468 Mat B; 2469 PetscInt *nnz; 2470 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2471 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2472 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2473 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2474 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2475 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2476 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2477 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2478 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2479 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2480 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2481 ierr = PetscFree(nnz);CHKERRQ(ierr); 2482 } 2483 PetscFunctionReturn(0); 2484 } 2485 2486 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2487 { 2488 #if defined(PETSC_USE_COMPLEX) 2489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2490 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2491 MatScalar *aa = a->a; 2492 2493 PetscFunctionBegin; 2494 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2495 #else 2496 PetscFunctionBegin; 2497 #endif 2498 PetscFunctionReturn(0); 2499 } 2500 2501 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2502 { 2503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2504 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2505 MatScalar *aa = a->a; 2506 2507 PetscFunctionBegin; 2508 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2509 PetscFunctionReturn(0); 2510 } 2511 2512 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2513 { 2514 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2515 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2516 MatScalar *aa = a->a; 2517 2518 PetscFunctionBegin; 2519 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 /* 2524 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2525 */ 2526 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2527 { 2528 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2529 PetscErrorCode ierr; 2530 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2531 PetscInt nz = a->i[m],row,*jj,mr,col; 2532 2533 PetscFunctionBegin; 2534 *nn = n; 2535 if (!ia) PetscFunctionReturn(0); 2536 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2537 else { 2538 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2539 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2540 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2541 jj = a->j; 2542 for (i=0; i<nz; i++) { 2543 collengths[jj[i]]++; 2544 } 2545 cia[0] = oshift; 2546 for (i=0; i<n; i++) { 2547 cia[i+1] = cia[i] + collengths[i]; 2548 } 2549 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2550 jj = a->j; 2551 for (row=0; row<m; row++) { 2552 mr = a->i[row+1] - a->i[row]; 2553 for (i=0; i<mr; i++) { 2554 col = *jj++; 2555 2556 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2557 } 2558 } 2559 ierr = PetscFree(collengths);CHKERRQ(ierr); 2560 *ia = cia; *ja = cja; 2561 } 2562 PetscFunctionReturn(0); 2563 } 2564 2565 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2566 { 2567 PetscErrorCode ierr; 2568 2569 PetscFunctionBegin; 2570 if (!ia) PetscFunctionReturn(0); 2571 ierr = PetscFree(*ia);CHKERRQ(ierr); 2572 ierr = PetscFree(*ja);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 /* 2577 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2578 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2579 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2580 */ 2581 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2582 { 2583 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2584 PetscErrorCode ierr; 2585 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2586 PetscInt nz = a->i[m],row,*jj,mr,col; 2587 PetscInt *cspidx; 2588 2589 PetscFunctionBegin; 2590 *nn = n; 2591 if (!ia) PetscFunctionReturn(0); 2592 2593 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2594 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2595 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2596 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2597 jj = a->j; 2598 for (i=0; i<nz; i++) { 2599 collengths[jj[i]]++; 2600 } 2601 cia[0] = oshift; 2602 for (i=0; i<n; i++) { 2603 cia[i+1] = cia[i] + collengths[i]; 2604 } 2605 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2606 jj = a->j; 2607 for (row=0; row<m; row++) { 2608 mr = a->i[row+1] - a->i[row]; 2609 for (i=0; i<mr; i++) { 2610 col = *jj++; 2611 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2612 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2613 } 2614 } 2615 ierr = PetscFree(collengths);CHKERRQ(ierr); 2616 *ia = cia; 2617 *ja = cja; 2618 *spidx = cspidx; 2619 PetscFunctionReturn(0); 2620 } 2621 2622 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2623 { 2624 PetscErrorCode ierr; 2625 2626 PetscFunctionBegin; 2627 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2628 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2629 PetscFunctionReturn(0); 2630 } 2631 2632 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2633 { 2634 PetscErrorCode ierr; 2635 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2636 2637 PetscFunctionBegin; 2638 if (!Y->preallocated || !aij->nz) { 2639 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2640 } 2641 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2642 PetscFunctionReturn(0); 2643 } 2644 2645 /* -------------------------------------------------------------------*/ 2646 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2647 MatGetRow_SeqBAIJ, 2648 MatRestoreRow_SeqBAIJ, 2649 MatMult_SeqBAIJ_N, 2650 /* 4*/ MatMultAdd_SeqBAIJ_N, 2651 MatMultTranspose_SeqBAIJ, 2652 MatMultTransposeAdd_SeqBAIJ, 2653 NULL, 2654 NULL, 2655 NULL, 2656 /* 10*/ NULL, 2657 MatLUFactor_SeqBAIJ, 2658 NULL, 2659 NULL, 2660 MatTranspose_SeqBAIJ, 2661 /* 15*/ MatGetInfo_SeqBAIJ, 2662 MatEqual_SeqBAIJ, 2663 MatGetDiagonal_SeqBAIJ, 2664 MatDiagonalScale_SeqBAIJ, 2665 MatNorm_SeqBAIJ, 2666 /* 20*/ NULL, 2667 MatAssemblyEnd_SeqBAIJ, 2668 MatSetOption_SeqBAIJ, 2669 MatZeroEntries_SeqBAIJ, 2670 /* 24*/ MatZeroRows_SeqBAIJ, 2671 NULL, 2672 NULL, 2673 NULL, 2674 NULL, 2675 /* 29*/ MatSetUp_SeqBAIJ, 2676 NULL, 2677 NULL, 2678 NULL, 2679 NULL, 2680 /* 34*/ MatDuplicate_SeqBAIJ, 2681 NULL, 2682 NULL, 2683 MatILUFactor_SeqBAIJ, 2684 NULL, 2685 /* 39*/ MatAXPY_SeqBAIJ, 2686 MatCreateSubMatrices_SeqBAIJ, 2687 MatIncreaseOverlap_SeqBAIJ, 2688 MatGetValues_SeqBAIJ, 2689 MatCopy_SeqBAIJ, 2690 /* 44*/ NULL, 2691 MatScale_SeqBAIJ, 2692 MatShift_SeqBAIJ, 2693 NULL, 2694 MatZeroRowsColumns_SeqBAIJ, 2695 /* 49*/ NULL, 2696 MatGetRowIJ_SeqBAIJ, 2697 MatRestoreRowIJ_SeqBAIJ, 2698 MatGetColumnIJ_SeqBAIJ, 2699 MatRestoreColumnIJ_SeqBAIJ, 2700 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2701 NULL, 2702 NULL, 2703 NULL, 2704 MatSetValuesBlocked_SeqBAIJ, 2705 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2706 MatDestroy_SeqBAIJ, 2707 MatView_SeqBAIJ, 2708 NULL, 2709 NULL, 2710 /* 64*/ NULL, 2711 NULL, 2712 NULL, 2713 NULL, 2714 NULL, 2715 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2716 NULL, 2717 MatConvert_Basic, 2718 NULL, 2719 NULL, 2720 /* 74*/ NULL, 2721 MatFDColoringApply_BAIJ, 2722 NULL, 2723 NULL, 2724 NULL, 2725 /* 79*/ NULL, 2726 NULL, 2727 NULL, 2728 NULL, 2729 MatLoad_SeqBAIJ, 2730 /* 84*/ NULL, 2731 NULL, 2732 NULL, 2733 NULL, 2734 NULL, 2735 /* 89*/ NULL, 2736 NULL, 2737 NULL, 2738 NULL, 2739 NULL, 2740 /* 94*/ NULL, 2741 NULL, 2742 NULL, 2743 NULL, 2744 NULL, 2745 /* 99*/ NULL, 2746 NULL, 2747 NULL, 2748 MatConjugate_SeqBAIJ, 2749 NULL, 2750 /*104*/ NULL, 2751 MatRealPart_SeqBAIJ, 2752 MatImaginaryPart_SeqBAIJ, 2753 NULL, 2754 NULL, 2755 /*109*/ NULL, 2756 NULL, 2757 NULL, 2758 NULL, 2759 MatMissingDiagonal_SeqBAIJ, 2760 /*114*/ NULL, 2761 NULL, 2762 NULL, 2763 NULL, 2764 NULL, 2765 /*119*/ NULL, 2766 NULL, 2767 MatMultHermitianTranspose_SeqBAIJ, 2768 MatMultHermitianTransposeAdd_SeqBAIJ, 2769 NULL, 2770 /*124*/ NULL, 2771 MatGetColumnNorms_SeqBAIJ, 2772 MatInvertBlockDiagonal_SeqBAIJ, 2773 NULL, 2774 NULL, 2775 /*129*/ NULL, 2776 NULL, 2777 NULL, 2778 NULL, 2779 NULL, 2780 /*134*/ NULL, 2781 NULL, 2782 NULL, 2783 NULL, 2784 NULL, 2785 /*139*/ MatSetBlockSizes_Default, 2786 NULL, 2787 NULL, 2788 MatFDColoringSetUp_SeqXAIJ, 2789 NULL, 2790 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2791 MatDestroySubMatrices_SeqBAIJ 2792 }; 2793 2794 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2795 { 2796 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2797 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2798 PetscErrorCode ierr; 2799 2800 PetscFunctionBegin; 2801 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2802 2803 /* allocate space for values if not already there */ 2804 if (!aij->saved_values) { 2805 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2806 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2807 } 2808 2809 /* copy values over */ 2810 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2811 PetscFunctionReturn(0); 2812 } 2813 2814 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2815 { 2816 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2817 PetscErrorCode ierr; 2818 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2819 2820 PetscFunctionBegin; 2821 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2822 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2823 2824 /* copy values over */ 2825 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2826 PetscFunctionReturn(0); 2827 } 2828 2829 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2830 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2831 2832 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2833 { 2834 Mat_SeqBAIJ *b; 2835 PetscErrorCode ierr; 2836 PetscInt i,mbs,nbs,bs2; 2837 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2838 2839 PetscFunctionBegin; 2840 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2841 if (nz == MAT_SKIP_ALLOCATION) { 2842 skipallocation = PETSC_TRUE; 2843 nz = 0; 2844 } 2845 2846 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2847 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2848 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2849 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2850 2851 B->preallocated = PETSC_TRUE; 2852 2853 mbs = B->rmap->n/bs; 2854 nbs = B->cmap->n/bs; 2855 bs2 = bs*bs; 2856 2857 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2858 2859 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2860 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2861 if (nnz) { 2862 for (i=0; i<mbs; i++) { 2863 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2864 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2865 } 2866 } 2867 2868 b = (Mat_SeqBAIJ*)B->data; 2869 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2870 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2871 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2872 2873 if (!flg) { 2874 switch (bs) { 2875 case 1: 2876 B->ops->mult = MatMult_SeqBAIJ_1; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2878 break; 2879 case 2: 2880 B->ops->mult = MatMult_SeqBAIJ_2; 2881 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2882 break; 2883 case 3: 2884 B->ops->mult = MatMult_SeqBAIJ_3; 2885 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2886 break; 2887 case 4: 2888 B->ops->mult = MatMult_SeqBAIJ_4; 2889 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2890 break; 2891 case 5: 2892 B->ops->mult = MatMult_SeqBAIJ_5; 2893 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2894 break; 2895 case 6: 2896 B->ops->mult = MatMult_SeqBAIJ_6; 2897 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2898 break; 2899 case 7: 2900 B->ops->mult = MatMult_SeqBAIJ_7; 2901 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2902 break; 2903 case 9: 2904 { 2905 PetscInt version = 1; 2906 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2907 switch (version) { 2908 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2909 case 1: 2910 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2911 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2912 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2913 break; 2914 #endif 2915 default: 2916 B->ops->mult = MatMult_SeqBAIJ_N; 2917 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2918 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2919 break; 2920 } 2921 break; 2922 } 2923 case 11: 2924 B->ops->mult = MatMult_SeqBAIJ_11; 2925 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2926 break; 2927 case 12: 2928 { 2929 PetscInt version = 1; 2930 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2931 switch (version) { 2932 case 1: 2933 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2934 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2935 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2936 break; 2937 case 2: 2938 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2939 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2940 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2941 break; 2942 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2943 case 3: 2944 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2945 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2946 ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2947 break; 2948 #endif 2949 default: 2950 B->ops->mult = MatMult_SeqBAIJ_N; 2951 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2952 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2953 break; 2954 } 2955 break; 2956 } 2957 case 15: 2958 { 2959 PetscInt version = 1; 2960 ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr); 2961 switch (version) { 2962 case 1: 2963 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2964 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2965 break; 2966 case 2: 2967 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2968 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2969 break; 2970 case 3: 2971 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2972 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2973 break; 2974 case 4: 2975 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2976 ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr); 2977 break; 2978 default: 2979 B->ops->mult = MatMult_SeqBAIJ_N; 2980 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2981 break; 2982 } 2983 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2984 break; 2985 } 2986 default: 2987 B->ops->mult = MatMult_SeqBAIJ_N; 2988 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2989 ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr); 2990 break; 2991 } 2992 } 2993 B->ops->sor = MatSOR_SeqBAIJ; 2994 b->mbs = mbs; 2995 b->nbs = nbs; 2996 if (!skipallocation) { 2997 if (!b->imax) { 2998 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2999 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3000 3001 b->free_imax_ilen = PETSC_TRUE; 3002 } 3003 /* b->ilen will count nonzeros in each block row so far. */ 3004 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3005 if (!nnz) { 3006 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3007 else if (nz < 0) nz = 1; 3008 nz = PetscMin(nz,nbs); 3009 for (i=0; i<mbs; i++) b->imax[i] = nz; 3010 ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 3011 } else { 3012 PetscInt64 nz64 = 0; 3013 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 3014 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 3015 } 3016 3017 /* allocate the matrix space */ 3018 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3019 if (B->structure_only) { 3020 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 3021 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 3022 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 3023 } else { 3024 PetscInt nzbs2 = 0; 3025 ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr); 3026 ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 3027 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3028 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 3029 } 3030 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 3031 3032 if (B->structure_only) { 3033 b->singlemalloc = PETSC_FALSE; 3034 b->free_a = PETSC_FALSE; 3035 } else { 3036 b->singlemalloc = PETSC_TRUE; 3037 b->free_a = PETSC_TRUE; 3038 } 3039 b->free_ij = PETSC_TRUE; 3040 3041 b->i[0] = 0; 3042 for (i=1; i<mbs+1; i++) { 3043 b->i[i] = b->i[i-1] + b->imax[i-1]; 3044 } 3045 3046 } else { 3047 b->free_a = PETSC_FALSE; 3048 b->free_ij = PETSC_FALSE; 3049 } 3050 3051 b->bs2 = bs2; 3052 b->mbs = mbs; 3053 b->nz = 0; 3054 b->maxnz = nz; 3055 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3056 B->was_assembled = PETSC_FALSE; 3057 B->assembled = PETSC_FALSE; 3058 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3059 PetscFunctionReturn(0); 3060 } 3061 3062 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3063 { 3064 PetscInt i,m,nz,nz_max=0,*nnz; 3065 PetscScalar *values=NULL; 3066 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3067 PetscErrorCode ierr; 3068 3069 PetscFunctionBegin; 3070 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3071 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3072 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3073 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3074 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3075 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3076 m = B->rmap->n/bs; 3077 3078 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3079 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 3080 for (i=0; i<m; i++) { 3081 nz = ii[i+1]- ii[i]; 3082 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3083 nz_max = PetscMax(nz_max, nz); 3084 nnz[i] = nz; 3085 } 3086 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3087 ierr = PetscFree(nnz);CHKERRQ(ierr); 3088 3089 values = (PetscScalar*)V; 3090 if (!values) { 3091 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3092 } 3093 for (i=0; i<m; i++) { 3094 PetscInt ncols = ii[i+1] - ii[i]; 3095 const PetscInt *icols = jj + ii[i]; 3096 if (bs == 1 || !roworiented) { 3097 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3098 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3099 } else { 3100 PetscInt j; 3101 for (j=0; j<ncols; j++) { 3102 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3103 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3104 } 3105 } 3106 } 3107 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3108 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3110 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3111 PetscFunctionReturn(0); 3112 } 3113 3114 /*@C 3115 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3116 3117 Not Collective 3118 3119 Input Parameter: 3120 . mat - a MATSEQBAIJ matrix 3121 3122 Output Parameter: 3123 . array - pointer to the data 3124 3125 Level: intermediate 3126 3127 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3128 @*/ 3129 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3130 { 3131 PetscErrorCode ierr; 3132 3133 PetscFunctionBegin; 3134 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3135 PetscFunctionReturn(0); 3136 } 3137 3138 /*@C 3139 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3140 3141 Not Collective 3142 3143 Input Parameters: 3144 + mat - a MATSEQBAIJ matrix 3145 - array - pointer to the data 3146 3147 Level: intermediate 3148 3149 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3150 @*/ 3151 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3152 { 3153 PetscErrorCode ierr; 3154 3155 PetscFunctionBegin; 3156 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 3160 /*MC 3161 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3162 block sparse compressed row format. 3163 3164 Options Database Keys: 3165 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3166 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3167 3168 Level: beginner 3169 3170 Notes: 3171 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3172 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3173 3174 Run with -info to see what version of the matrix-vector product is being used 3175 3176 .seealso: MatCreateSeqBAIJ() 3177 M*/ 3178 3179 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3180 3181 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3182 { 3183 PetscErrorCode ierr; 3184 PetscMPIInt size; 3185 Mat_SeqBAIJ *b; 3186 3187 PetscFunctionBegin; 3188 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3189 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3190 3191 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3192 B->data = (void*)b; 3193 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3194 3195 b->row = NULL; 3196 b->col = NULL; 3197 b->icol = NULL; 3198 b->reallocs = 0; 3199 b->saved_values = NULL; 3200 3201 b->roworiented = PETSC_TRUE; 3202 b->nonew = 0; 3203 b->diag = NULL; 3204 B->spptr = NULL; 3205 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3206 b->keepnonzeropattern = PETSC_FALSE; 3207 3208 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3209 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3210 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3211 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3212 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3213 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3214 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3215 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3216 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3217 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3218 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3219 #if defined(PETSC_HAVE_HYPRE) 3220 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3221 #endif 3222 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3223 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3224 PetscFunctionReturn(0); 3225 } 3226 3227 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3228 { 3229 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3230 PetscErrorCode ierr; 3231 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3232 3233 PetscFunctionBegin; 3234 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3235 3236 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3237 c->imax = a->imax; 3238 c->ilen = a->ilen; 3239 c->free_imax_ilen = PETSC_FALSE; 3240 } else { 3241 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3242 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3243 for (i=0; i<mbs; i++) { 3244 c->imax[i] = a->imax[i]; 3245 c->ilen[i] = a->ilen[i]; 3246 } 3247 c->free_imax_ilen = PETSC_TRUE; 3248 } 3249 3250 /* allocate the matrix space */ 3251 if (mallocmatspace) { 3252 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3253 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3254 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3255 3256 c->i = a->i; 3257 c->j = a->j; 3258 c->singlemalloc = PETSC_FALSE; 3259 c->free_a = PETSC_TRUE; 3260 c->free_ij = PETSC_FALSE; 3261 c->parent = A; 3262 C->preallocated = PETSC_TRUE; 3263 C->assembled = PETSC_TRUE; 3264 3265 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3266 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3267 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3268 } else { 3269 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3270 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3271 3272 c->singlemalloc = PETSC_TRUE; 3273 c->free_a = PETSC_TRUE; 3274 c->free_ij = PETSC_TRUE; 3275 3276 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3277 if (mbs > 0) { 3278 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3279 if (cpvalues == MAT_COPY_VALUES) { 3280 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3281 } else { 3282 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3283 } 3284 } 3285 C->preallocated = PETSC_TRUE; 3286 C->assembled = PETSC_TRUE; 3287 } 3288 } 3289 3290 c->roworiented = a->roworiented; 3291 c->nonew = a->nonew; 3292 3293 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3294 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3295 3296 c->bs2 = a->bs2; 3297 c->mbs = a->mbs; 3298 c->nbs = a->nbs; 3299 3300 if (a->diag) { 3301 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3302 c->diag = a->diag; 3303 c->free_diag = PETSC_FALSE; 3304 } else { 3305 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3306 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3307 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3308 c->free_diag = PETSC_TRUE; 3309 } 3310 } else c->diag = NULL; 3311 3312 c->nz = a->nz; 3313 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3314 c->solve_work = NULL; 3315 c->mult_work = NULL; 3316 c->sor_workt = NULL; 3317 c->sor_work = NULL; 3318 3319 c->compressedrow.use = a->compressedrow.use; 3320 c->compressedrow.nrows = a->compressedrow.nrows; 3321 if (a->compressedrow.use) { 3322 i = a->compressedrow.nrows; 3323 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3324 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3325 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3326 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3327 } else { 3328 c->compressedrow.use = PETSC_FALSE; 3329 c->compressedrow.i = NULL; 3330 c->compressedrow.rindex = NULL; 3331 } 3332 C->nonzerostate = A->nonzerostate; 3333 3334 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3335 PetscFunctionReturn(0); 3336 } 3337 3338 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3339 { 3340 PetscErrorCode ierr; 3341 3342 PetscFunctionBegin; 3343 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3344 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3345 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3346 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3347 PetscFunctionReturn(0); 3348 } 3349 3350 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3351 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3352 { 3353 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3354 PetscInt *rowidxs,*colidxs; 3355 PetscScalar *matvals; 3356 PetscErrorCode ierr; 3357 3358 PetscFunctionBegin; 3359 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3360 3361 /* read matrix header */ 3362 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3363 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3364 M = header[1]; N = header[2]; nz = header[3]; 3365 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3366 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3367 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3368 3369 /* set block sizes from the viewer's .info file */ 3370 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3371 /* set local and global sizes if not set already */ 3372 if (mat->rmap->n < 0) mat->rmap->n = M; 3373 if (mat->cmap->n < 0) mat->cmap->n = N; 3374 if (mat->rmap->N < 0) mat->rmap->N = M; 3375 if (mat->cmap->N < 0) mat->cmap->N = N; 3376 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3377 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3378 3379 /* check if the matrix sizes are correct */ 3380 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3381 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3382 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3383 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3384 mbs = m/bs; nbs = n/bs; 3385 3386 /* read in row lengths, column indices and nonzero values */ 3387 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3388 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3389 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3390 sum = rowidxs[m]; 3391 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3392 3393 /* read in column indices and nonzero values */ 3394 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3395 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3396 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3397 3398 { /* preallocate matrix storage */ 3399 PetscBT bt; /* helper bit set to count nonzeros */ 3400 PetscInt *nnz; 3401 PetscBool sbaij; 3402 3403 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3404 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3405 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3406 for (i=0; i<mbs; i++) { 3407 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3408 for (k=0; k<bs; k++) { 3409 PetscInt row = bs*i + k; 3410 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3411 PetscInt col = colidxs[j]; 3412 if (!sbaij || col >= row) 3413 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3414 } 3415 } 3416 } 3417 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3418 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3419 ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3420 ierr = PetscFree(nnz);CHKERRQ(ierr); 3421 } 3422 3423 /* store matrix values */ 3424 for (i=0; i<m; i++) { 3425 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3426 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3427 } 3428 3429 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3430 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3431 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3432 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3433 PetscFunctionReturn(0); 3434 } 3435 3436 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3437 { 3438 PetscErrorCode ierr; 3439 PetscBool isbinary; 3440 3441 PetscFunctionBegin; 3442 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3443 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3444 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3445 PetscFunctionReturn(0); 3446 } 3447 3448 /*@C 3449 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3450 compressed row) format. For good matrix assembly performance the 3451 user should preallocate the matrix storage by setting the parameter nz 3452 (or the array nnz). By setting these parameters accurately, performance 3453 during matrix assembly can be increased by more than a factor of 50. 3454 3455 Collective 3456 3457 Input Parameters: 3458 + comm - MPI communicator, set to PETSC_COMM_SELF 3459 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3460 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3461 . m - number of rows 3462 . n - number of columns 3463 . nz - number of nonzero blocks per block row (same for all rows) 3464 - nnz - array containing the number of nonzero blocks in the various block rows 3465 (possibly different for each block row) or NULL 3466 3467 Output Parameter: 3468 . A - the matrix 3469 3470 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3471 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3472 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3473 3474 Options Database Keys: 3475 + -mat_no_unroll - uses code that does not unroll the loops in the 3476 block calculations (much slower) 3477 - -mat_block_size - size of the blocks to use 3478 3479 Level: intermediate 3480 3481 Notes: 3482 The number of rows and columns must be divisible by blocksize. 3483 3484 If the nnz parameter is given then the nz parameter is ignored 3485 3486 A nonzero block is any block that as 1 or more nonzeros in it 3487 3488 The block AIJ format is fully compatible with standard Fortran 77 3489 storage. That is, the stored row and column indices can begin at 3490 either one (as in Fortran) or zero. See the users' manual for details. 3491 3492 Specify the preallocated storage with either nz or nnz (not both). 3493 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3494 allocation. See Users-Manual: ch_mat for details. 3495 matrices. 3496 3497 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3498 @*/ 3499 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3500 { 3501 PetscErrorCode ierr; 3502 3503 PetscFunctionBegin; 3504 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3505 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3506 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3507 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3508 PetscFunctionReturn(0); 3509 } 3510 3511 /*@C 3512 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3513 per row in the matrix. For good matrix assembly performance the 3514 user should preallocate the matrix storage by setting the parameter nz 3515 (or the array nnz). By setting these parameters accurately, performance 3516 during matrix assembly can be increased by more than a factor of 50. 3517 3518 Collective 3519 3520 Input Parameters: 3521 + B - the matrix 3522 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3523 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3524 . nz - number of block nonzeros per block row (same for all rows) 3525 - nnz - array containing the number of block nonzeros in the various block rows 3526 (possibly different for each block row) or NULL 3527 3528 Options Database Keys: 3529 + -mat_no_unroll - uses code that does not unroll the loops in the 3530 block calculations (much slower) 3531 - -mat_block_size - size of the blocks to use 3532 3533 Level: intermediate 3534 3535 Notes: 3536 If the nnz parameter is given then the nz parameter is ignored 3537 3538 You can call MatGetInfo() to get information on how effective the preallocation was; 3539 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3540 You can also run with the option -info and look for messages with the string 3541 malloc in them to see if additional memory allocation was needed. 3542 3543 The block AIJ format is fully compatible with standard Fortran 77 3544 storage. That is, the stored row and column indices can begin at 3545 either one (as in Fortran) or zero. See the users' manual for details. 3546 3547 Specify the preallocated storage with either nz or nnz (not both). 3548 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3549 allocation. See Users-Manual: ch_mat for details. 3550 3551 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3552 @*/ 3553 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3554 { 3555 PetscErrorCode ierr; 3556 3557 PetscFunctionBegin; 3558 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3559 PetscValidType(B,1); 3560 PetscValidLogicalCollectiveInt(B,bs,2); 3561 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3562 PetscFunctionReturn(0); 3563 } 3564 3565 /*@C 3566 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3567 3568 Collective 3569 3570 Input Parameters: 3571 + B - the matrix 3572 . i - the indices into j for the start of each local row (starts with zero) 3573 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3574 - v - optional values in the matrix 3575 3576 Level: advanced 3577 3578 Notes: 3579 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3580 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3581 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3582 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3583 block column and the second index is over columns within a block. 3584 3585 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3586 3587 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3588 @*/ 3589 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3590 { 3591 PetscErrorCode ierr; 3592 3593 PetscFunctionBegin; 3594 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3595 PetscValidType(B,1); 3596 PetscValidLogicalCollectiveInt(B,bs,2); 3597 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3598 PetscFunctionReturn(0); 3599 } 3600 3601 /*@ 3602 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3603 3604 Collective 3605 3606 Input Parameters: 3607 + comm - must be an MPI communicator of size 1 3608 . bs - size of block 3609 . m - number of rows 3610 . n - number of columns 3611 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3612 . j - column indices 3613 - a - matrix values 3614 3615 Output Parameter: 3616 . mat - the matrix 3617 3618 Level: advanced 3619 3620 Notes: 3621 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3622 once the matrix is destroyed 3623 3624 You cannot set new nonzero locations into this matrix, that will generate an error. 3625 3626 The i and j indices are 0 based 3627 3628 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3629 3630 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3631 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3632 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3633 with column-major ordering within blocks. 3634 3635 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3636 3637 @*/ 3638 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3639 { 3640 PetscErrorCode ierr; 3641 PetscInt ii; 3642 Mat_SeqBAIJ *baij; 3643 3644 PetscFunctionBegin; 3645 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3646 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3647 3648 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3649 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3650 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3651 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3652 baij = (Mat_SeqBAIJ*)(*mat)->data; 3653 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3654 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3655 3656 baij->i = i; 3657 baij->j = j; 3658 baij->a = a; 3659 3660 baij->singlemalloc = PETSC_FALSE; 3661 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3662 baij->free_a = PETSC_FALSE; 3663 baij->free_ij = PETSC_FALSE; 3664 3665 for (ii=0; ii<m; ii++) { 3666 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3667 if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3668 } 3669 if (PetscDefined(USE_DEBUG)) { 3670 for (ii=0; ii<baij->i[m]; ii++) { 3671 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3672 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3673 } 3674 } 3675 3676 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3677 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3678 PetscFunctionReturn(0); 3679 } 3680 3681 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3682 { 3683 PetscErrorCode ierr; 3684 PetscMPIInt size; 3685 3686 PetscFunctionBegin; 3687 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3688 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3689 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3690 } else { 3691 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3692 } 3693 PetscFunctionReturn(0); 3694 } 3695