1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 18 PetscErrorCode ierr; 19 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 20 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 21 PetscReal shift = 0.0; 22 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 23 24 PetscFunctionBegin; 25 allowzeropivot = PetscNot(A->erroriffailure); 26 27 if (a->idiagvalid) { 28 if (values) *values = a->idiag; 29 PetscFunctionReturn(0); 30 } 31 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 32 diag_offset = a->diag; 33 if (!a->idiag) { 34 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 35 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 36 } 37 diag = a->idiag; 38 mdiag = a->idiag+bs2*mbs; 39 if (values) *values = a->idiag; 40 /* factor and invert each block */ 41 switch (bs) { 42 case 1: 43 for (i=0; i<mbs; i++) { 44 odiag = v + 1*diag_offset[i]; 45 diag[0] = odiag[0]; 46 mdiag[0] = odiag[0]; 47 48 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 49 if (allowzeropivot) { 50 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 51 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 52 A->factorerror_zeropivot_row = i; 53 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 54 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 55 } 56 57 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 58 diag += 1; 59 mdiag += 1; 60 } 61 break; 62 case 2: 63 for (i=0; i<mbs; i++) { 64 odiag = v + 4*diag_offset[i]; 65 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 66 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 67 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 68 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 69 diag += 4; 70 mdiag += 4; 71 } 72 break; 73 case 3: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 9*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 78 diag[8] = odiag[8]; 79 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 80 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 81 mdiag[8] = odiag[8]; 82 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 83 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 94 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 105 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 106 diag += 25; 107 mdiag += 25; 108 } 109 break; 110 case 6: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 36*diag_offset[i]; 113 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 116 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += 49; 129 mdiag += 49; 130 } 131 break; 132 default: 133 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 134 for (i=0; i<mbs; i++) { 135 odiag = v + bs2*diag_offset[i]; 136 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 138 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 139 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 140 diag += bs2; 141 mdiag += bs2; 142 } 143 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 144 } 145 a->idiagvalid = PETSC_TRUE; 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 150 { 151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 152 PetscScalar *x,*work,*w,*workt,*t; 153 const MatScalar *v,*aa = a->a, *idiag; 154 const PetscScalar *b,*xb; 155 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 162 its = its*lits; 163 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 164 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 169 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 170 171 if (!m) PetscFunctionReturn(0); 172 diag = a->diag; 173 idiag = a->idiag; 174 k = PetscMax(A->rmap->n,A->cmap->n); 175 if (!a->mult_work) { 176 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 177 } 178 if (!a->sor_workt) { 179 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 180 } 181 if (!a->sor_work) { 182 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 183 } 184 work = a->mult_work; 185 t = a->sor_workt; 186 w = a->sor_work; 187 188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 189 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 190 191 if (flag & SOR_ZERO_INITIAL_GUESS) { 192 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 193 switch (bs) { 194 case 1: 195 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 196 t[0] = b[0]; 197 i2 = 1; 198 idiag += 1; 199 for (i=1; i<m; i++) { 200 v = aa + ai[i]; 201 vi = aj + ai[i]; 202 nz = diag[i] - ai[i]; 203 s[0] = b[i2]; 204 for (j=0; j<nz; j++) { 205 xw[0] = x[vi[j]]; 206 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 207 } 208 t[i2] = s[0]; 209 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 210 x[i2] = xw[0]; 211 idiag += 1; 212 i2 += 1; 213 } 214 break; 215 case 2: 216 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 217 t[0] = b[0]; t[1] = b[1]; 218 i2 = 2; 219 idiag += 4; 220 for (i=1; i<m; i++) { 221 v = aa + 4*ai[i]; 222 vi = aj + ai[i]; 223 nz = diag[i] - ai[i]; 224 s[0] = b[i2]; s[1] = b[i2+1]; 225 for (j=0; j<nz; j++) { 226 idx = 2*vi[j]; 227 it = 4*j; 228 xw[0] = x[idx]; xw[1] = x[1+idx]; 229 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 230 } 231 t[i2] = s[0]; t[i2+1] = s[1]; 232 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 233 x[i2] = xw[0]; x[i2+1] = xw[1]; 234 idiag += 4; 235 i2 += 2; 236 } 237 break; 238 case 3: 239 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 240 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 241 i2 = 3; 242 idiag += 9; 243 for (i=1; i<m; i++) { 244 v = aa + 9*ai[i]; 245 vi = aj + ai[i]; 246 nz = diag[i] - ai[i]; 247 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 248 while (nz--) { 249 idx = 3*(*vi++); 250 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 251 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 252 v += 9; 253 } 254 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 255 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 256 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 257 idiag += 9; 258 i2 += 3; 259 } 260 break; 261 case 4: 262 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 263 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 264 i2 = 4; 265 idiag += 16; 266 for (i=1; i<m; i++) { 267 v = aa + 16*ai[i]; 268 vi = aj + ai[i]; 269 nz = diag[i] - ai[i]; 270 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 271 while (nz--) { 272 idx = 4*(*vi++); 273 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 274 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 275 v += 16; 276 } 277 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 278 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 279 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 280 idiag += 16; 281 i2 += 4; 282 } 283 break; 284 case 5: 285 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 286 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 287 i2 = 5; 288 idiag += 25; 289 for (i=1; i<m; i++) { 290 v = aa + 25*ai[i]; 291 vi = aj + ai[i]; 292 nz = diag[i] - ai[i]; 293 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 294 while (nz--) { 295 idx = 5*(*vi++); 296 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 298 v += 25; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 301 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 302 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 303 idiag += 25; 304 i2 += 5; 305 } 306 break; 307 case 6: 308 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 309 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 310 i2 = 6; 311 idiag += 36; 312 for (i=1; i<m; i++) { 313 v = aa + 36*ai[i]; 314 vi = aj + ai[i]; 315 nz = diag[i] - ai[i]; 316 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 317 while (nz--) { 318 idx = 6*(*vi++); 319 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 320 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 321 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 322 v += 36; 323 } 324 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 325 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 326 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 328 idiag += 36; 329 i2 += 6; 330 } 331 break; 332 case 7: 333 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 335 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 336 i2 = 7; 337 idiag += 49; 338 for (i=1; i<m; i++) { 339 v = aa + 49*ai[i]; 340 vi = aj + ai[i]; 341 nz = diag[i] - ai[i]; 342 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 343 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 344 while (nz--) { 345 idx = 7*(*vi++); 346 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 347 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 348 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 349 v += 49; 350 } 351 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 352 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 353 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 354 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 355 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 356 idiag += 49; 357 i2 += 7; 358 } 359 break; 360 default: 361 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 362 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 363 i2 = bs; 364 idiag += bs2; 365 for (i=1; i<m; i++) { 366 v = aa + bs2*ai[i]; 367 vi = aj + ai[i]; 368 nz = diag[i] - ai[i]; 369 370 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 371 /* copy all rows of x that are needed into contiguous space */ 372 workt = work; 373 for (j=0; j<nz; j++) { 374 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 workt += bs; 376 } 377 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 378 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 380 381 idiag += bs2; 382 i2 += bs; 383 } 384 break; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 388 xb = t; 389 } 390 else xb = b; 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 392 idiag = a->idiag+bs2*(a->mbs-1); 393 i2 = bs * (m-1); 394 switch (bs) { 395 case 1: 396 s[0] = xb[i2]; 397 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 398 x[i2] = xw[0]; 399 i2 -= 1; 400 for (i=m-2; i>=0; i--) { 401 v = aa + (diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s[0] = xb[i2]; 405 for (j=0; j<nz; j++) { 406 xw[0] = x[vi[j]]; 407 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 408 } 409 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 410 x[i2] = xw[0]; 411 idiag -= 1; 412 i2 -= 1; 413 } 414 break; 415 case 2: 416 s[0] = xb[i2]; s[1] = xb[i2+1]; 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 i2 -= 2; 420 idiag -= 4; 421 for (i=m-2; i>=0; i--) { 422 v = aa + 4*(diag[i] + 1); 423 vi = aj + diag[i] + 1; 424 nz = ai[i+1] - diag[i] - 1; 425 s[0] = xb[i2]; s[1] = xb[i2+1]; 426 for (j=0; j<nz; j++) { 427 idx = 2*vi[j]; 428 it = 4*j; 429 xw[0] = x[idx]; xw[1] = x[1+idx]; 430 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 431 } 432 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 433 x[i2] = xw[0]; x[i2+1] = xw[1]; 434 idiag -= 4; 435 i2 -= 2; 436 } 437 break; 438 case 3: 439 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 i2 -= 3; 443 idiag -= 9; 444 for (i=m-2; i>=0; i--) { 445 v = aa + 9*(diag[i]+1); 446 vi = aj + diag[i] + 1; 447 nz = ai[i+1] - diag[i] - 1; 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 449 while (nz--) { 450 idx = 3*(*vi++); 451 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 452 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 453 v += 9; 454 } 455 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 456 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 457 idiag -= 9; 458 i2 -= 3; 459 } 460 break; 461 case 4: 462 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 i2 -= 4; 466 idiag -= 16; 467 for (i=m-2; i>=0; i--) { 468 v = aa + 16*(diag[i]+1); 469 vi = aj + diag[i] + 1; 470 nz = ai[i+1] - diag[i] - 1; 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 472 while (nz--) { 473 idx = 4*(*vi++); 474 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 475 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 476 v += 16; 477 } 478 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 479 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 480 idiag -= 16; 481 i2 -= 4; 482 } 483 break; 484 case 5: 485 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 i2 -= 5; 489 idiag -= 25; 490 for (i=m-2; i>=0; i--) { 491 v = aa + 25*(diag[i]+1); 492 vi = aj + diag[i] + 1; 493 nz = ai[i+1] - diag[i] - 1; 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 495 while (nz--) { 496 idx = 5*(*vi++); 497 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 499 v += 25; 500 } 501 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 503 idiag -= 25; 504 i2 -= 5; 505 } 506 break; 507 case 6: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 509 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 511 i2 -= 6; 512 idiag -= 36; 513 for (i=m-2; i>=0; i--) { 514 v = aa + 36*(diag[i]+1); 515 vi = aj + diag[i] + 1; 516 nz = ai[i+1] - diag[i] - 1; 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 518 while (nz--) { 519 idx = 6*(*vi++); 520 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 521 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 522 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 523 v += 36; 524 } 525 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 526 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 527 idiag -= 36; 528 i2 -= 6; 529 } 530 break; 531 case 7: 532 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 533 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 534 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 536 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 537 i2 -= 7; 538 idiag -= 49; 539 for (i=m-2; i>=0; i--) { 540 v = aa + 49*(diag[i]+1); 541 vi = aj + diag[i] + 1; 542 nz = ai[i+1] - diag[i] - 1; 543 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 544 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 545 while (nz--) { 546 idx = 7*(*vi++); 547 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 548 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 549 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 550 v += 49; 551 } 552 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 553 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 554 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 555 idiag -= 49; 556 i2 -= 7; 557 } 558 break; 559 default: 560 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 561 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 562 i2 -= bs; 563 idiag -= bs2; 564 for (i=m-2; i>=0; i--) { 565 v = aa + bs2*(diag[i]+1); 566 vi = aj + diag[i] + 1; 567 nz = ai[i+1] - diag[i] - 1; 568 569 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 570 /* copy all rows of x that are needed into contiguous space */ 571 workt = work; 572 for (j=0; j<nz; j++) { 573 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 workt += bs; 575 } 576 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 577 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 578 579 idiag -= bs2; 580 i2 -= bs; 581 } 582 break; 583 } 584 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 585 } 586 its--; 587 } 588 while (its--) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 590 idiag = a->idiag; 591 i2 = 0; 592 switch (bs) { 593 case 1: 594 for (i=0; i<m; i++) { 595 v = aa + ai[i]; 596 vi = aj + ai[i]; 597 nz = ai[i+1] - ai[i]; 598 s[0] = b[i2]; 599 for (j=0; j<nz; j++) { 600 xw[0] = x[vi[j]]; 601 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 602 } 603 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 604 x[i2] += xw[0]; 605 idiag += 1; 606 i2 += 1; 607 } 608 break; 609 case 2: 610 for (i=0; i<m; i++) { 611 v = aa + 4*ai[i]; 612 vi = aj + ai[i]; 613 nz = ai[i+1] - ai[i]; 614 s[0] = b[i2]; s[1] = b[i2+1]; 615 for (j=0; j<nz; j++) { 616 idx = 2*vi[j]; 617 it = 4*j; 618 xw[0] = x[idx]; xw[1] = x[1+idx]; 619 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 620 } 621 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 622 x[i2] += xw[0]; x[i2+1] += xw[1]; 623 idiag += 4; 624 i2 += 2; 625 } 626 break; 627 case 3: 628 for (i=0; i<m; i++) { 629 v = aa + 9*ai[i]; 630 vi = aj + ai[i]; 631 nz = ai[i+1] - ai[i]; 632 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 633 while (nz--) { 634 idx = 3*(*vi++); 635 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 636 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 637 v += 9; 638 } 639 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 640 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 641 idiag += 9; 642 i2 += 3; 643 } 644 break; 645 case 4: 646 for (i=0; i<m; i++) { 647 v = aa + 16*ai[i]; 648 vi = aj + ai[i]; 649 nz = ai[i+1] - ai[i]; 650 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 651 while (nz--) { 652 idx = 4*(*vi++); 653 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 654 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 655 v += 16; 656 } 657 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 658 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 659 idiag += 16; 660 i2 += 4; 661 } 662 break; 663 case 5: 664 for (i=0; i<m; i++) { 665 v = aa + 25*ai[i]; 666 vi = aj + ai[i]; 667 nz = ai[i+1] - ai[i]; 668 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 672 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 673 v += 25; 674 } 675 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 676 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 677 idiag += 25; 678 i2 += 5; 679 } 680 break; 681 case 6: 682 for (i=0; i<m; i++) { 683 v = aa + 36*ai[i]; 684 vi = aj + ai[i]; 685 nz = ai[i+1] - ai[i]; 686 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 687 while (nz--) { 688 idx = 6*(*vi++); 689 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 690 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 691 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 692 v += 36; 693 } 694 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 695 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 696 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 697 idiag += 36; 698 i2 += 6; 699 } 700 break; 701 case 7: 702 for (i=0; i<m; i++) { 703 v = aa + 49*ai[i]; 704 vi = aj + ai[i]; 705 nz = ai[i+1] - ai[i]; 706 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 707 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 708 while (nz--) { 709 idx = 7*(*vi++); 710 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 711 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 712 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 713 v += 49; 714 } 715 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 716 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 717 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 718 idiag += 49; 719 i2 += 7; 720 } 721 break; 722 default: 723 for (i=0; i<m; i++) { 724 v = aa + bs2*ai[i]; 725 vi = aj + ai[i]; 726 nz = ai[i+1] - ai[i]; 727 728 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 729 /* copy all rows of x that are needed into contiguous space */ 730 workt = work; 731 for (j=0; j<nz; j++) { 732 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 workt += bs; 734 } 735 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 736 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 737 738 idiag += bs2; 739 i2 += bs; 740 } 741 break; 742 } 743 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 744 } 745 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 746 idiag = a->idiag+bs2*(a->mbs-1); 747 i2 = bs * (m-1); 748 switch (bs) { 749 case 1: 750 for (i=m-1; i>=0; i--) { 751 v = aa + ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; 755 for (j=0; j<nz; j++) { 756 xw[0] = x[vi[j]]; 757 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 758 } 759 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 760 x[i2] += xw[0]; 761 idiag -= 1; 762 i2 -= 1; 763 } 764 break; 765 case 2: 766 for (i=m-1; i>=0; i--) { 767 v = aa + 4*ai[i]; 768 vi = aj + ai[i]; 769 nz = ai[i+1] - ai[i]; 770 s[0] = b[i2]; s[1] = b[i2+1]; 771 for (j=0; j<nz; j++) { 772 idx = 2*vi[j]; 773 it = 4*j; 774 xw[0] = x[idx]; xw[1] = x[1+idx]; 775 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 776 } 777 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 778 x[i2] += xw[0]; x[i2+1] += xw[1]; 779 idiag -= 4; 780 i2 -= 2; 781 } 782 break; 783 case 3: 784 for (i=m-1; i>=0; i--) { 785 v = aa + 9*ai[i]; 786 vi = aj + ai[i]; 787 nz = ai[i+1] - ai[i]; 788 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 789 while (nz--) { 790 idx = 3*(*vi++); 791 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 792 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 793 v += 9; 794 } 795 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 796 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 797 idiag -= 9; 798 i2 -= 3; 799 } 800 break; 801 case 4: 802 for (i=m-1; i>=0; i--) { 803 v = aa + 16*ai[i]; 804 vi = aj + ai[i]; 805 nz = ai[i+1] - ai[i]; 806 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 807 while (nz--) { 808 idx = 4*(*vi++); 809 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 810 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 811 v += 16; 812 } 813 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 814 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 815 idiag -= 16; 816 i2 -= 4; 817 } 818 break; 819 case 5: 820 for (i=m-1; i>=0; i--) { 821 v = aa + 25*ai[i]; 822 vi = aj + ai[i]; 823 nz = ai[i+1] - ai[i]; 824 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 825 while (nz--) { 826 idx = 5*(*vi++); 827 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 828 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 829 v += 25; 830 } 831 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 832 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 833 idiag -= 25; 834 i2 -= 5; 835 } 836 break; 837 case 6: 838 for (i=m-1; i>=0; i--) { 839 v = aa + 36*ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i+1] - ai[i]; 842 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 843 while (nz--) { 844 idx = 6*(*vi++); 845 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 846 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 847 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 848 v += 36; 849 } 850 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 851 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 852 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 853 idiag -= 36; 854 i2 -= 6; 855 } 856 break; 857 case 7: 858 for (i=m-1; i>=0; i--) { 859 v = aa + 49*ai[i]; 860 vi = aj + ai[i]; 861 nz = ai[i+1] - ai[i]; 862 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 863 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 864 while (nz--) { 865 idx = 7*(*vi++); 866 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 867 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 868 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 869 v += 49; 870 } 871 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 872 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 873 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 874 idiag -= 49; 875 i2 -= 7; 876 } 877 break; 878 default: 879 for (i=m-1; i>=0; i--) { 880 v = aa + bs2*ai[i]; 881 vi = aj + ai[i]; 882 nz = ai[i+1] - ai[i]; 883 884 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 /* copy all rows of x that are needed into contiguous space */ 886 workt = work; 887 for (j=0; j<nz; j++) { 888 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 workt += bs; 890 } 891 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 892 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 893 894 idiag -= bs2; 895 i2 -= bs; 896 } 897 break; 898 } 899 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 900 } 901 } 902 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 903 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 908 /* 909 Special version for direct calls from Fortran (Used in PETSc-fun3d) 910 */ 911 #if defined(PETSC_HAVE_FORTRAN_CAPS) 912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 914 #define matsetvaluesblocked4_ matsetvaluesblocked4 915 #endif 916 917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 918 { 919 Mat A = *AA; 920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 921 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 922 PetscInt *ai =a->i,*ailen=a->ilen; 923 PetscInt *aj =a->j,stepval,lastcol = -1; 924 const PetscScalar *value = v; 925 MatScalar *ap,*aa = a->a,*bap; 926 927 PetscFunctionBegin; 928 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 929 stepval = (n-1)*4; 930 for (k=0; k<m; k++) { /* loop over added rows */ 931 row = im[k]; 932 rp = aj + ai[row]; 933 ap = aa + 16*ai[row]; 934 nrow = ailen[row]; 935 low = 0; 936 high = nrow; 937 for (l=0; l<n; l++) { /* loop over added columns */ 938 col = in[l]; 939 if (col <= lastcol) low = 0; 940 else high = nrow; 941 lastcol = col; 942 value = v + k*(stepval+4 + l)*4; 943 while (high-low > 7) { 944 t = (low+high)/2; 945 if (rp[t] > col) high = t; 946 else low = t; 947 } 948 for (i=low; i<high; i++) { 949 if (rp[i] > col) break; 950 if (rp[i] == col) { 951 bap = ap + 16*i; 952 for (ii=0; ii<4; ii++,value+=stepval) { 953 for (jj=ii; jj<16; jj+=4) { 954 bap[jj] += *value++; 955 } 956 } 957 goto noinsert2; 958 } 959 } 960 N = nrow++ - 1; 961 high++; /* added new column index thus must search to one higher than before */ 962 /* shift up all the later entries in this row */ 963 for (ii=N; ii>=i; ii--) { 964 rp[ii+1] = rp[ii]; 965 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 966 } 967 if (N >= i) { 968 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 969 } 970 rp[i] = col; 971 bap = ap + 16*i; 972 for (ii=0; ii<4; ii++,value+=stepval) { 973 for (jj=ii; jj<16; jj+=4) { 974 bap[jj] = *value++; 975 } 976 } 977 noinsert2:; 978 low = i; 979 } 980 ailen[row] = nrow; 981 } 982 PetscFunctionReturnVoid(); 983 } 984 985 #if defined(PETSC_HAVE_FORTRAN_CAPS) 986 #define matsetvalues4_ MATSETVALUES4 987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 988 #define matsetvalues4_ matsetvalues4 989 #endif 990 991 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 992 { 993 Mat A = *AA; 994 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 995 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 996 PetscInt *ai=a->i,*ailen=a->ilen; 997 PetscInt *aj=a->j,brow,bcol; 998 PetscInt ridx,cidx,lastcol = -1; 999 MatScalar *ap,value,*aa=a->a,*bap; 1000 1001 PetscFunctionBegin; 1002 for (k=0; k<m; k++) { /* loop over added rows */ 1003 row = im[k]; brow = row/4; 1004 rp = aj + ai[brow]; 1005 ap = aa + 16*ai[brow]; 1006 nrow = ailen[brow]; 1007 low = 0; 1008 high = nrow; 1009 for (l=0; l<n; l++) { /* loop over added columns */ 1010 col = in[l]; bcol = col/4; 1011 ridx = row % 4; cidx = col % 4; 1012 value = v[l + k*n]; 1013 if (col <= lastcol) low = 0; 1014 else high = nrow; 1015 lastcol = col; 1016 while (high-low > 7) { 1017 t = (low+high)/2; 1018 if (rp[t] > bcol) high = t; 1019 else low = t; 1020 } 1021 for (i=low; i<high; i++) { 1022 if (rp[i] > bcol) break; 1023 if (rp[i] == bcol) { 1024 bap = ap + 16*i + 4*cidx + ridx; 1025 *bap += value; 1026 goto noinsert1; 1027 } 1028 } 1029 N = nrow++ - 1; 1030 high++; /* added new column thus must search to one higher than before */ 1031 /* shift up all the later entries in this row */ 1032 for (ii=N; ii>=i; ii--) { 1033 rp[ii+1] = rp[ii]; 1034 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1035 } 1036 if (N>=i) { 1037 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1038 } 1039 rp[i] = bcol; 1040 ap[16*i + 4*cidx + ridx] = value; 1041 noinsert1:; 1042 low = i; 1043 } 1044 ailen[brow] = nrow; 1045 } 1046 PetscFunctionReturnVoid(); 1047 } 1048 1049 /* 1050 Checks for missing diagonals 1051 */ 1052 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1053 { 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 PetscInt *diag,*ii = a->i,i; 1057 1058 PetscFunctionBegin; 1059 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1060 *missing = PETSC_FALSE; 1061 if (A->rmap->n > 0 && !ii) { 1062 *missing = PETSC_TRUE; 1063 if (d) *d = 0; 1064 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1065 } else { 1066 diag = a->diag; 1067 for (i=0; i<a->mbs; i++) { 1068 if (diag[i] >= ii[i+1]) { 1069 *missing = PETSC_TRUE; 1070 if (d) *d = i; 1071 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1072 break; 1073 } 1074 } 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1080 { 1081 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1082 PetscErrorCode ierr; 1083 PetscInt i,j,m = a->mbs; 1084 1085 PetscFunctionBegin; 1086 if (!a->diag) { 1087 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1088 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1089 a->free_diag = PETSC_TRUE; 1090 } 1091 for (i=0; i<m; i++) { 1092 a->diag[i] = a->i[i+1]; 1093 for (j=a->i[i]; j<a->i[i+1]; j++) { 1094 if (a->j[j] == i) { 1095 a->diag[i] = j; 1096 break; 1097 } 1098 } 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1105 { 1106 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1109 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1110 1111 PetscFunctionBegin; 1112 *nn = n; 1113 if (!ia) PetscFunctionReturn(0); 1114 if (symmetric) { 1115 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1116 nz = tia[n]; 1117 } else { 1118 tia = a->i; tja = a->j; 1119 } 1120 1121 if (!blockcompressed && bs > 1) { 1122 (*nn) *= bs; 1123 /* malloc & create the natural set of indices */ 1124 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1125 if (n) { 1126 (*ia)[0] = oshift; 1127 for (j=1; j<bs; j++) { 1128 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1129 } 1130 } 1131 1132 for (i=1; i<n; i++) { 1133 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1134 for (j=1; j<bs; j++) { 1135 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1136 } 1137 } 1138 if (n) { 1139 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1140 } 1141 1142 if (inja) { 1143 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1144 cnt = 0; 1145 for (i=0; i<n; i++) { 1146 for (j=0; j<bs; j++) { 1147 for (k=tia[i]; k<tia[i+1]; k++) { 1148 for (l=0; l<bs; l++) { 1149 (*ja)[cnt++] = bs*tja[k] + l; 1150 } 1151 } 1152 } 1153 } 1154 } 1155 1156 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1157 ierr = PetscFree(tia);CHKERRQ(ierr); 1158 ierr = PetscFree(tja);CHKERRQ(ierr); 1159 } 1160 } else if (oshift == 1) { 1161 if (symmetric) { 1162 nz = tia[A->rmap->n/bs]; 1163 /* add 1 to i and j indices */ 1164 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1165 *ia = tia; 1166 if (ja) { 1167 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1168 *ja = tja; 1169 } 1170 } else { 1171 nz = a->i[A->rmap->n/bs]; 1172 /* malloc space and add 1 to i and j indices */ 1173 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1174 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1175 if (ja) { 1176 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1177 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1178 } 1179 } 1180 } else { 1181 *ia = tia; 1182 if (ja) *ja = tja; 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1188 { 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 if (!ia) PetscFunctionReturn(0); 1193 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1194 ierr = PetscFree(*ia);CHKERRQ(ierr); 1195 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1201 { 1202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 #if defined(PETSC_USE_LOG) 1207 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1208 #endif 1209 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1210 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1211 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1212 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1213 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1214 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1215 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1216 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1217 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1218 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1219 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1220 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1221 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1222 1223 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1225 ierr = PetscFree(A->data);CHKERRQ(ierr); 1226 1227 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1238 #if defined(PETSC_HAVE_HYPRE) 1239 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1240 #endif 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 switch (op) { 1251 case MAT_ROW_ORIENTED: 1252 a->roworiented = flg; 1253 break; 1254 case MAT_KEEP_NONZERO_PATTERN: 1255 a->keepnonzeropattern = flg; 1256 break; 1257 case MAT_NEW_NONZERO_LOCATIONS: 1258 a->nonew = (flg ? 0 : 1); 1259 break; 1260 case MAT_NEW_NONZERO_LOCATION_ERR: 1261 a->nonew = (flg ? -1 : 0); 1262 break; 1263 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1264 a->nonew = (flg ? -2 : 0); 1265 break; 1266 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1267 a->nounused = (flg ? -1 : 0); 1268 break; 1269 case MAT_NEW_DIAGONALS: 1270 case MAT_IGNORE_OFF_PROC_ENTRIES: 1271 case MAT_USE_HASH_TABLE: 1272 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1273 break; 1274 case MAT_SPD: 1275 case MAT_SYMMETRIC: 1276 case MAT_STRUCTURALLY_SYMMETRIC: 1277 case MAT_HERMITIAN: 1278 case MAT_SYMMETRY_ETERNAL: 1279 case MAT_SUBMAT_SINGLEIS: 1280 case MAT_STRUCTURE_ONLY: 1281 /* These options are handled directly by MatSetOption() */ 1282 break; 1283 default: 1284 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1285 } 1286 PetscFunctionReturn(0); 1287 } 1288 1289 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1290 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1291 { 1292 PetscErrorCode ierr; 1293 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1294 MatScalar *aa_i; 1295 PetscScalar *v_i; 1296 1297 PetscFunctionBegin; 1298 bs = A->rmap->bs; 1299 bs2 = bs*bs; 1300 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1301 1302 bn = row/bs; /* Block number */ 1303 bp = row % bs; /* Block Position */ 1304 M = ai[bn+1] - ai[bn]; 1305 *nz = bs*M; 1306 1307 if (v) { 1308 *v = 0; 1309 if (*nz) { 1310 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1311 for (i=0; i<M; i++) { /* for each block in the block row */ 1312 v_i = *v + i*bs; 1313 aa_i = aa + bs2*(ai[bn] + i); 1314 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1315 } 1316 } 1317 } 1318 1319 if (idx) { 1320 *idx = 0; 1321 if (*nz) { 1322 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1323 for (i=0; i<M; i++) { /* for each block in the block row */ 1324 idx_i = *idx + i*bs; 1325 itmp = bs*aj[ai[bn] + i]; 1326 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1327 } 1328 } 1329 } 1330 PetscFunctionReturn(0); 1331 } 1332 1333 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1334 { 1335 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1344 { 1345 PetscErrorCode ierr; 1346 1347 PetscFunctionBegin; 1348 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1349 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1350 PetscFunctionReturn(0); 1351 } 1352 1353 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1354 { 1355 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1356 Mat C; 1357 PetscErrorCode ierr; 1358 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1359 PetscInt *rows,*cols,bs2=a->bs2; 1360 MatScalar *array; 1361 1362 PetscFunctionBegin; 1363 if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1364 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1365 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1366 1367 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1368 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1369 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1370 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1371 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1372 ierr = PetscFree(col);CHKERRQ(ierr); 1373 } else { 1374 C = *B; 1375 } 1376 1377 array = a->a; 1378 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1379 for (i=0; i<mbs; i++) { 1380 cols[0] = i*bs; 1381 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1382 len = ai[i+1] - ai[i]; 1383 for (j=0; j<len; j++) { 1384 rows[0] = (*aj++)*bs; 1385 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1386 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1387 array += bs2; 1388 } 1389 } 1390 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1391 1392 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1394 1395 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1396 *B = C; 1397 } else { 1398 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1404 { 1405 PetscErrorCode ierr; 1406 Mat Btrans; 1407 1408 PetscFunctionBegin; 1409 *f = PETSC_FALSE; 1410 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1411 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1412 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1419 PetscErrorCode ierr; 1420 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1421 int fd; 1422 PetscScalar *aa; 1423 FILE *file; 1424 1425 PetscFunctionBegin; 1426 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1427 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1428 col_lens[0] = MAT_FILE_CLASSID; 1429 1430 col_lens[1] = A->rmap->N; 1431 col_lens[2] = A->cmap->n; 1432 col_lens[3] = a->nz*bs2; 1433 1434 /* store lengths of each row and write (including header) to file */ 1435 count = 0; 1436 for (i=0; i<a->mbs; i++) { 1437 for (j=0; j<bs; j++) { 1438 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1439 } 1440 } 1441 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1442 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1443 1444 /* store column indices (zero start index) */ 1445 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1446 count = 0; 1447 for (i=0; i<a->mbs; i++) { 1448 for (j=0; j<bs; j++) { 1449 for (k=a->i[i]; k<a->i[i+1]; k++) { 1450 for (l=0; l<bs; l++) { 1451 jj[count++] = bs*a->j[k] + l; 1452 } 1453 } 1454 } 1455 } 1456 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1457 ierr = PetscFree(jj);CHKERRQ(ierr); 1458 1459 /* store nonzero values */ 1460 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1461 count = 0; 1462 for (i=0; i<a->mbs; i++) { 1463 for (j=0; j<bs; j++) { 1464 for (k=a->i[i]; k<a->i[i+1]; k++) { 1465 for (l=0; l<bs; l++) { 1466 aa[count++] = a->a[bs2*k + l*bs + j]; 1467 } 1468 } 1469 } 1470 } 1471 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1472 ierr = PetscFree(aa);CHKERRQ(ierr); 1473 1474 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1475 if (file) { 1476 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1477 } 1478 PetscFunctionReturn(0); 1479 } 1480 1481 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1482 { 1483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1484 PetscErrorCode ierr; 1485 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1486 PetscViewerFormat format; 1487 1488 PetscFunctionBegin; 1489 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1490 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1491 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1492 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1493 const char *matname; 1494 Mat aij; 1495 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1496 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1497 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1498 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1499 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1500 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1501 PetscFunctionReturn(0); 1502 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1503 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1504 for (i=0; i<a->mbs; i++) { 1505 for (j=0; j<bs; j++) { 1506 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1507 for (k=a->i[i]; k<a->i[i+1]; k++) { 1508 for (l=0; l<bs; l++) { 1509 #if defined(PETSC_USE_COMPLEX) 1510 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1511 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1512 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1513 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1514 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1515 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1516 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1517 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1518 } 1519 #else 1520 if (a->a[bs2*k + l*bs + j] != 0.0) { 1521 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1522 } 1523 #endif 1524 } 1525 } 1526 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1527 } 1528 } 1529 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1530 } else { 1531 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1532 for (i=0; i<a->mbs; i++) { 1533 for (j=0; j<bs; j++) { 1534 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1535 for (k=a->i[i]; k<a->i[i+1]; k++) { 1536 for (l=0; l<bs; l++) { 1537 #if defined(PETSC_USE_COMPLEX) 1538 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1540 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1542 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1543 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1544 } else { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1546 } 1547 #else 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1549 #endif 1550 } 1551 } 1552 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1553 } 1554 } 1555 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1556 } 1557 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #include <petscdraw.h> 1562 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1563 { 1564 Mat A = (Mat) Aa; 1565 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1566 PetscErrorCode ierr; 1567 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1568 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1569 MatScalar *aa; 1570 PetscViewer viewer; 1571 PetscViewerFormat format; 1572 1573 PetscFunctionBegin; 1574 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1575 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1576 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1577 1578 /* loop over matrix elements drawing boxes */ 1579 1580 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1581 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1582 /* Blue for negative, Cyan for zero and Red for positive */ 1583 color = PETSC_DRAW_BLUE; 1584 for (i=0,row=0; i<mbs; i++,row+=bs) { 1585 for (j=a->i[i]; j<a->i[i+1]; j++) { 1586 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1587 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1588 aa = a->a + j*bs2; 1589 for (k=0; k<bs; k++) { 1590 for (l=0; l<bs; l++) { 1591 if (PetscRealPart(*aa++) >= 0.) continue; 1592 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1593 } 1594 } 1595 } 1596 } 1597 color = PETSC_DRAW_CYAN; 1598 for (i=0,row=0; i<mbs; i++,row+=bs) { 1599 for (j=a->i[i]; j<a->i[i+1]; j++) { 1600 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1601 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1602 aa = a->a + j*bs2; 1603 for (k=0; k<bs; k++) { 1604 for (l=0; l<bs; l++) { 1605 if (PetscRealPart(*aa++) != 0.) continue; 1606 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1607 } 1608 } 1609 } 1610 } 1611 color = PETSC_DRAW_RED; 1612 for (i=0,row=0; i<mbs; i++,row+=bs) { 1613 for (j=a->i[i]; j<a->i[i+1]; j++) { 1614 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1615 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1616 aa = a->a + j*bs2; 1617 for (k=0; k<bs; k++) { 1618 for (l=0; l<bs; l++) { 1619 if (PetscRealPart(*aa++) <= 0.) continue; 1620 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1621 } 1622 } 1623 } 1624 } 1625 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1626 } else { 1627 /* use contour shading to indicate magnitude of values */ 1628 /* first determine max of all nonzero values */ 1629 PetscReal minv = 0.0, maxv = 0.0; 1630 PetscDraw popup; 1631 1632 for (i=0; i<a->nz*a->bs2; i++) { 1633 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1634 } 1635 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1636 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1637 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1638 1639 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1640 for (i=0,row=0; i<mbs; i++,row+=bs) { 1641 for (j=a->i[i]; j<a->i[i+1]; j++) { 1642 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1643 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1644 aa = a->a + j*bs2; 1645 for (k=0; k<bs; k++) { 1646 for (l=0; l<bs; l++) { 1647 MatScalar v = *aa++; 1648 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1649 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1650 } 1651 } 1652 } 1653 } 1654 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1655 } 1656 PetscFunctionReturn(0); 1657 } 1658 1659 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1660 { 1661 PetscErrorCode ierr; 1662 PetscReal xl,yl,xr,yr,w,h; 1663 PetscDraw draw; 1664 PetscBool isnull; 1665 1666 PetscFunctionBegin; 1667 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1668 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1669 if (isnull) PetscFunctionReturn(0); 1670 1671 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1672 xr += w; yr += h; xl = -w; yl = -h; 1673 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1674 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1675 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1676 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1677 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1682 { 1683 PetscErrorCode ierr; 1684 PetscBool iascii,isbinary,isdraw; 1685 1686 PetscFunctionBegin; 1687 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1688 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1689 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1690 if (iascii) { 1691 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1692 } else if (isbinary) { 1693 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1694 } else if (isdraw) { 1695 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1696 } else { 1697 Mat B; 1698 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1699 ierr = MatView(B,viewer);CHKERRQ(ierr); 1700 ierr = MatDestroy(&B);CHKERRQ(ierr); 1701 } 1702 PetscFunctionReturn(0); 1703 } 1704 1705 1706 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1707 { 1708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1709 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1710 PetscInt *ai = a->i,*ailen = a->ilen; 1711 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1712 MatScalar *ap,*aa = a->a; 1713 1714 PetscFunctionBegin; 1715 for (k=0; k<m; k++) { /* loop over rows */ 1716 row = im[k]; brow = row/bs; 1717 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1718 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1719 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1720 nrow = ailen[brow]; 1721 for (l=0; l<n; l++) { /* loop over columns */ 1722 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1723 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1724 col = in[l]; 1725 bcol = col/bs; 1726 cidx = col%bs; 1727 ridx = row%bs; 1728 high = nrow; 1729 low = 0; /* assume unsorted */ 1730 while (high-low > 5) { 1731 t = (low+high)/2; 1732 if (rp[t] > bcol) high = t; 1733 else low = t; 1734 } 1735 for (i=low; i<high; i++) { 1736 if (rp[i] > bcol) break; 1737 if (rp[i] == bcol) { 1738 *v++ = ap[bs2*i+bs*cidx+ridx]; 1739 goto finished; 1740 } 1741 } 1742 *v++ = 0.0; 1743 finished:; 1744 } 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1750 { 1751 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1752 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1753 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1754 PetscErrorCode ierr; 1755 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1756 PetscBool roworiented=a->roworiented; 1757 const PetscScalar *value = v; 1758 MatScalar *ap,*aa = a->a,*bap; 1759 1760 PetscFunctionBegin; 1761 if (roworiented) { 1762 stepval = (n-1)*bs; 1763 } else { 1764 stepval = (m-1)*bs; 1765 } 1766 for (k=0; k<m; k++) { /* loop over added rows */ 1767 row = im[k]; 1768 if (row < 0) continue; 1769 #if defined(PETSC_USE_DEBUG) 1770 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1771 #endif 1772 rp = aj + ai[row]; 1773 ap = aa + bs2*ai[row]; 1774 rmax = imax[row]; 1775 nrow = ailen[row]; 1776 low = 0; 1777 high = nrow; 1778 for (l=0; l<n; l++) { /* loop over added columns */ 1779 if (in[l] < 0) continue; 1780 #if defined(PETSC_USE_DEBUG) 1781 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1782 #endif 1783 col = in[l]; 1784 if (roworiented) { 1785 value = v + (k*(stepval+bs) + l)*bs; 1786 } else { 1787 value = v + (l*(stepval+bs) + k)*bs; 1788 } 1789 if (col <= lastcol) low = 0; 1790 else high = nrow; 1791 lastcol = col; 1792 while (high-low > 7) { 1793 t = (low+high)/2; 1794 if (rp[t] > col) high = t; 1795 else low = t; 1796 } 1797 for (i=low; i<high; i++) { 1798 if (rp[i] > col) break; 1799 if (rp[i] == col) { 1800 bap = ap + bs2*i; 1801 if (roworiented) { 1802 if (is == ADD_VALUES) { 1803 for (ii=0; ii<bs; ii++,value+=stepval) { 1804 for (jj=ii; jj<bs2; jj+=bs) { 1805 bap[jj] += *value++; 1806 } 1807 } 1808 } else { 1809 for (ii=0; ii<bs; ii++,value+=stepval) { 1810 for (jj=ii; jj<bs2; jj+=bs) { 1811 bap[jj] = *value++; 1812 } 1813 } 1814 } 1815 } else { 1816 if (is == ADD_VALUES) { 1817 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1818 for (jj=0; jj<bs; jj++) { 1819 bap[jj] += value[jj]; 1820 } 1821 bap += bs; 1822 } 1823 } else { 1824 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1825 for (jj=0; jj<bs; jj++) { 1826 bap[jj] = value[jj]; 1827 } 1828 bap += bs; 1829 } 1830 } 1831 } 1832 goto noinsert2; 1833 } 1834 } 1835 if (nonew == 1) goto noinsert2; 1836 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1837 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1838 N = nrow++ - 1; high++; 1839 /* shift up all the later entries in this row */ 1840 for (ii=N; ii>=i; ii--) { 1841 rp[ii+1] = rp[ii]; 1842 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1843 } 1844 if (N >= i) { 1845 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1846 } 1847 rp[i] = col; 1848 bap = ap + bs2*i; 1849 if (roworiented) { 1850 for (ii=0; ii<bs; ii++,value+=stepval) { 1851 for (jj=ii; jj<bs2; jj+=bs) { 1852 bap[jj] = *value++; 1853 } 1854 } 1855 } else { 1856 for (ii=0; ii<bs; ii++,value+=stepval) { 1857 for (jj=0; jj<bs; jj++) { 1858 *bap++ = *value++; 1859 } 1860 } 1861 } 1862 noinsert2:; 1863 low = i; 1864 } 1865 ailen[row] = nrow; 1866 } 1867 PetscFunctionReturn(0); 1868 } 1869 1870 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1871 { 1872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1873 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1874 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1875 PetscErrorCode ierr; 1876 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1877 MatScalar *aa = a->a,*ap; 1878 PetscReal ratio=0.6; 1879 1880 PetscFunctionBegin; 1881 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1882 1883 if (m) rmax = ailen[0]; 1884 for (i=1; i<mbs; i++) { 1885 /* move each row back by the amount of empty slots (fshift) before it*/ 1886 fshift += imax[i-1] - ailen[i-1]; 1887 rmax = PetscMax(rmax,ailen[i]); 1888 if (fshift) { 1889 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1890 N = ailen[i]; 1891 for (j=0; j<N; j++) { 1892 ip[j-fshift] = ip[j]; 1893 if (!A->structure_only) { 1894 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1895 } 1896 } 1897 } 1898 ai[i] = ai[i-1] + ailen[i-1]; 1899 } 1900 if (mbs) { 1901 fshift += imax[mbs-1] - ailen[mbs-1]; 1902 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1903 } 1904 1905 /* reset ilen and imax for each row */ 1906 a->nonzerorowcnt = 0; 1907 if (A->structure_only) { 1908 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1909 } else { /* !A->structure_only */ 1910 for (i=0; i<mbs; i++) { 1911 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1912 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1913 } 1914 } 1915 a->nz = ai[mbs]; 1916 1917 /* diagonals may have moved, so kill the diagonal pointers */ 1918 a->idiagvalid = PETSC_FALSE; 1919 if (fshift && a->diag) { 1920 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1921 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1922 a->diag = 0; 1923 } 1924 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1925 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1926 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1927 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1928 1929 A->info.mallocs += a->reallocs; 1930 a->reallocs = 0; 1931 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1932 a->rmax = rmax; 1933 1934 if (!A->structure_only) { 1935 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1936 } 1937 PetscFunctionReturn(0); 1938 } 1939 1940 /* 1941 This function returns an array of flags which indicate the locations of contiguous 1942 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1943 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1944 Assume: sizes should be long enough to hold all the values. 1945 */ 1946 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1947 { 1948 PetscInt i,j,k,row; 1949 PetscBool flg; 1950 1951 PetscFunctionBegin; 1952 for (i=0,j=0; i<n; j++) { 1953 row = idx[i]; 1954 if (row%bs!=0) { /* Not the begining of a block */ 1955 sizes[j] = 1; 1956 i++; 1957 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1958 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1959 i++; 1960 } else { /* Begining of the block, so check if the complete block exists */ 1961 flg = PETSC_TRUE; 1962 for (k=1; k<bs; k++) { 1963 if (row+k != idx[i+k]) { /* break in the block */ 1964 flg = PETSC_FALSE; 1965 break; 1966 } 1967 } 1968 if (flg) { /* No break in the bs */ 1969 sizes[j] = bs; 1970 i += bs; 1971 } else { 1972 sizes[j] = 1; 1973 i++; 1974 } 1975 } 1976 } 1977 *bs_max = j; 1978 PetscFunctionReturn(0); 1979 } 1980 1981 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1982 { 1983 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1984 PetscErrorCode ierr; 1985 PetscInt i,j,k,count,*rows; 1986 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1987 PetscScalar zero = 0.0; 1988 MatScalar *aa; 1989 const PetscScalar *xx; 1990 PetscScalar *bb; 1991 1992 PetscFunctionBegin; 1993 /* fix right hand side if needed */ 1994 if (x && b) { 1995 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1996 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1997 for (i=0; i<is_n; i++) { 1998 bb[is_idx[i]] = diag*xx[is_idx[i]]; 1999 } 2000 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2001 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2002 } 2003 2004 /* Make a copy of the IS and sort it */ 2005 /* allocate memory for rows,sizes */ 2006 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2007 2008 /* copy IS values to rows, and sort them */ 2009 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2010 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2011 2012 if (baij->keepnonzeropattern) { 2013 for (i=0; i<is_n; i++) sizes[i] = 1; 2014 bs_max = is_n; 2015 } else { 2016 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2017 A->nonzerostate++; 2018 } 2019 2020 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2021 row = rows[j]; 2022 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2023 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2024 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2025 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2026 if (diag != (PetscScalar)0.0) { 2027 if (baij->ilen[row/bs] > 0) { 2028 baij->ilen[row/bs] = 1; 2029 baij->j[baij->i[row/bs]] = row/bs; 2030 2031 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2032 } 2033 /* Now insert all the diagonal values for this bs */ 2034 for (k=0; k<bs; k++) { 2035 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2036 } 2037 } else { /* (diag == 0.0) */ 2038 baij->ilen[row/bs] = 0; 2039 } /* end (diag == 0.0) */ 2040 } else { /* (sizes[i] != bs) */ 2041 #if defined(PETSC_USE_DEBUG) 2042 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2043 #endif 2044 for (k=0; k<count; k++) { 2045 aa[0] = zero; 2046 aa += bs; 2047 } 2048 if (diag != (PetscScalar)0.0) { 2049 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2050 } 2051 } 2052 } 2053 2054 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2055 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2060 { 2061 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2062 PetscErrorCode ierr; 2063 PetscInt i,j,k,count; 2064 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2065 PetscScalar zero = 0.0; 2066 MatScalar *aa; 2067 const PetscScalar *xx; 2068 PetscScalar *bb; 2069 PetscBool *zeroed,vecs = PETSC_FALSE; 2070 2071 PetscFunctionBegin; 2072 /* fix right hand side if needed */ 2073 if (x && b) { 2074 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2075 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2076 vecs = PETSC_TRUE; 2077 } 2078 2079 /* zero the columns */ 2080 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2081 for (i=0; i<is_n; i++) { 2082 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2083 zeroed[is_idx[i]] = PETSC_TRUE; 2084 } 2085 for (i=0; i<A->rmap->N; i++) { 2086 if (!zeroed[i]) { 2087 row = i/bs; 2088 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2089 for (k=0; k<bs; k++) { 2090 col = bs*baij->j[j] + k; 2091 if (zeroed[col]) { 2092 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2093 if (vecs) bb[i] -= aa[0]*xx[col]; 2094 aa[0] = 0.0; 2095 } 2096 } 2097 } 2098 } else if (vecs) bb[i] = diag*xx[i]; 2099 } 2100 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2101 if (vecs) { 2102 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2103 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2104 } 2105 2106 /* zero the rows */ 2107 for (i=0; i<is_n; i++) { 2108 row = is_idx[i]; 2109 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2110 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2111 for (k=0; k<count; k++) { 2112 aa[0] = zero; 2113 aa += bs; 2114 } 2115 if (diag != (PetscScalar)0.0) { 2116 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2117 } 2118 } 2119 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2124 { 2125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2126 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2127 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2128 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2129 PetscErrorCode ierr; 2130 PetscInt ridx,cidx,bs2=a->bs2; 2131 PetscBool roworiented=a->roworiented; 2132 MatScalar *ap,value,*aa=a->a,*bap; 2133 2134 PetscFunctionBegin; 2135 for (k=0; k<m; k++) { /* loop over added rows */ 2136 row = im[k]; 2137 brow = row/bs; 2138 if (row < 0) continue; 2139 #if defined(PETSC_USE_DEBUG) 2140 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2141 #endif 2142 rp = aj + ai[brow]; 2143 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2144 rmax = imax[brow]; 2145 nrow = ailen[brow]; 2146 low = 0; 2147 high = nrow; 2148 for (l=0; l<n; l++) { /* loop over added columns */ 2149 if (in[l] < 0) continue; 2150 #if defined(PETSC_USE_DEBUG) 2151 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2152 #endif 2153 col = in[l]; bcol = col/bs; 2154 ridx = row % bs; cidx = col % bs; 2155 if (!A->structure_only) { 2156 if (roworiented) { 2157 value = v[l + k*n]; 2158 } else { 2159 value = v[k + l*m]; 2160 } 2161 } 2162 if (col <= lastcol) low = 0; else high = nrow; 2163 lastcol = col; 2164 while (high-low > 7) { 2165 t = (low+high)/2; 2166 if (rp[t] > bcol) high = t; 2167 else low = t; 2168 } 2169 for (i=low; i<high; i++) { 2170 if (rp[i] > bcol) break; 2171 if (rp[i] == bcol) { 2172 bap = ap + bs2*i + bs*cidx + ridx; 2173 if (!A->structure_only) { 2174 if (is == ADD_VALUES) *bap += value; 2175 else *bap = value; 2176 } 2177 goto noinsert1; 2178 } 2179 } 2180 if (nonew == 1) goto noinsert1; 2181 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2182 if (A->structure_only) { 2183 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2184 } else { 2185 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2186 } 2187 N = nrow++ - 1; high++; 2188 /* shift up all the later entries in this row */ 2189 for (ii=N; ii>=i; ii--) { 2190 rp[ii+1] = rp[ii]; 2191 if (!A->structure_only) { 2192 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2193 } 2194 } 2195 if (N>=i && !A->structure_only) { 2196 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2197 } 2198 rp[i] = bcol; 2199 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2200 a->nz++; 2201 A->nonzerostate++; 2202 noinsert1:; 2203 low = i; 2204 } 2205 ailen[brow] = nrow; 2206 } 2207 PetscFunctionReturn(0); 2208 } 2209 2210 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2211 { 2212 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2213 Mat outA; 2214 PetscErrorCode ierr; 2215 PetscBool row_identity,col_identity; 2216 2217 PetscFunctionBegin; 2218 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2219 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2220 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2221 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2222 2223 outA = inA; 2224 inA->factortype = MAT_FACTOR_LU; 2225 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2226 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2227 2228 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2229 2230 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2231 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2232 a->row = row; 2233 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2234 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2235 a->col = col; 2236 2237 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2238 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2239 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2240 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2241 2242 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2243 if (!a->solve_work) { 2244 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2245 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2246 } 2247 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2252 { 2253 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2254 PetscInt i,nz,mbs; 2255 2256 PetscFunctionBegin; 2257 nz = baij->maxnz; 2258 mbs = baij->mbs; 2259 for (i=0; i<nz; i++) { 2260 baij->j[i] = indices[i]; 2261 } 2262 baij->nz = nz; 2263 for (i=0; i<mbs; i++) { 2264 baij->ilen[i] = baij->imax[i]; 2265 } 2266 PetscFunctionReturn(0); 2267 } 2268 2269 /*@ 2270 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2271 in the matrix. 2272 2273 Input Parameters: 2274 + mat - the SeqBAIJ matrix 2275 - indices - the column indices 2276 2277 Level: advanced 2278 2279 Notes: 2280 This can be called if you have precomputed the nonzero structure of the 2281 matrix and want to provide it to the matrix object to improve the performance 2282 of the MatSetValues() operation. 2283 2284 You MUST have set the correct numbers of nonzeros per row in the call to 2285 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2286 2287 MUST be called before any calls to MatSetValues(); 2288 2289 @*/ 2290 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2291 { 2292 PetscErrorCode ierr; 2293 2294 PetscFunctionBegin; 2295 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2296 PetscValidPointer(indices,2); 2297 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2302 { 2303 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2304 PetscErrorCode ierr; 2305 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2306 PetscReal atmp; 2307 PetscScalar *x,zero = 0.0; 2308 MatScalar *aa; 2309 PetscInt ncols,brow,krow,kcol; 2310 2311 PetscFunctionBegin; 2312 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2313 bs = A->rmap->bs; 2314 aa = a->a; 2315 ai = a->i; 2316 aj = a->j; 2317 mbs = a->mbs; 2318 2319 ierr = VecSet(v,zero);CHKERRQ(ierr); 2320 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2321 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2322 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2323 for (i=0; i<mbs; i++) { 2324 ncols = ai[1] - ai[0]; ai++; 2325 brow = bs*i; 2326 for (j=0; j<ncols; j++) { 2327 for (kcol=0; kcol<bs; kcol++) { 2328 for (krow=0; krow<bs; krow++) { 2329 atmp = PetscAbsScalar(*aa);aa++; 2330 row = brow + krow; /* row index */ 2331 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2332 } 2333 } 2334 aj++; 2335 } 2336 } 2337 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } 2340 2341 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2342 { 2343 PetscErrorCode ierr; 2344 2345 PetscFunctionBegin; 2346 /* If the two matrices have the same copy implementation, use fast copy. */ 2347 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2349 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2350 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2351 2352 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2353 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2354 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2355 } else { 2356 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2357 } 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2362 { 2363 PetscErrorCode ierr; 2364 2365 PetscFunctionBegin; 2366 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2367 PetscFunctionReturn(0); 2368 } 2369 2370 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2371 { 2372 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2373 2374 PetscFunctionBegin; 2375 *array = a->a; 2376 PetscFunctionReturn(0); 2377 } 2378 2379 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2380 { 2381 PetscFunctionBegin; 2382 PetscFunctionReturn(0); 2383 } 2384 2385 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2386 { 2387 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2388 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2389 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2390 PetscErrorCode ierr; 2391 2392 PetscFunctionBegin; 2393 /* Set the number of nonzeros in the new matrix */ 2394 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2395 PetscFunctionReturn(0); 2396 } 2397 2398 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2399 { 2400 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2401 PetscErrorCode ierr; 2402 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2403 PetscBLASInt one=1; 2404 2405 PetscFunctionBegin; 2406 if (str == SAME_NONZERO_PATTERN) { 2407 PetscScalar alpha = a; 2408 PetscBLASInt bnz; 2409 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2410 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2411 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2412 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2413 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2414 } else { 2415 Mat B; 2416 PetscInt *nnz; 2417 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2418 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2419 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2420 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2421 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2422 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2423 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2424 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2425 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2426 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2427 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2428 ierr = PetscFree(nnz);CHKERRQ(ierr); 2429 } 2430 PetscFunctionReturn(0); 2431 } 2432 2433 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2434 { 2435 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2436 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2437 MatScalar *aa = a->a; 2438 2439 PetscFunctionBegin; 2440 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2441 PetscFunctionReturn(0); 2442 } 2443 2444 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2445 { 2446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2447 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2448 MatScalar *aa = a->a; 2449 2450 PetscFunctionBegin; 2451 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 /* 2456 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2457 */ 2458 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2459 { 2460 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2461 PetscErrorCode ierr; 2462 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2463 PetscInt nz = a->i[m],row,*jj,mr,col; 2464 2465 PetscFunctionBegin; 2466 *nn = n; 2467 if (!ia) PetscFunctionReturn(0); 2468 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2469 else { 2470 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2471 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2472 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2473 jj = a->j; 2474 for (i=0; i<nz; i++) { 2475 collengths[jj[i]]++; 2476 } 2477 cia[0] = oshift; 2478 for (i=0; i<n; i++) { 2479 cia[i+1] = cia[i] + collengths[i]; 2480 } 2481 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2482 jj = a->j; 2483 for (row=0; row<m; row++) { 2484 mr = a->i[row+1] - a->i[row]; 2485 for (i=0; i<mr; i++) { 2486 col = *jj++; 2487 2488 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2489 } 2490 } 2491 ierr = PetscFree(collengths);CHKERRQ(ierr); 2492 *ia = cia; *ja = cja; 2493 } 2494 PetscFunctionReturn(0); 2495 } 2496 2497 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2498 { 2499 PetscErrorCode ierr; 2500 2501 PetscFunctionBegin; 2502 if (!ia) PetscFunctionReturn(0); 2503 ierr = PetscFree(*ia);CHKERRQ(ierr); 2504 ierr = PetscFree(*ja);CHKERRQ(ierr); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 /* 2509 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2510 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2511 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2512 */ 2513 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2514 { 2515 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2516 PetscErrorCode ierr; 2517 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2518 PetscInt nz = a->i[m],row,*jj,mr,col; 2519 PetscInt *cspidx; 2520 2521 PetscFunctionBegin; 2522 *nn = n; 2523 if (!ia) PetscFunctionReturn(0); 2524 2525 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2526 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2527 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2528 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2529 jj = a->j; 2530 for (i=0; i<nz; i++) { 2531 collengths[jj[i]]++; 2532 } 2533 cia[0] = oshift; 2534 for (i=0; i<n; i++) { 2535 cia[i+1] = cia[i] + collengths[i]; 2536 } 2537 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2538 jj = a->j; 2539 for (row=0; row<m; row++) { 2540 mr = a->i[row+1] - a->i[row]; 2541 for (i=0; i<mr; i++) { 2542 col = *jj++; 2543 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2544 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2545 } 2546 } 2547 ierr = PetscFree(collengths);CHKERRQ(ierr); 2548 *ia = cia; *ja = cja; 2549 *spidx = cspidx; 2550 PetscFunctionReturn(0); 2551 } 2552 2553 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2554 { 2555 PetscErrorCode ierr; 2556 2557 PetscFunctionBegin; 2558 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2559 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2560 PetscFunctionReturn(0); 2561 } 2562 2563 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2564 { 2565 PetscErrorCode ierr; 2566 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2567 2568 PetscFunctionBegin; 2569 if (!Y->preallocated || !aij->nz) { 2570 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2571 } 2572 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 /* -------------------------------------------------------------------*/ 2577 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2578 MatGetRow_SeqBAIJ, 2579 MatRestoreRow_SeqBAIJ, 2580 MatMult_SeqBAIJ_N, 2581 /* 4*/ MatMultAdd_SeqBAIJ_N, 2582 MatMultTranspose_SeqBAIJ, 2583 MatMultTransposeAdd_SeqBAIJ, 2584 0, 2585 0, 2586 0, 2587 /* 10*/ 0, 2588 MatLUFactor_SeqBAIJ, 2589 0, 2590 0, 2591 MatTranspose_SeqBAIJ, 2592 /* 15*/ MatGetInfo_SeqBAIJ, 2593 MatEqual_SeqBAIJ, 2594 MatGetDiagonal_SeqBAIJ, 2595 MatDiagonalScale_SeqBAIJ, 2596 MatNorm_SeqBAIJ, 2597 /* 20*/ 0, 2598 MatAssemblyEnd_SeqBAIJ, 2599 MatSetOption_SeqBAIJ, 2600 MatZeroEntries_SeqBAIJ, 2601 /* 24*/ MatZeroRows_SeqBAIJ, 2602 0, 2603 0, 2604 0, 2605 0, 2606 /* 29*/ MatSetUp_SeqBAIJ, 2607 0, 2608 0, 2609 0, 2610 0, 2611 /* 34*/ MatDuplicate_SeqBAIJ, 2612 0, 2613 0, 2614 MatILUFactor_SeqBAIJ, 2615 0, 2616 /* 39*/ MatAXPY_SeqBAIJ, 2617 MatCreateSubMatrices_SeqBAIJ, 2618 MatIncreaseOverlap_SeqBAIJ, 2619 MatGetValues_SeqBAIJ, 2620 MatCopy_SeqBAIJ, 2621 /* 44*/ 0, 2622 MatScale_SeqBAIJ, 2623 MatShift_SeqBAIJ, 2624 0, 2625 MatZeroRowsColumns_SeqBAIJ, 2626 /* 49*/ 0, 2627 MatGetRowIJ_SeqBAIJ, 2628 MatRestoreRowIJ_SeqBAIJ, 2629 MatGetColumnIJ_SeqBAIJ, 2630 MatRestoreColumnIJ_SeqBAIJ, 2631 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2632 0, 2633 0, 2634 0, 2635 MatSetValuesBlocked_SeqBAIJ, 2636 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2637 MatDestroy_SeqBAIJ, 2638 MatView_SeqBAIJ, 2639 0, 2640 0, 2641 /* 64*/ 0, 2642 0, 2643 0, 2644 0, 2645 0, 2646 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2647 0, 2648 MatConvert_Basic, 2649 0, 2650 0, 2651 /* 74*/ 0, 2652 MatFDColoringApply_BAIJ, 2653 0, 2654 0, 2655 0, 2656 /* 79*/ 0, 2657 0, 2658 0, 2659 0, 2660 MatLoad_SeqBAIJ, 2661 /* 84*/ 0, 2662 0, 2663 0, 2664 0, 2665 0, 2666 /* 89*/ 0, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /* 94*/ 0, 2672 0, 2673 0, 2674 0, 2675 0, 2676 /* 99*/ 0, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /*104*/ 0, 2682 MatRealPart_SeqBAIJ, 2683 MatImaginaryPart_SeqBAIJ, 2684 0, 2685 0, 2686 /*109*/ 0, 2687 0, 2688 0, 2689 0, 2690 MatMissingDiagonal_SeqBAIJ, 2691 /*114*/ 0, 2692 0, 2693 0, 2694 0, 2695 0, 2696 /*119*/ 0, 2697 0, 2698 MatMultHermitianTranspose_SeqBAIJ, 2699 MatMultHermitianTransposeAdd_SeqBAIJ, 2700 0, 2701 /*124*/ 0, 2702 0, 2703 MatInvertBlockDiagonal_SeqBAIJ, 2704 0, 2705 0, 2706 /*129*/ 0, 2707 0, 2708 0, 2709 0, 2710 0, 2711 /*134*/ 0, 2712 0, 2713 0, 2714 0, 2715 0, 2716 /*139*/ MatSetBlockSizes_Default, 2717 0, 2718 0, 2719 MatFDColoringSetUp_SeqXAIJ, 2720 0, 2721 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2722 }; 2723 2724 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2725 { 2726 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2727 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2732 2733 /* allocate space for values if not already there */ 2734 if (!aij->saved_values) { 2735 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2736 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2737 } 2738 2739 /* copy values over */ 2740 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2745 { 2746 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2747 PetscErrorCode ierr; 2748 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2749 2750 PetscFunctionBegin; 2751 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2752 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2753 2754 /* copy values over */ 2755 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2756 PetscFunctionReturn(0); 2757 } 2758 2759 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2760 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2761 2762 static PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2763 { 2764 Mat_SeqBAIJ *b; 2765 PetscErrorCode ierr; 2766 PetscInt i,mbs,nbs,bs2; 2767 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2768 2769 PetscFunctionBegin; 2770 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2771 if (nz == MAT_SKIP_ALLOCATION) { 2772 skipallocation = PETSC_TRUE; 2773 nz = 0; 2774 } 2775 2776 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2777 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2778 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2779 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2780 2781 B->preallocated = PETSC_TRUE; 2782 2783 mbs = B->rmap->n/bs; 2784 nbs = B->cmap->n/bs; 2785 bs2 = bs*bs; 2786 2787 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2788 2789 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2790 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2791 if (nnz) { 2792 for (i=0; i<mbs; i++) { 2793 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2794 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2795 } 2796 } 2797 2798 b = (Mat_SeqBAIJ*)B->data; 2799 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2800 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2801 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2802 2803 if (!flg) { 2804 switch (bs) { 2805 case 1: 2806 B->ops->mult = MatMult_SeqBAIJ_1; 2807 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2808 break; 2809 case 2: 2810 B->ops->mult = MatMult_SeqBAIJ_2; 2811 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2812 break; 2813 case 3: 2814 B->ops->mult = MatMult_SeqBAIJ_3; 2815 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2816 break; 2817 case 4: 2818 B->ops->mult = MatMult_SeqBAIJ_4; 2819 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2820 break; 2821 case 5: 2822 B->ops->mult = MatMult_SeqBAIJ_5; 2823 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2824 break; 2825 case 6: 2826 B->ops->mult = MatMult_SeqBAIJ_6; 2827 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2828 break; 2829 case 7: 2830 B->ops->mult = MatMult_SeqBAIJ_7; 2831 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2832 break; 2833 case 11: 2834 B->ops->mult = MatMult_SeqBAIJ_11; 2835 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2836 break; 2837 case 15: 2838 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2839 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2840 break; 2841 default: 2842 B->ops->mult = MatMult_SeqBAIJ_N; 2843 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2844 break; 2845 } 2846 } 2847 B->ops->sor = MatSOR_SeqBAIJ; 2848 b->mbs = mbs; 2849 b->nbs = nbs; 2850 if (!skipallocation) { 2851 if (!b->imax) { 2852 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2853 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2854 2855 b->free_imax_ilen = PETSC_TRUE; 2856 } 2857 /* b->ilen will count nonzeros in each block row so far. */ 2858 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2859 if (!nnz) { 2860 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2861 else if (nz < 0) nz = 1; 2862 for (i=0; i<mbs; i++) b->imax[i] = nz; 2863 nz = nz*mbs; 2864 } else { 2865 nz = 0; 2866 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2867 } 2868 2869 /* allocate the matrix space */ 2870 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2871 if (B->structure_only) { 2872 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2873 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2874 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2875 } else { 2876 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2877 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2878 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2879 } 2880 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2881 2882 if (B->structure_only) { 2883 b->singlemalloc = PETSC_FALSE; 2884 b->free_a = PETSC_FALSE; 2885 } else { 2886 b->singlemalloc = PETSC_TRUE; 2887 b->free_a = PETSC_TRUE; 2888 } 2889 b->free_ij = PETSC_TRUE; 2890 2891 b->i[0] = 0; 2892 for (i=1; i<mbs+1; i++) { 2893 b->i[i] = b->i[i-1] + b->imax[i-1]; 2894 } 2895 2896 } else { 2897 b->free_a = PETSC_FALSE; 2898 b->free_ij = PETSC_FALSE; 2899 } 2900 2901 b->bs2 = bs2; 2902 b->mbs = mbs; 2903 b->nz = 0; 2904 b->maxnz = nz; 2905 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2906 B->was_assembled = PETSC_FALSE; 2907 B->assembled = PETSC_FALSE; 2908 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2909 PetscFunctionReturn(0); 2910 } 2911 2912 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2913 { 2914 PetscInt i,m,nz,nz_max=0,*nnz; 2915 PetscScalar *values=0; 2916 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2917 PetscErrorCode ierr; 2918 2919 PetscFunctionBegin; 2920 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2921 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2922 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2923 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2924 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2925 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2926 m = B->rmap->n/bs; 2927 2928 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2929 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2930 for (i=0; i<m; i++) { 2931 nz = ii[i+1]- ii[i]; 2932 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2933 nz_max = PetscMax(nz_max, nz); 2934 nnz[i] = nz; 2935 } 2936 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2937 ierr = PetscFree(nnz);CHKERRQ(ierr); 2938 2939 values = (PetscScalar*)V; 2940 if (!values) { 2941 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2942 } 2943 for (i=0; i<m; i++) { 2944 PetscInt ncols = ii[i+1] - ii[i]; 2945 const PetscInt *icols = jj + ii[i]; 2946 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2947 if (!roworiented) { 2948 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2949 } else { 2950 PetscInt j; 2951 for (j=0; j<ncols; j++) { 2952 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2953 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2954 } 2955 } 2956 } 2957 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2958 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 /*MC 2965 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2966 block sparse compressed row format. 2967 2968 Options Database Keys: 2969 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2970 2971 Level: beginner 2972 2973 .seealso: MatCreateSeqBAIJ() 2974 M*/ 2975 2976 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 2977 2978 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2979 { 2980 PetscErrorCode ierr; 2981 PetscMPIInt size; 2982 Mat_SeqBAIJ *b; 2983 2984 PetscFunctionBegin; 2985 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2986 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2987 2988 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2989 B->data = (void*)b; 2990 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2991 2992 b->row = 0; 2993 b->col = 0; 2994 b->icol = 0; 2995 b->reallocs = 0; 2996 b->saved_values = 0; 2997 2998 b->roworiented = PETSC_TRUE; 2999 b->nonew = 0; 3000 b->diag = 0; 3001 B->spptr = 0; 3002 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3003 b->keepnonzeropattern = PETSC_FALSE; 3004 3005 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3006 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3007 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3009 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3013 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3014 #if defined(PETSC_HAVE_HYPRE) 3015 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3016 #endif 3017 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3018 PetscFunctionReturn(0); 3019 } 3020 3021 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3022 { 3023 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3024 PetscErrorCode ierr; 3025 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3026 3027 PetscFunctionBegin; 3028 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3029 3030 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3031 c->imax = a->imax; 3032 c->ilen = a->ilen; 3033 c->free_imax_ilen = PETSC_FALSE; 3034 } else { 3035 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3036 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3037 for (i=0; i<mbs; i++) { 3038 c->imax[i] = a->imax[i]; 3039 c->ilen[i] = a->ilen[i]; 3040 } 3041 c->free_imax_ilen = PETSC_TRUE; 3042 } 3043 3044 /* allocate the matrix space */ 3045 if (mallocmatspace) { 3046 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3047 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3048 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3049 3050 c->i = a->i; 3051 c->j = a->j; 3052 c->singlemalloc = PETSC_FALSE; 3053 c->free_a = PETSC_TRUE; 3054 c->free_ij = PETSC_FALSE; 3055 c->parent = A; 3056 C->preallocated = PETSC_TRUE; 3057 C->assembled = PETSC_TRUE; 3058 3059 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3060 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3061 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3062 } else { 3063 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3064 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3065 3066 c->singlemalloc = PETSC_TRUE; 3067 c->free_a = PETSC_TRUE; 3068 c->free_ij = PETSC_TRUE; 3069 3070 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3071 if (mbs > 0) { 3072 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3073 if (cpvalues == MAT_COPY_VALUES) { 3074 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3075 } else { 3076 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3077 } 3078 } 3079 C->preallocated = PETSC_TRUE; 3080 C->assembled = PETSC_TRUE; 3081 } 3082 } 3083 3084 c->roworiented = a->roworiented; 3085 c->nonew = a->nonew; 3086 3087 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3088 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3089 3090 c->bs2 = a->bs2; 3091 c->mbs = a->mbs; 3092 c->nbs = a->nbs; 3093 3094 if (a->diag) { 3095 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3096 c->diag = a->diag; 3097 c->free_diag = PETSC_FALSE; 3098 } else { 3099 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3100 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3101 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3102 c->free_diag = PETSC_TRUE; 3103 } 3104 } else c->diag = 0; 3105 3106 c->nz = a->nz; 3107 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3108 c->solve_work = NULL; 3109 c->mult_work = NULL; 3110 c->sor_workt = NULL; 3111 c->sor_work = NULL; 3112 3113 c->compressedrow.use = a->compressedrow.use; 3114 c->compressedrow.nrows = a->compressedrow.nrows; 3115 if (a->compressedrow.use) { 3116 i = a->compressedrow.nrows; 3117 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3118 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3119 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3120 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3121 } else { 3122 c->compressedrow.use = PETSC_FALSE; 3123 c->compressedrow.i = NULL; 3124 c->compressedrow.rindex = NULL; 3125 } 3126 C->nonzerostate = A->nonzerostate; 3127 3128 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3129 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3130 PetscFunctionReturn(0); 3131 } 3132 3133 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3134 { 3135 PetscErrorCode ierr; 3136 3137 PetscFunctionBegin; 3138 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3139 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3140 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3141 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3142 PetscFunctionReturn(0); 3143 } 3144 3145 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3146 { 3147 Mat_SeqBAIJ *a; 3148 PetscErrorCode ierr; 3149 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3150 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3151 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3152 PetscInt *masked,nmask,tmp,bs2,ishift; 3153 PetscMPIInt size; 3154 int fd; 3155 PetscScalar *aa; 3156 MPI_Comm comm; 3157 3158 PetscFunctionBegin; 3159 /* force binary viewer to load .info file if it has not yet done so */ 3160 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3161 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3162 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3163 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3164 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3165 if (bs < 0) bs = 1; 3166 bs2 = bs*bs; 3167 3168 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3169 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3170 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3171 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3172 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3173 M = header[1]; N = header[2]; nz = header[3]; 3174 3175 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3176 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3177 3178 /* 3179 This code adds extra rows to make sure the number of rows is 3180 divisible by the blocksize 3181 */ 3182 mbs = M/bs; 3183 extra_rows = bs - M + bs*(mbs); 3184 if (extra_rows == bs) extra_rows = 0; 3185 else mbs++; 3186 if (extra_rows) { 3187 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3188 } 3189 3190 /* Set global sizes if not already set */ 3191 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3192 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3193 } else { /* Check if the matrix global sizes are correct */ 3194 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3195 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3196 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3197 } 3198 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3199 } 3200 3201 /* read in row lengths */ 3202 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3203 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3204 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3205 3206 /* read in column indices */ 3207 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3208 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3209 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3210 3211 /* loop over row lengths determining block row lengths */ 3212 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3213 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3214 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3215 rowcount = 0; 3216 nzcount = 0; 3217 for (i=0; i<mbs; i++) { 3218 nmask = 0; 3219 for (j=0; j<bs; j++) { 3220 kmax = rowlengths[rowcount]; 3221 for (k=0; k<kmax; k++) { 3222 tmp = jj[nzcount++]/bs; 3223 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3224 } 3225 rowcount++; 3226 } 3227 browlengths[i] += nmask; 3228 /* zero out the mask elements we set */ 3229 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3230 } 3231 3232 /* Do preallocation */ 3233 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3234 a = (Mat_SeqBAIJ*)newmat->data; 3235 3236 /* set matrix "i" values */ 3237 a->i[0] = 0; 3238 for (i=1; i<= mbs; i++) { 3239 a->i[i] = a->i[i-1] + browlengths[i-1]; 3240 a->ilen[i-1] = browlengths[i-1]; 3241 } 3242 a->nz = 0; 3243 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3244 3245 /* read in nonzero values */ 3246 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3247 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3248 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3249 3250 /* set "a" and "j" values into matrix */ 3251 nzcount = 0; jcount = 0; 3252 for (i=0; i<mbs; i++) { 3253 nzcountb = nzcount; 3254 nmask = 0; 3255 for (j=0; j<bs; j++) { 3256 kmax = rowlengths[i*bs+j]; 3257 for (k=0; k<kmax; k++) { 3258 tmp = jj[nzcount++]/bs; 3259 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3260 } 3261 } 3262 /* sort the masked values */ 3263 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3264 3265 /* set "j" values into matrix */ 3266 maskcount = 1; 3267 for (j=0; j<nmask; j++) { 3268 a->j[jcount++] = masked[j]; 3269 mask[masked[j]] = maskcount++; 3270 } 3271 /* set "a" values into matrix */ 3272 ishift = bs2*a->i[i]; 3273 for (j=0; j<bs; j++) { 3274 kmax = rowlengths[i*bs+j]; 3275 for (k=0; k<kmax; k++) { 3276 tmp = jj[nzcountb]/bs; 3277 block = mask[tmp] - 1; 3278 point = jj[nzcountb] - bs*tmp; 3279 idx = ishift + bs2*block + j + bs*point; 3280 a->a[idx] = (MatScalar)aa[nzcountb++]; 3281 } 3282 } 3283 /* zero out the mask elements we set */ 3284 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3285 } 3286 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3287 3288 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3289 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3290 ierr = PetscFree(aa);CHKERRQ(ierr); 3291 ierr = PetscFree(jj);CHKERRQ(ierr); 3292 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3293 3294 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3295 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3296 PetscFunctionReturn(0); 3297 } 3298 3299 /*@C 3300 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3301 compressed row) format. For good matrix assembly performance the 3302 user should preallocate the matrix storage by setting the parameter nz 3303 (or the array nnz). By setting these parameters accurately, performance 3304 during matrix assembly can be increased by more than a factor of 50. 3305 3306 Collective on MPI_Comm 3307 3308 Input Parameters: 3309 + comm - MPI communicator, set to PETSC_COMM_SELF 3310 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3311 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3312 . m - number of rows 3313 . n - number of columns 3314 . nz - number of nonzero blocks per block row (same for all rows) 3315 - nnz - array containing the number of nonzero blocks in the various block rows 3316 (possibly different for each block row) or NULL 3317 3318 Output Parameter: 3319 . A - the matrix 3320 3321 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3322 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3323 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3324 3325 Options Database Keys: 3326 . -mat_no_unroll - uses code that does not unroll the loops in the 3327 block calculations (much slower) 3328 . -mat_block_size - size of the blocks to use 3329 3330 Level: intermediate 3331 3332 Notes: 3333 The number of rows and columns must be divisible by blocksize. 3334 3335 If the nnz parameter is given then the nz parameter is ignored 3336 3337 A nonzero block is any block that as 1 or more nonzeros in it 3338 3339 The block AIJ format is fully compatible with standard Fortran 77 3340 storage. That is, the stored row and column indices can begin at 3341 either one (as in Fortran) or zero. See the users' manual for details. 3342 3343 Specify the preallocated storage with either nz or nnz (not both). 3344 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3345 allocation. See Users-Manual: ch_mat for details. 3346 matrices. 3347 3348 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3349 @*/ 3350 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3351 { 3352 PetscErrorCode ierr; 3353 3354 PetscFunctionBegin; 3355 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3356 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3357 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3358 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 /*@C 3363 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3364 per row in the matrix. For good matrix assembly performance the 3365 user should preallocate the matrix storage by setting the parameter nz 3366 (or the array nnz). By setting these parameters accurately, performance 3367 during matrix assembly can be increased by more than a factor of 50. 3368 3369 Collective on MPI_Comm 3370 3371 Input Parameters: 3372 + B - the matrix 3373 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3374 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3375 . nz - number of block nonzeros per block row (same for all rows) 3376 - nnz - array containing the number of block nonzeros in the various block rows 3377 (possibly different for each block row) or NULL 3378 3379 Options Database Keys: 3380 . -mat_no_unroll - uses code that does not unroll the loops in the 3381 block calculations (much slower) 3382 . -mat_block_size - size of the blocks to use 3383 3384 Level: intermediate 3385 3386 Notes: 3387 If the nnz parameter is given then the nz parameter is ignored 3388 3389 You can call MatGetInfo() to get information on how effective the preallocation was; 3390 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3391 You can also run with the option -info and look for messages with the string 3392 malloc in them to see if additional memory allocation was needed. 3393 3394 The block AIJ format is fully compatible with standard Fortran 77 3395 storage. That is, the stored row and column indices can begin at 3396 either one (as in Fortran) or zero. See the users' manual for details. 3397 3398 Specify the preallocated storage with either nz or nnz (not both). 3399 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3400 allocation. See Users-Manual: ch_mat for details. 3401 3402 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3403 @*/ 3404 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3405 { 3406 PetscErrorCode ierr; 3407 3408 PetscFunctionBegin; 3409 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3410 PetscValidType(B,1); 3411 PetscValidLogicalCollectiveInt(B,bs,2); 3412 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3413 PetscFunctionReturn(0); 3414 } 3415 3416 /*@C 3417 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3418 (the default sequential PETSc format). 3419 3420 Collective on MPI_Comm 3421 3422 Input Parameters: 3423 + B - the matrix 3424 . i - the indices into j for the start of each local row (starts with zero) 3425 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3426 - v - optional values in the matrix 3427 3428 Level: developer 3429 3430 Notes: 3431 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3432 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3433 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3434 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3435 block column and the second index is over columns within a block. 3436 3437 .keywords: matrix, aij, compressed row, sparse 3438 3439 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3440 @*/ 3441 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3442 { 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3447 PetscValidType(B,1); 3448 PetscValidLogicalCollectiveInt(B,bs,2); 3449 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3450 PetscFunctionReturn(0); 3451 } 3452 3453 3454 /*@ 3455 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3456 3457 Collective on MPI_Comm 3458 3459 Input Parameters: 3460 + comm - must be an MPI communicator of size 1 3461 . bs - size of block 3462 . m - number of rows 3463 . n - number of columns 3464 . i - row indices 3465 . j - column indices 3466 - a - matrix values 3467 3468 Output Parameter: 3469 . mat - the matrix 3470 3471 Level: advanced 3472 3473 Notes: 3474 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3475 once the matrix is destroyed 3476 3477 You cannot set new nonzero locations into this matrix, that will generate an error. 3478 3479 The i and j indices are 0 based 3480 3481 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3482 3483 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3484 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3485 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3486 with column-major ordering within blocks. 3487 3488 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3489 3490 @*/ 3491 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3492 { 3493 PetscErrorCode ierr; 3494 PetscInt ii; 3495 Mat_SeqBAIJ *baij; 3496 3497 PetscFunctionBegin; 3498 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3499 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3500 3501 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3502 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3503 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3504 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3505 baij = (Mat_SeqBAIJ*)(*mat)->data; 3506 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3507 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3508 3509 baij->i = i; 3510 baij->j = j; 3511 baij->a = a; 3512 3513 baij->singlemalloc = PETSC_FALSE; 3514 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3515 baij->free_a = PETSC_FALSE; 3516 baij->free_ij = PETSC_FALSE; 3517 3518 for (ii=0; ii<m; ii++) { 3519 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3520 #if defined(PETSC_USE_DEBUG) 3521 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3522 #endif 3523 } 3524 #if defined(PETSC_USE_DEBUG) 3525 for (ii=0; ii<baij->i[m]; ii++) { 3526 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3527 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3528 } 3529 #endif 3530 3531 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3532 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3533 PetscFunctionReturn(0); 3534 } 3535 3536 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3537 { 3538 PetscErrorCode ierr; 3539 PetscMPIInt size; 3540 3541 PetscFunctionBegin; 3542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3543 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3544 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3545 } else { 3546 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3547 } 3548 PetscFunctionReturn(0); 3549 } 3550