1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool zeropivotdetected; 21 22 PetscFunctionBegin; 23 if (a->idiagvalid) { 24 if (values) *values = a->idiag; 25 PetscFunctionReturn(0); 26 } 27 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 28 diag_offset = a->diag; 29 if (!a->idiag) { 30 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 31 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 32 } 33 diag = a->idiag; 34 mdiag = a->idiag+bs2*mbs; 35 if (values) *values = a->idiag; 36 /* factor and invert each block */ 37 switch (bs) { 38 case 1: 39 for (i=0; i<mbs; i++) { 40 odiag = v + 1*diag_offset[i]; 41 diag[0] = odiag[0]; 42 mdiag[0] = odiag[0]; 43 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 44 diag += 1; 45 mdiag += 1; 46 } 47 break; 48 case 2: 49 for (i=0; i<mbs; i++) { 50 odiag = v + 4*diag_offset[i]; 51 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 52 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 53 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr); 54 if (zeropivotdetected) break; 55 diag += 4; 56 mdiag += 4; 57 } 58 break; 59 case 3: 60 for (i=0; i<mbs; i++) { 61 odiag = v + 9*diag_offset[i]; 62 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 63 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 64 diag[8] = odiag[8]; 65 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 66 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 67 mdiag[8] = odiag[8]; 68 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr); 69 if (zeropivotdetected) break; 70 diag += 9; 71 mdiag += 9; 72 } 73 break; 74 case 4: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 16*diag_offset[i]; 77 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 78 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 79 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr); 80 if (zeropivotdetected) break; 81 diag += 16; 82 mdiag += 16; 83 } 84 break; 85 case 5: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 25*diag_offset[i]; 88 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 89 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 90 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr); 91 if (zeropivotdetected) break; 92 diag += 25; 93 mdiag += 25; 94 } 95 break; 96 case 6: 97 for (i=0; i<mbs; i++) { 98 odiag = v + 36*diag_offset[i]; 99 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 100 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 101 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr); 102 if (zeropivotdetected) break; 103 diag += 36; 104 mdiag += 36; 105 } 106 break; 107 case 7: 108 for (i=0; i<mbs; i++) { 109 odiag = v + 49*diag_offset[i]; 110 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 111 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr); 113 if (zeropivotdetected) break; 114 diag += 49; 115 mdiag += 49; 116 } 117 break; 118 default: 119 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 120 for (i=0; i<mbs; i++) { 121 odiag = v + bs2*diag_offset[i]; 122 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 125 diag += bs2; 126 mdiag += bs2; 127 } 128 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 129 } 130 a->idiagvalid = PETSC_TRUE; 131 if (zeropivotdetected) { 132 A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 133 } 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSOR_SeqBAIJ" 139 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 140 { 141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 142 PetscScalar *x,*work,*w,*workt,*t; 143 const MatScalar *v,*aa = a->a, *idiag; 144 const PetscScalar *b,*xb; 145 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 146 PetscErrorCode ierr; 147 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 148 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 149 150 PetscFunctionBegin; 151 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 152 its = its*lits; 153 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 154 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 155 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 156 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 157 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 158 159 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 160 161 if (!m) PetscFunctionReturn(0); 162 diag = a->diag; 163 idiag = a->idiag; 164 k = PetscMax(A->rmap->n,A->cmap->n); 165 if (!a->mult_work) { 166 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 167 } 168 if (!a->sor_workt) { 169 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 170 } 171 if (!a->sor_work) { 172 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 173 } 174 work = a->mult_work; 175 t = a->sor_workt; 176 w = a->sor_work; 177 178 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 179 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 180 181 if (flag & SOR_ZERO_INITIAL_GUESS) { 182 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 183 switch (bs) { 184 case 1: 185 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 186 t[0] = b[0]; 187 i2 = 1; 188 idiag += 1; 189 for (i=1; i<m; i++) { 190 v = aa + ai[i]; 191 vi = aj + ai[i]; 192 nz = diag[i] - ai[i]; 193 s[0] = b[i2]; 194 for (j=0; j<nz; j++) { 195 xw[0] = x[vi[j]]; 196 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 197 } 198 t[i2] = s[0]; 199 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 200 x[i2] = xw[0]; 201 idiag += 1; 202 i2 += 1; 203 } 204 break; 205 case 2: 206 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 207 t[0] = b[0]; t[1] = b[1]; 208 i2 = 2; 209 idiag += 4; 210 for (i=1; i<m; i++) { 211 v = aa + 4*ai[i]; 212 vi = aj + ai[i]; 213 nz = diag[i] - ai[i]; 214 s[0] = b[i2]; s[1] = b[i2+1]; 215 for (j=0; j<nz; j++) { 216 idx = 2*vi[j]; 217 it = 4*j; 218 xw[0] = x[idx]; xw[1] = x[1+idx]; 219 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 220 } 221 t[i2] = s[0]; t[i2+1] = s[1]; 222 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 223 x[i2] = xw[0]; x[i2+1] = xw[1]; 224 idiag += 4; 225 i2 += 2; 226 } 227 break; 228 case 3: 229 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 230 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 231 i2 = 3; 232 idiag += 9; 233 for (i=1; i<m; i++) { 234 v = aa + 9*ai[i]; 235 vi = aj + ai[i]; 236 nz = diag[i] - ai[i]; 237 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 238 while (nz--) { 239 idx = 3*(*vi++); 240 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 241 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 242 v += 9; 243 } 244 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 245 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 246 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 247 idiag += 9; 248 i2 += 3; 249 } 250 break; 251 case 4: 252 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 253 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 254 i2 = 4; 255 idiag += 16; 256 for (i=1; i<m; i++) { 257 v = aa + 16*ai[i]; 258 vi = aj + ai[i]; 259 nz = diag[i] - ai[i]; 260 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 261 while (nz--) { 262 idx = 4*(*vi++); 263 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 264 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 265 v += 16; 266 } 267 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 268 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 269 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 270 idiag += 16; 271 i2 += 4; 272 } 273 break; 274 case 5: 275 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 276 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 277 i2 = 5; 278 idiag += 25; 279 for (i=1; i<m; i++) { 280 v = aa + 25*ai[i]; 281 vi = aj + ai[i]; 282 nz = diag[i] - ai[i]; 283 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 284 while (nz--) { 285 idx = 5*(*vi++); 286 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 287 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 288 v += 25; 289 } 290 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 291 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 292 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 293 idiag += 25; 294 i2 += 5; 295 } 296 break; 297 case 6: 298 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 299 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 300 i2 = 6; 301 idiag += 36; 302 for (i=1; i<m; i++) { 303 v = aa + 36*ai[i]; 304 vi = aj + ai[i]; 305 nz = diag[i] - ai[i]; 306 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 307 while (nz--) { 308 idx = 6*(*vi++); 309 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 310 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 311 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 312 v += 36; 313 } 314 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 315 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 316 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 317 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 318 idiag += 36; 319 i2 += 6; 320 } 321 break; 322 case 7: 323 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 324 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 325 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 326 i2 = 7; 327 idiag += 49; 328 for (i=1; i<m; i++) { 329 v = aa + 49*ai[i]; 330 vi = aj + ai[i]; 331 nz = diag[i] - ai[i]; 332 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 333 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 334 while (nz--) { 335 idx = 7*(*vi++); 336 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 337 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 338 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 339 v += 49; 340 } 341 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 342 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 343 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 344 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 345 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 346 idiag += 49; 347 i2 += 7; 348 } 349 break; 350 default: 351 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 352 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 353 i2 = bs; 354 idiag += bs2; 355 for (i=1; i<m; i++) { 356 v = aa + bs2*ai[i]; 357 vi = aj + ai[i]; 358 nz = diag[i] - ai[i]; 359 360 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 361 /* copy all rows of x that are needed into contiguous space */ 362 workt = work; 363 for (j=0; j<nz; j++) { 364 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 365 workt += bs; 366 } 367 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 368 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 369 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 370 371 idiag += bs2; 372 i2 += bs; 373 } 374 break; 375 } 376 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 377 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 378 xb = t; 379 } 380 else xb = b; 381 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 382 idiag = a->idiag+bs2*(a->mbs-1); 383 i2 = bs * (m-1); 384 switch (bs) { 385 case 1: 386 s[0] = xb[i2]; 387 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 388 x[i2] = xw[0]; 389 i2 -= 1; 390 for (i=m-2; i>=0; i--) { 391 v = aa + (diag[i]+1); 392 vi = aj + diag[i] + 1; 393 nz = ai[i+1] - diag[i] - 1; 394 s[0] = xb[i2]; 395 for (j=0; j<nz; j++) { 396 xw[0] = x[vi[j]]; 397 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 398 } 399 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 400 x[i2] = xw[0]; 401 idiag -= 1; 402 i2 -= 1; 403 } 404 break; 405 case 2: 406 s[0] = xb[i2]; s[1] = xb[i2+1]; 407 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 408 x[i2] = xw[0]; x[i2+1] = xw[1]; 409 i2 -= 2; 410 idiag -= 4; 411 for (i=m-2; i>=0; i--) { 412 v = aa + 4*(diag[i] + 1); 413 vi = aj + diag[i] + 1; 414 nz = ai[i+1] - diag[i] - 1; 415 s[0] = xb[i2]; s[1] = xb[i2+1]; 416 for (j=0; j<nz; j++) { 417 idx = 2*vi[j]; 418 it = 4*j; 419 xw[0] = x[idx]; xw[1] = x[1+idx]; 420 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 421 } 422 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 423 x[i2] = xw[0]; x[i2+1] = xw[1]; 424 idiag -= 4; 425 i2 -= 2; 426 } 427 break; 428 case 3: 429 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 430 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 431 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 432 i2 -= 3; 433 idiag -= 9; 434 for (i=m-2; i>=0; i--) { 435 v = aa + 9*(diag[i]+1); 436 vi = aj + diag[i] + 1; 437 nz = ai[i+1] - diag[i] - 1; 438 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 439 while (nz--) { 440 idx = 3*(*vi++); 441 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 442 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 443 v += 9; 444 } 445 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 446 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 447 idiag -= 9; 448 i2 -= 3; 449 } 450 break; 451 case 4: 452 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 453 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 454 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 455 i2 -= 4; 456 idiag -= 16; 457 for (i=m-2; i>=0; i--) { 458 v = aa + 16*(diag[i]+1); 459 vi = aj + diag[i] + 1; 460 nz = ai[i+1] - diag[i] - 1; 461 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 462 while (nz--) { 463 idx = 4*(*vi++); 464 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 465 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 466 v += 16; 467 } 468 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 469 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 470 idiag -= 16; 471 i2 -= 4; 472 } 473 break; 474 case 5: 475 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 476 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 477 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 478 i2 -= 5; 479 idiag -= 25; 480 for (i=m-2; i>=0; i--) { 481 v = aa + 25*(diag[i]+1); 482 vi = aj + diag[i] + 1; 483 nz = ai[i+1] - diag[i] - 1; 484 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 485 while (nz--) { 486 idx = 5*(*vi++); 487 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 488 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 489 v += 25; 490 } 491 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 492 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 493 idiag -= 25; 494 i2 -= 5; 495 } 496 break; 497 case 6: 498 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 499 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 500 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 501 i2 -= 6; 502 idiag -= 36; 503 for (i=m-2; i>=0; i--) { 504 v = aa + 36*(diag[i]+1); 505 vi = aj + diag[i] + 1; 506 nz = ai[i+1] - diag[i] - 1; 507 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 508 while (nz--) { 509 idx = 6*(*vi++); 510 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 511 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 512 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 513 v += 36; 514 } 515 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 516 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 517 idiag -= 36; 518 i2 -= 6; 519 } 520 break; 521 case 7: 522 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 523 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 524 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 525 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 526 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 527 i2 -= 7; 528 idiag -= 49; 529 for (i=m-2; i>=0; i--) { 530 v = aa + 49*(diag[i]+1); 531 vi = aj + diag[i] + 1; 532 nz = ai[i+1] - diag[i] - 1; 533 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 534 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 535 while (nz--) { 536 idx = 7*(*vi++); 537 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 538 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 539 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 540 v += 49; 541 } 542 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 543 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 544 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 545 idiag -= 49; 546 i2 -= 7; 547 } 548 break; 549 default: 550 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 551 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 552 i2 -= bs; 553 idiag -= bs2; 554 for (i=m-2; i>=0; i--) { 555 v = aa + bs2*(diag[i]+1); 556 vi = aj + diag[i] + 1; 557 nz = ai[i+1] - diag[i] - 1; 558 559 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 560 /* copy all rows of x that are needed into contiguous space */ 561 workt = work; 562 for (j=0; j<nz; j++) { 563 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 564 workt += bs; 565 } 566 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 567 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 568 569 idiag -= bs2; 570 i2 -= bs; 571 } 572 break; 573 } 574 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 575 } 576 its--; 577 } 578 while (its--) { 579 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 580 idiag = a->idiag; 581 i2 = 0; 582 switch (bs) { 583 case 1: 584 for (i=0; i<m; i++) { 585 v = aa + ai[i]; 586 vi = aj + ai[i]; 587 nz = ai[i+1] - ai[i]; 588 s[0] = b[i2]; 589 for (j=0; j<nz; j++) { 590 xw[0] = x[vi[j]]; 591 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 592 } 593 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 594 x[i2] += xw[0]; 595 idiag += 1; 596 i2 += 1; 597 } 598 break; 599 case 2: 600 for (i=0; i<m; i++) { 601 v = aa + 4*ai[i]; 602 vi = aj + ai[i]; 603 nz = ai[i+1] - ai[i]; 604 s[0] = b[i2]; s[1] = b[i2+1]; 605 for (j=0; j<nz; j++) { 606 idx = 2*vi[j]; 607 it = 4*j; 608 xw[0] = x[idx]; xw[1] = x[1+idx]; 609 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 610 } 611 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 612 x[i2] += xw[0]; x[i2+1] += xw[1]; 613 idiag += 4; 614 i2 += 2; 615 } 616 break; 617 case 3: 618 for (i=0; i<m; i++) { 619 v = aa + 9*ai[i]; 620 vi = aj + ai[i]; 621 nz = ai[i+1] - ai[i]; 622 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 623 while (nz--) { 624 idx = 3*(*vi++); 625 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 626 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 627 v += 9; 628 } 629 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 630 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 631 idiag += 9; 632 i2 += 3; 633 } 634 break; 635 case 4: 636 for (i=0; i<m; i++) { 637 v = aa + 16*ai[i]; 638 vi = aj + ai[i]; 639 nz = ai[i+1] - ai[i]; 640 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 641 while (nz--) { 642 idx = 4*(*vi++); 643 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 644 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 645 v += 16; 646 } 647 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 648 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 649 idiag += 16; 650 i2 += 4; 651 } 652 break; 653 case 5: 654 for (i=0; i<m; i++) { 655 v = aa + 25*ai[i]; 656 vi = aj + ai[i]; 657 nz = ai[i+1] - ai[i]; 658 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 659 while (nz--) { 660 idx = 5*(*vi++); 661 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 662 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 663 v += 25; 664 } 665 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 666 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 667 idiag += 25; 668 i2 += 5; 669 } 670 break; 671 case 6: 672 for (i=0; i<m; i++) { 673 v = aa + 36*ai[i]; 674 vi = aj + ai[i]; 675 nz = ai[i+1] - ai[i]; 676 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 677 while (nz--) { 678 idx = 6*(*vi++); 679 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 680 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 681 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 682 v += 36; 683 } 684 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 685 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 686 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 687 idiag += 36; 688 i2 += 6; 689 } 690 break; 691 case 7: 692 for (i=0; i<m; i++) { 693 v = aa + 49*ai[i]; 694 vi = aj + ai[i]; 695 nz = ai[i+1] - ai[i]; 696 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 697 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 698 while (nz--) { 699 idx = 7*(*vi++); 700 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 701 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 702 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 703 v += 49; 704 } 705 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 706 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 707 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 708 idiag += 49; 709 i2 += 7; 710 } 711 break; 712 default: 713 for (i=0; i<m; i++) { 714 v = aa + bs2*ai[i]; 715 vi = aj + ai[i]; 716 nz = ai[i+1] - ai[i]; 717 718 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 719 /* copy all rows of x that are needed into contiguous space */ 720 workt = work; 721 for (j=0; j<nz; j++) { 722 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 723 workt += bs; 724 } 725 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 726 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 727 728 idiag += bs2; 729 i2 += bs; 730 } 731 break; 732 } 733 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 734 } 735 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 736 idiag = a->idiag+bs2*(a->mbs-1); 737 i2 = bs * (m-1); 738 switch (bs) { 739 case 1: 740 for (i=m-1; i>=0; i--) { 741 v = aa + ai[i]; 742 vi = aj + ai[i]; 743 nz = ai[i+1] - ai[i]; 744 s[0] = b[i2]; 745 for (j=0; j<nz; j++) { 746 xw[0] = x[vi[j]]; 747 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 748 } 749 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 750 x[i2] += xw[0]; 751 idiag -= 1; 752 i2 -= 1; 753 } 754 break; 755 case 2: 756 for (i=m-1; i>=0; i--) { 757 v = aa + 4*ai[i]; 758 vi = aj + ai[i]; 759 nz = ai[i+1] - ai[i]; 760 s[0] = b[i2]; s[1] = b[i2+1]; 761 for (j=0; j<nz; j++) { 762 idx = 2*vi[j]; 763 it = 4*j; 764 xw[0] = x[idx]; xw[1] = x[1+idx]; 765 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 766 } 767 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 768 x[i2] += xw[0]; x[i2+1] += xw[1]; 769 idiag -= 4; 770 i2 -= 2; 771 } 772 break; 773 case 3: 774 for (i=m-1; i>=0; i--) { 775 v = aa + 9*ai[i]; 776 vi = aj + ai[i]; 777 nz = ai[i+1] - ai[i]; 778 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 779 while (nz--) { 780 idx = 3*(*vi++); 781 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 782 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 783 v += 9; 784 } 785 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 786 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 787 idiag -= 9; 788 i2 -= 3; 789 } 790 break; 791 case 4: 792 for (i=m-1; i>=0; i--) { 793 v = aa + 16*ai[i]; 794 vi = aj + ai[i]; 795 nz = ai[i+1] - ai[i]; 796 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 797 while (nz--) { 798 idx = 4*(*vi++); 799 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 800 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 801 v += 16; 802 } 803 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 804 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 805 idiag -= 16; 806 i2 -= 4; 807 } 808 break; 809 case 5: 810 for (i=m-1; i>=0; i--) { 811 v = aa + 25*ai[i]; 812 vi = aj + ai[i]; 813 nz = ai[i+1] - ai[i]; 814 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 815 while (nz--) { 816 idx = 5*(*vi++); 817 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 818 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 819 v += 25; 820 } 821 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 822 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 823 idiag -= 25; 824 i2 -= 5; 825 } 826 break; 827 case 6: 828 for (i=m-1; i>=0; i--) { 829 v = aa + 36*ai[i]; 830 vi = aj + ai[i]; 831 nz = ai[i+1] - ai[i]; 832 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 833 while (nz--) { 834 idx = 6*(*vi++); 835 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 836 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 837 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 838 v += 36; 839 } 840 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 841 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 842 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 843 idiag -= 36; 844 i2 -= 6; 845 } 846 break; 847 case 7: 848 for (i=m-1; i>=0; i--) { 849 v = aa + 49*ai[i]; 850 vi = aj + ai[i]; 851 nz = ai[i+1] - ai[i]; 852 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 853 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 854 while (nz--) { 855 idx = 7*(*vi++); 856 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 857 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 858 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 859 v += 49; 860 } 861 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 862 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 863 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 864 idiag -= 49; 865 i2 -= 7; 866 } 867 break; 868 default: 869 for (i=m-1; i>=0; i--) { 870 v = aa + bs2*ai[i]; 871 vi = aj + ai[i]; 872 nz = ai[i+1] - ai[i]; 873 874 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 875 /* copy all rows of x that are needed into contiguous space */ 876 workt = work; 877 for (j=0; j<nz; j++) { 878 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 879 workt += bs; 880 } 881 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 882 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 883 884 idiag -= bs2; 885 i2 -= bs; 886 } 887 break; 888 } 889 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 890 } 891 } 892 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 893 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } 896 897 898 /* 899 Special version for direct calls from Fortran (Used in PETSc-fun3d) 900 */ 901 #if defined(PETSC_HAVE_FORTRAN_CAPS) 902 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 903 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 904 #define matsetvaluesblocked4_ matsetvaluesblocked4 905 #endif 906 907 #undef __FUNCT__ 908 #define __FUNCT__ "matsetvaluesblocked4_" 909 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 910 { 911 Mat A = *AA; 912 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 913 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 914 PetscInt *ai =a->i,*ailen=a->ilen; 915 PetscInt *aj =a->j,stepval,lastcol = -1; 916 const PetscScalar *value = v; 917 MatScalar *ap,*aa = a->a,*bap; 918 919 PetscFunctionBegin; 920 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 921 stepval = (n-1)*4; 922 for (k=0; k<m; k++) { /* loop over added rows */ 923 row = im[k]; 924 rp = aj + ai[row]; 925 ap = aa + 16*ai[row]; 926 nrow = ailen[row]; 927 low = 0; 928 high = nrow; 929 for (l=0; l<n; l++) { /* loop over added columns */ 930 col = in[l]; 931 if (col <= lastcol) low = 0; 932 else high = nrow; 933 lastcol = col; 934 value = v + k*(stepval+4 + l)*4; 935 while (high-low > 7) { 936 t = (low+high)/2; 937 if (rp[t] > col) high = t; 938 else low = t; 939 } 940 for (i=low; i<high; i++) { 941 if (rp[i] > col) break; 942 if (rp[i] == col) { 943 bap = ap + 16*i; 944 for (ii=0; ii<4; ii++,value+=stepval) { 945 for (jj=ii; jj<16; jj+=4) { 946 bap[jj] += *value++; 947 } 948 } 949 goto noinsert2; 950 } 951 } 952 N = nrow++ - 1; 953 high++; /* added new column index thus must search to one higher than before */ 954 /* shift up all the later entries in this row */ 955 for (ii=N; ii>=i; ii--) { 956 rp[ii+1] = rp[ii]; 957 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 958 } 959 if (N >= i) { 960 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 961 } 962 rp[i] = col; 963 bap = ap + 16*i; 964 for (ii=0; ii<4; ii++,value+=stepval) { 965 for (jj=ii; jj<16; jj+=4) { 966 bap[jj] = *value++; 967 } 968 } 969 noinsert2:; 970 low = i; 971 } 972 ailen[row] = nrow; 973 } 974 PetscFunctionReturnVoid(); 975 } 976 977 #if defined(PETSC_HAVE_FORTRAN_CAPS) 978 #define matsetvalues4_ MATSETVALUES4 979 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 980 #define matsetvalues4_ matsetvalues4 981 #endif 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatSetValues4_" 985 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 986 { 987 Mat A = *AA; 988 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 989 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 990 PetscInt *ai=a->i,*ailen=a->ilen; 991 PetscInt *aj=a->j,brow,bcol; 992 PetscInt ridx,cidx,lastcol = -1; 993 MatScalar *ap,value,*aa=a->a,*bap; 994 995 PetscFunctionBegin; 996 for (k=0; k<m; k++) { /* loop over added rows */ 997 row = im[k]; brow = row/4; 998 rp = aj + ai[brow]; 999 ap = aa + 16*ai[brow]; 1000 nrow = ailen[brow]; 1001 low = 0; 1002 high = nrow; 1003 for (l=0; l<n; l++) { /* loop over added columns */ 1004 col = in[l]; bcol = col/4; 1005 ridx = row % 4; cidx = col % 4; 1006 value = v[l + k*n]; 1007 if (col <= lastcol) low = 0; 1008 else high = nrow; 1009 lastcol = col; 1010 while (high-low > 7) { 1011 t = (low+high)/2; 1012 if (rp[t] > bcol) high = t; 1013 else low = t; 1014 } 1015 for (i=low; i<high; i++) { 1016 if (rp[i] > bcol) break; 1017 if (rp[i] == bcol) { 1018 bap = ap + 16*i + 4*cidx + ridx; 1019 *bap += value; 1020 goto noinsert1; 1021 } 1022 } 1023 N = nrow++ - 1; 1024 high++; /* added new column thus must search to one higher than before */ 1025 /* shift up all the later entries in this row */ 1026 for (ii=N; ii>=i; ii--) { 1027 rp[ii+1] = rp[ii]; 1028 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1029 } 1030 if (N>=i) { 1031 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1032 } 1033 rp[i] = bcol; 1034 ap[16*i + 4*cidx + ridx] = value; 1035 noinsert1:; 1036 low = i; 1037 } 1038 ailen[brow] = nrow; 1039 } 1040 PetscFunctionReturnVoid(); 1041 } 1042 1043 /* 1044 Checks for missing diagonals 1045 */ 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1048 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1049 { 1050 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1051 PetscErrorCode ierr; 1052 PetscInt *diag,*ii = a->i,i; 1053 1054 PetscFunctionBegin; 1055 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1056 *missing = PETSC_FALSE; 1057 if (A->rmap->n > 0 && !ii) { 1058 *missing = PETSC_TRUE; 1059 if (d) *d = 0; 1060 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1061 } else { 1062 diag = a->diag; 1063 for (i=0; i<a->mbs; i++) { 1064 if (diag[i] >= ii[i+1]) { 1065 *missing = PETSC_TRUE; 1066 if (d) *d = i; 1067 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1068 break; 1069 } 1070 } 1071 } 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1077 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1078 { 1079 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1080 PetscErrorCode ierr; 1081 PetscInt i,j,m = a->mbs; 1082 1083 PetscFunctionBegin; 1084 if (!a->diag) { 1085 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1086 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1087 a->free_diag = PETSC_TRUE; 1088 } 1089 for (i=0; i<m; i++) { 1090 a->diag[i] = a->i[i+1]; 1091 for (j=a->i[i]; j<a->i[i+1]; j++) { 1092 if (a->j[j] == i) { 1093 a->diag[i] = j; 1094 break; 1095 } 1096 } 1097 } 1098 PetscFunctionReturn(0); 1099 } 1100 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1105 { 1106 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1109 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1110 1111 PetscFunctionBegin; 1112 *nn = n; 1113 if (!ia) PetscFunctionReturn(0); 1114 if (symmetric) { 1115 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1116 nz = tia[n]; 1117 } else { 1118 tia = a->i; tja = a->j; 1119 } 1120 1121 if (!blockcompressed && bs > 1) { 1122 (*nn) *= bs; 1123 /* malloc & create the natural set of indices */ 1124 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1125 if (n) { 1126 (*ia)[0] = 0; 1127 for (j=1; j<bs; j++) { 1128 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1129 } 1130 } 1131 1132 for (i=1; i<n; i++) { 1133 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1134 for (j=1; j<bs; j++) { 1135 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1136 } 1137 } 1138 if (n) { 1139 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1140 } 1141 1142 if (inja) { 1143 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1144 cnt = 0; 1145 for (i=0; i<n; i++) { 1146 for (j=0; j<bs; j++) { 1147 for (k=tia[i]; k<tia[i+1]; k++) { 1148 for (l=0; l<bs; l++) { 1149 (*ja)[cnt++] = bs*tja[k] + l; 1150 } 1151 } 1152 } 1153 } 1154 } 1155 1156 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1157 ierr = PetscFree(tia);CHKERRQ(ierr); 1158 ierr = PetscFree(tja);CHKERRQ(ierr); 1159 } 1160 } else if (oshift == 1) { 1161 if (symmetric) { 1162 nz = tia[A->rmap->n/bs]; 1163 /* add 1 to i and j indices */ 1164 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1165 *ia = tia; 1166 if (ja) { 1167 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1168 *ja = tja; 1169 } 1170 } else { 1171 nz = a->i[A->rmap->n/bs]; 1172 /* malloc space and add 1 to i and j indices */ 1173 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1174 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1175 if (ja) { 1176 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1177 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1178 } 1179 } 1180 } else { 1181 *ia = tia; 1182 if (ja) *ja = tja; 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1189 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1190 { 1191 PetscErrorCode ierr; 1192 1193 PetscFunctionBegin; 1194 if (!ia) PetscFunctionReturn(0); 1195 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1196 ierr = PetscFree(*ia);CHKERRQ(ierr); 1197 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1198 } 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1204 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1205 { 1206 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 #if defined(PETSC_USE_LOG) 1211 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1212 #endif 1213 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1214 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1215 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1216 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1217 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1218 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1219 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1220 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1221 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1222 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1223 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1224 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1225 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1226 1227 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1228 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1229 ierr = PetscFree(A->data);CHKERRQ(ierr); 1230 1231 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1247 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1248 { 1249 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1250 PetscErrorCode ierr; 1251 1252 PetscFunctionBegin; 1253 switch (op) { 1254 case MAT_ROW_ORIENTED: 1255 a->roworiented = flg; 1256 break; 1257 case MAT_KEEP_NONZERO_PATTERN: 1258 a->keepnonzeropattern = flg; 1259 break; 1260 case MAT_NEW_NONZERO_LOCATIONS: 1261 a->nonew = (flg ? 0 : 1); 1262 break; 1263 case MAT_NEW_NONZERO_LOCATION_ERR: 1264 a->nonew = (flg ? -1 : 0); 1265 break; 1266 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1267 a->nonew = (flg ? -2 : 0); 1268 break; 1269 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1270 a->nounused = (flg ? -1 : 0); 1271 break; 1272 case MAT_NEW_DIAGONALS: 1273 case MAT_IGNORE_OFF_PROC_ENTRIES: 1274 case MAT_USE_HASH_TABLE: 1275 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1276 break; 1277 case MAT_SPD: 1278 case MAT_SYMMETRIC: 1279 case MAT_STRUCTURALLY_SYMMETRIC: 1280 case MAT_HERMITIAN: 1281 case MAT_SYMMETRY_ETERNAL: 1282 /* These options are handled directly by MatSetOption() */ 1283 break; 1284 default: 1285 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1286 } 1287 PetscFunctionReturn(0); 1288 } 1289 1290 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1293 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1294 { 1295 PetscErrorCode ierr; 1296 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1297 MatScalar *aa_i; 1298 PetscScalar *v_i; 1299 1300 PetscFunctionBegin; 1301 bs = A->rmap->bs; 1302 bs2 = bs*bs; 1303 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1304 1305 bn = row/bs; /* Block number */ 1306 bp = row % bs; /* Block Position */ 1307 M = ai[bn+1] - ai[bn]; 1308 *nz = bs*M; 1309 1310 if (v) { 1311 *v = 0; 1312 if (*nz) { 1313 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1314 for (i=0; i<M; i++) { /* for each block in the block row */ 1315 v_i = *v + i*bs; 1316 aa_i = aa + bs2*(ai[bn] + i); 1317 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1318 } 1319 } 1320 } 1321 1322 if (idx) { 1323 *idx = 0; 1324 if (*nz) { 1325 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1326 for (i=0; i<M; i++) { /* for each block in the block row */ 1327 idx_i = *idx + i*bs; 1328 itmp = bs*aj[ai[bn] + i]; 1329 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1330 } 1331 } 1332 } 1333 PetscFunctionReturn(0); 1334 } 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1338 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1339 { 1340 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1341 PetscErrorCode ierr; 1342 1343 PetscFunctionBegin; 1344 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1350 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1351 { 1352 PetscErrorCode ierr; 1353 1354 PetscFunctionBegin; 1355 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1356 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1357 PetscFunctionReturn(0); 1358 } 1359 1360 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1364 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1365 { 1366 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1367 Mat C; 1368 PetscErrorCode ierr; 1369 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1370 PetscInt *rows,*cols,bs2=a->bs2; 1371 MatScalar *array; 1372 1373 PetscFunctionBegin; 1374 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1375 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1376 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1377 1378 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1379 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1380 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1381 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1382 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1383 ierr = PetscFree(col);CHKERRQ(ierr); 1384 } else { 1385 C = *B; 1386 } 1387 1388 array = a->a; 1389 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1390 for (i=0; i<mbs; i++) { 1391 cols[0] = i*bs; 1392 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1393 len = ai[i+1] - ai[i]; 1394 for (j=0; j<len; j++) { 1395 rows[0] = (*aj++)*bs; 1396 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1397 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1398 array += bs2; 1399 } 1400 } 1401 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1402 1403 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1404 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1405 1406 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1407 *B = C; 1408 } else { 1409 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1416 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1417 { 1418 PetscErrorCode ierr; 1419 Mat Btrans; 1420 1421 PetscFunctionBegin; 1422 *f = PETSC_FALSE; 1423 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1424 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1425 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1431 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1432 { 1433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1434 PetscErrorCode ierr; 1435 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1436 int fd; 1437 PetscScalar *aa; 1438 FILE *file; 1439 1440 PetscFunctionBegin; 1441 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1442 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1443 col_lens[0] = MAT_FILE_CLASSID; 1444 1445 col_lens[1] = A->rmap->N; 1446 col_lens[2] = A->cmap->n; 1447 col_lens[3] = a->nz*bs2; 1448 1449 /* store lengths of each row and write (including header) to file */ 1450 count = 0; 1451 for (i=0; i<a->mbs; i++) { 1452 for (j=0; j<bs; j++) { 1453 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1454 } 1455 } 1456 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1457 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1458 1459 /* store column indices (zero start index) */ 1460 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1461 count = 0; 1462 for (i=0; i<a->mbs; i++) { 1463 for (j=0; j<bs; j++) { 1464 for (k=a->i[i]; k<a->i[i+1]; k++) { 1465 for (l=0; l<bs; l++) { 1466 jj[count++] = bs*a->j[k] + l; 1467 } 1468 } 1469 } 1470 } 1471 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1472 ierr = PetscFree(jj);CHKERRQ(ierr); 1473 1474 /* store nonzero values */ 1475 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1476 count = 0; 1477 for (i=0; i<a->mbs; i++) { 1478 for (j=0; j<bs; j++) { 1479 for (k=a->i[i]; k<a->i[i+1]; k++) { 1480 for (l=0; l<bs; l++) { 1481 aa[count++] = a->a[bs2*k + l*bs + j]; 1482 } 1483 } 1484 } 1485 } 1486 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1487 ierr = PetscFree(aa);CHKERRQ(ierr); 1488 1489 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1490 if (file) { 1491 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1498 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1499 { 1500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1501 PetscErrorCode ierr; 1502 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1503 PetscViewerFormat format; 1504 1505 PetscFunctionBegin; 1506 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1507 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1508 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1509 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1510 const char *matname; 1511 Mat aij; 1512 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1513 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1514 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1515 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1516 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1517 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1518 PetscFunctionReturn(0); 1519 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1520 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1521 for (i=0; i<a->mbs; i++) { 1522 for (j=0; j<bs; j++) { 1523 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1524 for (k=a->i[i]; k<a->i[i+1]; k++) { 1525 for (l=0; l<bs; l++) { 1526 #if defined(PETSC_USE_COMPLEX) 1527 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1528 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1529 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1530 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1531 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1532 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1533 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1535 } 1536 #else 1537 if (a->a[bs2*k + l*bs + j] != 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1539 } 1540 #endif 1541 } 1542 } 1543 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1544 } 1545 } 1546 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1547 } else { 1548 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1549 for (i=0; i<a->mbs; i++) { 1550 for (j=0; j<bs; j++) { 1551 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1552 for (k=a->i[i]; k<a->i[i+1]; k++) { 1553 for (l=0; l<bs; l++) { 1554 #if defined(PETSC_USE_COMPLEX) 1555 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1556 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1557 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1558 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1559 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1560 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1561 } else { 1562 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1563 } 1564 #else 1565 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1566 #endif 1567 } 1568 } 1569 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1570 } 1571 } 1572 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1573 } 1574 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #include <petscdraw.h> 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1581 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1582 { 1583 Mat A = (Mat) Aa; 1584 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1585 PetscErrorCode ierr; 1586 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1587 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1588 MatScalar *aa; 1589 PetscViewer viewer; 1590 PetscViewerFormat format; 1591 1592 PetscFunctionBegin; 1593 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1594 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1595 1596 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1597 1598 /* loop over matrix elements drawing boxes */ 1599 1600 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1601 color = PETSC_DRAW_BLUE; 1602 for (i=0,row=0; i<mbs; i++,row+=bs) { 1603 for (j=a->i[i]; j<a->i[i+1]; j++) { 1604 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1605 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1606 aa = a->a + j*bs2; 1607 for (k=0; k<bs; k++) { 1608 for (l=0; l<bs; l++) { 1609 if (PetscRealPart(*aa++) >= 0.) continue; 1610 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1611 } 1612 } 1613 } 1614 } 1615 color = PETSC_DRAW_CYAN; 1616 for (i=0,row=0; i<mbs; i++,row+=bs) { 1617 for (j=a->i[i]; j<a->i[i+1]; j++) { 1618 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1619 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1620 aa = a->a + j*bs2; 1621 for (k=0; k<bs; k++) { 1622 for (l=0; l<bs; l++) { 1623 if (PetscRealPart(*aa++) != 0.) continue; 1624 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1625 } 1626 } 1627 } 1628 } 1629 color = PETSC_DRAW_RED; 1630 for (i=0,row=0; i<mbs; i++,row+=bs) { 1631 for (j=a->i[i]; j<a->i[i+1]; j++) { 1632 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1633 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1634 aa = a->a + j*bs2; 1635 for (k=0; k<bs; k++) { 1636 for (l=0; l<bs; l++) { 1637 if (PetscRealPart(*aa++) <= 0.) continue; 1638 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1639 } 1640 } 1641 } 1642 } 1643 } else { 1644 /* use contour shading to indicate magnitude of values */ 1645 /* first determine max of all nonzero values */ 1646 PetscDraw popup; 1647 PetscReal scale,maxv = 0.0; 1648 1649 for (i=0; i<a->nz*a->bs2; i++) { 1650 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1651 } 1652 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1653 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1654 if (popup) { 1655 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1656 } 1657 for (i=0,row=0; i<mbs; i++,row+=bs) { 1658 for (j=a->i[i]; j<a->i[i+1]; j++) { 1659 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1660 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1661 aa = a->a + j*bs2; 1662 for (k=0; k<bs; k++) { 1663 for (l=0; l<bs; l++) { 1664 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1665 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1666 } 1667 } 1668 } 1669 } 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1676 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1677 { 1678 PetscErrorCode ierr; 1679 PetscReal xl,yl,xr,yr,w,h; 1680 PetscDraw draw; 1681 PetscBool isnull; 1682 1683 PetscFunctionBegin; 1684 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1685 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1686 1687 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1688 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1689 xr += w; yr += h; xl = -w; yl = -h; 1690 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1691 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1692 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatView_SeqBAIJ" 1698 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1699 { 1700 PetscErrorCode ierr; 1701 PetscBool iascii,isbinary,isdraw; 1702 1703 PetscFunctionBegin; 1704 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1705 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1706 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1707 if (iascii) { 1708 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1709 } else if (isbinary) { 1710 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1711 } else if (isdraw) { 1712 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1713 } else { 1714 Mat B; 1715 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1716 ierr = MatView(B,viewer);CHKERRQ(ierr); 1717 ierr = MatDestroy(&B);CHKERRQ(ierr); 1718 } 1719 PetscFunctionReturn(0); 1720 } 1721 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1725 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1726 { 1727 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1728 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1729 PetscInt *ai = a->i,*ailen = a->ilen; 1730 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1731 MatScalar *ap,*aa = a->a; 1732 1733 PetscFunctionBegin; 1734 for (k=0; k<m; k++) { /* loop over rows */ 1735 row = im[k]; brow = row/bs; 1736 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1737 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1738 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1739 nrow = ailen[brow]; 1740 for (l=0; l<n; l++) { /* loop over columns */ 1741 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1742 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1743 col = in[l]; 1744 bcol = col/bs; 1745 cidx = col%bs; 1746 ridx = row%bs; 1747 high = nrow; 1748 low = 0; /* assume unsorted */ 1749 while (high-low > 5) { 1750 t = (low+high)/2; 1751 if (rp[t] > bcol) high = t; 1752 else low = t; 1753 } 1754 for (i=low; i<high; i++) { 1755 if (rp[i] > bcol) break; 1756 if (rp[i] == bcol) { 1757 *v++ = ap[bs2*i+bs*cidx+ridx]; 1758 goto finished; 1759 } 1760 } 1761 *v++ = 0.0; 1762 finished:; 1763 } 1764 } 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1770 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1771 { 1772 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1773 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1774 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1775 PetscErrorCode ierr; 1776 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1777 PetscBool roworiented=a->roworiented; 1778 const PetscScalar *value = v; 1779 MatScalar *ap,*aa = a->a,*bap; 1780 1781 PetscFunctionBegin; 1782 if (roworiented) { 1783 stepval = (n-1)*bs; 1784 } else { 1785 stepval = (m-1)*bs; 1786 } 1787 for (k=0; k<m; k++) { /* loop over added rows */ 1788 row = im[k]; 1789 if (row < 0) continue; 1790 #if defined(PETSC_USE_DEBUG) 1791 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1792 #endif 1793 rp = aj + ai[row]; 1794 ap = aa + bs2*ai[row]; 1795 rmax = imax[row]; 1796 nrow = ailen[row]; 1797 low = 0; 1798 high = nrow; 1799 for (l=0; l<n; l++) { /* loop over added columns */ 1800 if (in[l] < 0) continue; 1801 #if defined(PETSC_USE_DEBUG) 1802 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1803 #endif 1804 col = in[l]; 1805 if (roworiented) { 1806 value = v + (k*(stepval+bs) + l)*bs; 1807 } else { 1808 value = v + (l*(stepval+bs) + k)*bs; 1809 } 1810 if (col <= lastcol) low = 0; 1811 else high = nrow; 1812 lastcol = col; 1813 while (high-low > 7) { 1814 t = (low+high)/2; 1815 if (rp[t] > col) high = t; 1816 else low = t; 1817 } 1818 for (i=low; i<high; i++) { 1819 if (rp[i] > col) break; 1820 if (rp[i] == col) { 1821 bap = ap + bs2*i; 1822 if (roworiented) { 1823 if (is == ADD_VALUES) { 1824 for (ii=0; ii<bs; ii++,value+=stepval) { 1825 for (jj=ii; jj<bs2; jj+=bs) { 1826 bap[jj] += *value++; 1827 } 1828 } 1829 } else { 1830 for (ii=0; ii<bs; ii++,value+=stepval) { 1831 for (jj=ii; jj<bs2; jj+=bs) { 1832 bap[jj] = *value++; 1833 } 1834 } 1835 } 1836 } else { 1837 if (is == ADD_VALUES) { 1838 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1839 for (jj=0; jj<bs; jj++) { 1840 bap[jj] += value[jj]; 1841 } 1842 bap += bs; 1843 } 1844 } else { 1845 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1846 for (jj=0; jj<bs; jj++) { 1847 bap[jj] = value[jj]; 1848 } 1849 bap += bs; 1850 } 1851 } 1852 } 1853 goto noinsert2; 1854 } 1855 } 1856 if (nonew == 1) goto noinsert2; 1857 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1858 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1859 N = nrow++ - 1; high++; 1860 /* shift up all the later entries in this row */ 1861 for (ii=N; ii>=i; ii--) { 1862 rp[ii+1] = rp[ii]; 1863 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1864 } 1865 if (N >= i) { 1866 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1867 } 1868 rp[i] = col; 1869 bap = ap + bs2*i; 1870 if (roworiented) { 1871 for (ii=0; ii<bs; ii++,value+=stepval) { 1872 for (jj=ii; jj<bs2; jj+=bs) { 1873 bap[jj] = *value++; 1874 } 1875 } 1876 } else { 1877 for (ii=0; ii<bs; ii++,value+=stepval) { 1878 for (jj=0; jj<bs; jj++) { 1879 *bap++ = *value++; 1880 } 1881 } 1882 } 1883 noinsert2:; 1884 low = i; 1885 } 1886 ailen[row] = nrow; 1887 } 1888 PetscFunctionReturn(0); 1889 } 1890 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1893 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1894 { 1895 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1896 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1897 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1898 PetscErrorCode ierr; 1899 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1900 MatScalar *aa = a->a,*ap; 1901 PetscReal ratio=0.6; 1902 1903 PetscFunctionBegin; 1904 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1905 1906 if (m) rmax = ailen[0]; 1907 for (i=1; i<mbs; i++) { 1908 /* move each row back by the amount of empty slots (fshift) before it*/ 1909 fshift += imax[i-1] - ailen[i-1]; 1910 rmax = PetscMax(rmax,ailen[i]); 1911 if (fshift) { 1912 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1913 N = ailen[i]; 1914 for (j=0; j<N; j++) { 1915 ip[j-fshift] = ip[j]; 1916 1917 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1918 } 1919 } 1920 ai[i] = ai[i-1] + ailen[i-1]; 1921 } 1922 if (mbs) { 1923 fshift += imax[mbs-1] - ailen[mbs-1]; 1924 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1925 } 1926 1927 /* reset ilen and imax for each row */ 1928 a->nonzerorowcnt = 0; 1929 for (i=0; i<mbs; i++) { 1930 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1931 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1932 } 1933 a->nz = ai[mbs]; 1934 1935 /* diagonals may have moved, so kill the diagonal pointers */ 1936 a->idiagvalid = PETSC_FALSE; 1937 if (fshift && a->diag) { 1938 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1939 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1940 a->diag = 0; 1941 } 1942 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1943 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1944 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1945 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1946 1947 A->info.mallocs += a->reallocs; 1948 a->reallocs = 0; 1949 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1950 a->rmax = rmax; 1951 1952 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 /* 1957 This function returns an array of flags which indicate the locations of contiguous 1958 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1959 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1960 Assume: sizes should be long enough to hold all the values. 1961 */ 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1964 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1965 { 1966 PetscInt i,j,k,row; 1967 PetscBool flg; 1968 1969 PetscFunctionBegin; 1970 for (i=0,j=0; i<n; j++) { 1971 row = idx[i]; 1972 if (row%bs!=0) { /* Not the begining of a block */ 1973 sizes[j] = 1; 1974 i++; 1975 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1976 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1977 i++; 1978 } else { /* Begining of the block, so check if the complete block exists */ 1979 flg = PETSC_TRUE; 1980 for (k=1; k<bs; k++) { 1981 if (row+k != idx[i+k]) { /* break in the block */ 1982 flg = PETSC_FALSE; 1983 break; 1984 } 1985 } 1986 if (flg) { /* No break in the bs */ 1987 sizes[j] = bs; 1988 i += bs; 1989 } else { 1990 sizes[j] = 1; 1991 i++; 1992 } 1993 } 1994 } 1995 *bs_max = j; 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2001 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2002 { 2003 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2004 PetscErrorCode ierr; 2005 PetscInt i,j,k,count,*rows; 2006 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2007 PetscScalar zero = 0.0; 2008 MatScalar *aa; 2009 const PetscScalar *xx; 2010 PetscScalar *bb; 2011 2012 PetscFunctionBegin; 2013 /* fix right hand side if needed */ 2014 if (x && b) { 2015 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2016 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2017 for (i=0; i<is_n; i++) { 2018 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2019 } 2020 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2021 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2022 } 2023 2024 /* Make a copy of the IS and sort it */ 2025 /* allocate memory for rows,sizes */ 2026 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2027 2028 /* copy IS values to rows, and sort them */ 2029 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2030 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2031 2032 if (baij->keepnonzeropattern) { 2033 for (i=0; i<is_n; i++) sizes[i] = 1; 2034 bs_max = is_n; 2035 } else { 2036 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2037 A->nonzerostate++; 2038 } 2039 2040 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2041 row = rows[j]; 2042 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2043 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2044 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2045 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2046 if (diag != (PetscScalar)0.0) { 2047 if (baij->ilen[row/bs] > 0) { 2048 baij->ilen[row/bs] = 1; 2049 baij->j[baij->i[row/bs]] = row/bs; 2050 2051 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2052 } 2053 /* Now insert all the diagonal values for this bs */ 2054 for (k=0; k<bs; k++) { 2055 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2056 } 2057 } else { /* (diag == 0.0) */ 2058 baij->ilen[row/bs] = 0; 2059 } /* end (diag == 0.0) */ 2060 } else { /* (sizes[i] != bs) */ 2061 #if defined(PETSC_USE_DEBUG) 2062 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2063 #endif 2064 for (k=0; k<count; k++) { 2065 aa[0] = zero; 2066 aa += bs; 2067 } 2068 if (diag != (PetscScalar)0.0) { 2069 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2070 } 2071 } 2072 } 2073 2074 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2075 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2081 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2082 { 2083 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2084 PetscErrorCode ierr; 2085 PetscInt i,j,k,count; 2086 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2087 PetscScalar zero = 0.0; 2088 MatScalar *aa; 2089 const PetscScalar *xx; 2090 PetscScalar *bb; 2091 PetscBool *zeroed,vecs = PETSC_FALSE; 2092 2093 PetscFunctionBegin; 2094 /* fix right hand side if needed */ 2095 if (x && b) { 2096 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2097 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2098 vecs = PETSC_TRUE; 2099 } 2100 2101 /* zero the columns */ 2102 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2103 for (i=0; i<is_n; i++) { 2104 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2105 zeroed[is_idx[i]] = PETSC_TRUE; 2106 } 2107 for (i=0; i<A->rmap->N; i++) { 2108 if (!zeroed[i]) { 2109 row = i/bs; 2110 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2111 for (k=0; k<bs; k++) { 2112 col = bs*baij->j[j] + k; 2113 if (zeroed[col]) { 2114 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2115 if (vecs) bb[i] -= aa[0]*xx[col]; 2116 aa[0] = 0.0; 2117 } 2118 } 2119 } 2120 } else if (vecs) bb[i] = diag*xx[i]; 2121 } 2122 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2123 if (vecs) { 2124 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2125 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2126 } 2127 2128 /* zero the rows */ 2129 for (i=0; i<is_n; i++) { 2130 row = is_idx[i]; 2131 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2132 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2133 for (k=0; k<count; k++) { 2134 aa[0] = zero; 2135 aa += bs; 2136 } 2137 if (diag != (PetscScalar)0.0) { 2138 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2139 } 2140 } 2141 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2147 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2148 { 2149 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2150 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2151 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2152 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2153 PetscErrorCode ierr; 2154 PetscInt ridx,cidx,bs2=a->bs2; 2155 PetscBool roworiented=a->roworiented; 2156 MatScalar *ap,value,*aa=a->a,*bap; 2157 2158 PetscFunctionBegin; 2159 for (k=0; k<m; k++) { /* loop over added rows */ 2160 row = im[k]; 2161 brow = row/bs; 2162 if (row < 0) continue; 2163 #if defined(PETSC_USE_DEBUG) 2164 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2165 #endif 2166 rp = aj + ai[brow]; 2167 ap = aa + bs2*ai[brow]; 2168 rmax = imax[brow]; 2169 nrow = ailen[brow]; 2170 low = 0; 2171 high = nrow; 2172 for (l=0; l<n; l++) { /* loop over added columns */ 2173 if (in[l] < 0) continue; 2174 #if defined(PETSC_USE_DEBUG) 2175 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2176 #endif 2177 col = in[l]; bcol = col/bs; 2178 ridx = row % bs; cidx = col % bs; 2179 if (roworiented) { 2180 value = v[l + k*n]; 2181 } else { 2182 value = v[k + l*m]; 2183 } 2184 if (col <= lastcol) low = 0; else high = nrow; 2185 lastcol = col; 2186 while (high-low > 7) { 2187 t = (low+high)/2; 2188 if (rp[t] > bcol) high = t; 2189 else low = t; 2190 } 2191 for (i=low; i<high; i++) { 2192 if (rp[i] > bcol) break; 2193 if (rp[i] == bcol) { 2194 bap = ap + bs2*i + bs*cidx + ridx; 2195 if (is == ADD_VALUES) *bap += value; 2196 else *bap = value; 2197 goto noinsert1; 2198 } 2199 } 2200 if (nonew == 1) goto noinsert1; 2201 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2202 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2203 N = nrow++ - 1; high++; 2204 /* shift up all the later entries in this row */ 2205 for (ii=N; ii>=i; ii--) { 2206 rp[ii+1] = rp[ii]; 2207 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2208 } 2209 if (N>=i) { 2210 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2211 } 2212 rp[i] = bcol; 2213 ap[bs2*i + bs*cidx + ridx] = value; 2214 a->nz++; 2215 A->nonzerostate++; 2216 noinsert1:; 2217 low = i; 2218 } 2219 ailen[brow] = nrow; 2220 } 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2226 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2227 { 2228 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2229 Mat outA; 2230 PetscErrorCode ierr; 2231 PetscBool row_identity,col_identity; 2232 2233 PetscFunctionBegin; 2234 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2235 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2236 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2237 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2238 2239 outA = inA; 2240 inA->factortype = MAT_FACTOR_LU; 2241 2242 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2243 2244 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2245 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2246 a->row = row; 2247 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2248 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2249 a->col = col; 2250 2251 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2252 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2253 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2254 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2255 2256 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2257 if (!a->solve_work) { 2258 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2259 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2260 } 2261 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2267 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2268 { 2269 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2270 PetscInt i,nz,mbs; 2271 2272 PetscFunctionBegin; 2273 nz = baij->maxnz; 2274 mbs = baij->mbs; 2275 for (i=0; i<nz; i++) { 2276 baij->j[i] = indices[i]; 2277 } 2278 baij->nz = nz; 2279 for (i=0; i<mbs; i++) { 2280 baij->ilen[i] = baij->imax[i]; 2281 } 2282 PetscFunctionReturn(0); 2283 } 2284 2285 #undef __FUNCT__ 2286 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2287 /*@ 2288 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2289 in the matrix. 2290 2291 Input Parameters: 2292 + mat - the SeqBAIJ matrix 2293 - indices - the column indices 2294 2295 Level: advanced 2296 2297 Notes: 2298 This can be called if you have precomputed the nonzero structure of the 2299 matrix and want to provide it to the matrix object to improve the performance 2300 of the MatSetValues() operation. 2301 2302 You MUST have set the correct numbers of nonzeros per row in the call to 2303 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2304 2305 MUST be called before any calls to MatSetValues(); 2306 2307 @*/ 2308 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2309 { 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2314 PetscValidPointer(indices,2); 2315 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 #undef __FUNCT__ 2320 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2321 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2322 { 2323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2324 PetscErrorCode ierr; 2325 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2326 PetscReal atmp; 2327 PetscScalar *x,zero = 0.0; 2328 MatScalar *aa; 2329 PetscInt ncols,brow,krow,kcol; 2330 2331 PetscFunctionBegin; 2332 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2333 bs = A->rmap->bs; 2334 aa = a->a; 2335 ai = a->i; 2336 aj = a->j; 2337 mbs = a->mbs; 2338 2339 ierr = VecSet(v,zero);CHKERRQ(ierr); 2340 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2341 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2342 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2343 for (i=0; i<mbs; i++) { 2344 ncols = ai[1] - ai[0]; ai++; 2345 brow = bs*i; 2346 for (j=0; j<ncols; j++) { 2347 for (kcol=0; kcol<bs; kcol++) { 2348 for (krow=0; krow<bs; krow++) { 2349 atmp = PetscAbsScalar(*aa);aa++; 2350 row = brow + krow; /* row index */ 2351 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2352 } 2353 } 2354 aj++; 2355 } 2356 } 2357 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 #undef __FUNCT__ 2362 #define __FUNCT__ "MatCopy_SeqBAIJ" 2363 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2364 { 2365 PetscErrorCode ierr; 2366 2367 PetscFunctionBegin; 2368 /* If the two matrices have the same copy implementation, use fast copy. */ 2369 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2371 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2372 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2373 2374 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2375 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2376 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2377 } else { 2378 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2385 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2386 { 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2396 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2397 { 2398 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2399 2400 PetscFunctionBegin; 2401 *array = a->a; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2407 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2408 { 2409 PetscFunctionBegin; 2410 PetscFunctionReturn(0); 2411 } 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2415 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2416 { 2417 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2418 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2419 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2420 PetscErrorCode ierr; 2421 2422 PetscFunctionBegin; 2423 /* Set the number of nonzeros in the new matrix */ 2424 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2430 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2431 { 2432 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2433 PetscErrorCode ierr; 2434 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2435 PetscBLASInt one=1; 2436 2437 PetscFunctionBegin; 2438 if (str == SAME_NONZERO_PATTERN) { 2439 PetscScalar alpha = a; 2440 PetscBLASInt bnz; 2441 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2442 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2443 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2444 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2445 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2446 } else { 2447 Mat B; 2448 PetscInt *nnz; 2449 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2450 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2451 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2452 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2453 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2454 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2455 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2456 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2457 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2458 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2459 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2460 ierr = PetscFree(nnz);CHKERRQ(ierr); 2461 } 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2467 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2468 { 2469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2470 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2471 MatScalar *aa = a->a; 2472 2473 PetscFunctionBegin; 2474 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2480 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2481 { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2483 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2484 MatScalar *aa = a->a; 2485 2486 PetscFunctionBegin; 2487 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2488 PetscFunctionReturn(0); 2489 } 2490 2491 #undef __FUNCT__ 2492 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2493 /* 2494 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2495 */ 2496 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2497 { 2498 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2499 PetscErrorCode ierr; 2500 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2501 PetscInt nz = a->i[m],row,*jj,mr,col; 2502 2503 PetscFunctionBegin; 2504 *nn = n; 2505 if (!ia) PetscFunctionReturn(0); 2506 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2507 else { 2508 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2509 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2510 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2511 jj = a->j; 2512 for (i=0; i<nz; i++) { 2513 collengths[jj[i]]++; 2514 } 2515 cia[0] = oshift; 2516 for (i=0; i<n; i++) { 2517 cia[i+1] = cia[i] + collengths[i]; 2518 } 2519 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2520 jj = a->j; 2521 for (row=0; row<m; row++) { 2522 mr = a->i[row+1] - a->i[row]; 2523 for (i=0; i<mr; i++) { 2524 col = *jj++; 2525 2526 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2527 } 2528 } 2529 ierr = PetscFree(collengths);CHKERRQ(ierr); 2530 *ia = cia; *ja = cja; 2531 } 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2537 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2538 { 2539 PetscErrorCode ierr; 2540 2541 PetscFunctionBegin; 2542 if (!ia) PetscFunctionReturn(0); 2543 ierr = PetscFree(*ia);CHKERRQ(ierr); 2544 ierr = PetscFree(*ja);CHKERRQ(ierr); 2545 PetscFunctionReturn(0); 2546 } 2547 2548 /* 2549 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2550 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2551 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2552 */ 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2555 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2556 { 2557 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2558 PetscErrorCode ierr; 2559 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2560 PetscInt nz = a->i[m],row,*jj,mr,col; 2561 PetscInt *cspidx; 2562 2563 PetscFunctionBegin; 2564 *nn = n; 2565 if (!ia) PetscFunctionReturn(0); 2566 2567 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2568 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2569 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2570 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2571 jj = a->j; 2572 for (i=0; i<nz; i++) { 2573 collengths[jj[i]]++; 2574 } 2575 cia[0] = oshift; 2576 for (i=0; i<n; i++) { 2577 cia[i+1] = cia[i] + collengths[i]; 2578 } 2579 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2580 jj = a->j; 2581 for (row=0; row<m; row++) { 2582 mr = a->i[row+1] - a->i[row]; 2583 for (i=0; i<mr; i++) { 2584 col = *jj++; 2585 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2586 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2587 } 2588 } 2589 ierr = PetscFree(collengths);CHKERRQ(ierr); 2590 *ia = cia; *ja = cja; 2591 *spidx = cspidx; 2592 PetscFunctionReturn(0); 2593 } 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2597 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2598 { 2599 PetscErrorCode ierr; 2600 2601 PetscFunctionBegin; 2602 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2603 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2604 PetscFunctionReturn(0); 2605 } 2606 2607 #undef __FUNCT__ 2608 #define __FUNCT__ "MatShift_SeqBAIJ" 2609 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2610 { 2611 PetscErrorCode ierr; 2612 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2613 2614 PetscFunctionBegin; 2615 if (!Y->preallocated || !aij->nz) { 2616 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2617 } 2618 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2619 PetscFunctionReturn(0); 2620 } 2621 2622 /* -------------------------------------------------------------------*/ 2623 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2624 MatGetRow_SeqBAIJ, 2625 MatRestoreRow_SeqBAIJ, 2626 MatMult_SeqBAIJ_N, 2627 /* 4*/ MatMultAdd_SeqBAIJ_N, 2628 MatMultTranspose_SeqBAIJ, 2629 MatMultTransposeAdd_SeqBAIJ, 2630 0, 2631 0, 2632 0, 2633 /* 10*/ 0, 2634 MatLUFactor_SeqBAIJ, 2635 0, 2636 0, 2637 MatTranspose_SeqBAIJ, 2638 /* 15*/ MatGetInfo_SeqBAIJ, 2639 MatEqual_SeqBAIJ, 2640 MatGetDiagonal_SeqBAIJ, 2641 MatDiagonalScale_SeqBAIJ, 2642 MatNorm_SeqBAIJ, 2643 /* 20*/ 0, 2644 MatAssemblyEnd_SeqBAIJ, 2645 MatSetOption_SeqBAIJ, 2646 MatZeroEntries_SeqBAIJ, 2647 /* 24*/ MatZeroRows_SeqBAIJ, 2648 0, 2649 0, 2650 0, 2651 0, 2652 /* 29*/ MatSetUp_SeqBAIJ, 2653 0, 2654 0, 2655 0, 2656 0, 2657 /* 34*/ MatDuplicate_SeqBAIJ, 2658 0, 2659 0, 2660 MatILUFactor_SeqBAIJ, 2661 0, 2662 /* 39*/ MatAXPY_SeqBAIJ, 2663 MatGetSubMatrices_SeqBAIJ, 2664 MatIncreaseOverlap_SeqBAIJ, 2665 MatGetValues_SeqBAIJ, 2666 MatCopy_SeqBAIJ, 2667 /* 44*/ 0, 2668 MatScale_SeqBAIJ, 2669 MatShift_SeqBAIJ, 2670 0, 2671 MatZeroRowsColumns_SeqBAIJ, 2672 /* 49*/ 0, 2673 MatGetRowIJ_SeqBAIJ, 2674 MatRestoreRowIJ_SeqBAIJ, 2675 MatGetColumnIJ_SeqBAIJ, 2676 MatRestoreColumnIJ_SeqBAIJ, 2677 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2678 0, 2679 0, 2680 0, 2681 MatSetValuesBlocked_SeqBAIJ, 2682 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2683 MatDestroy_SeqBAIJ, 2684 MatView_SeqBAIJ, 2685 0, 2686 0, 2687 /* 64*/ 0, 2688 0, 2689 0, 2690 0, 2691 0, 2692 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2693 0, 2694 MatConvert_Basic, 2695 0, 2696 0, 2697 /* 74*/ 0, 2698 MatFDColoringApply_BAIJ, 2699 0, 2700 0, 2701 0, 2702 /* 79*/ 0, 2703 0, 2704 0, 2705 0, 2706 MatLoad_SeqBAIJ, 2707 /* 84*/ 0, 2708 0, 2709 0, 2710 0, 2711 0, 2712 /* 89*/ 0, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /* 94*/ 0, 2718 0, 2719 0, 2720 0, 2721 0, 2722 /* 99*/ 0, 2723 0, 2724 0, 2725 0, 2726 0, 2727 /*104*/ 0, 2728 MatRealPart_SeqBAIJ, 2729 MatImaginaryPart_SeqBAIJ, 2730 0, 2731 0, 2732 /*109*/ 0, 2733 0, 2734 0, 2735 0, 2736 MatMissingDiagonal_SeqBAIJ, 2737 /*114*/ 0, 2738 0, 2739 0, 2740 0, 2741 0, 2742 /*119*/ 0, 2743 0, 2744 MatMultHermitianTranspose_SeqBAIJ, 2745 MatMultHermitianTransposeAdd_SeqBAIJ, 2746 0, 2747 /*124*/ 0, 2748 0, 2749 MatInvertBlockDiagonal_SeqBAIJ, 2750 0, 2751 0, 2752 /*129*/ 0, 2753 0, 2754 0, 2755 0, 2756 0, 2757 /*134*/ 0, 2758 0, 2759 0, 2760 0, 2761 0, 2762 /*139*/ 0, 2763 0, 2764 0, 2765 MatFDColoringSetUp_SeqXAIJ, 2766 0, 2767 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2768 }; 2769 2770 #undef __FUNCT__ 2771 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2772 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2773 { 2774 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2775 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2776 PetscErrorCode ierr; 2777 2778 PetscFunctionBegin; 2779 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2780 2781 /* allocate space for values if not already there */ 2782 if (!aij->saved_values) { 2783 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2784 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2785 } 2786 2787 /* copy values over */ 2788 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2789 PetscFunctionReturn(0); 2790 } 2791 2792 #undef __FUNCT__ 2793 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2794 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2795 { 2796 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2797 PetscErrorCode ierr; 2798 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2799 2800 PetscFunctionBegin; 2801 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2802 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2803 2804 /* copy values over */ 2805 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2806 PetscFunctionReturn(0); 2807 } 2808 2809 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2810 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2811 2812 #undef __FUNCT__ 2813 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2814 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2815 { 2816 Mat_SeqBAIJ *b; 2817 PetscErrorCode ierr; 2818 PetscInt i,mbs,nbs,bs2; 2819 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2820 2821 PetscFunctionBegin; 2822 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2823 if (nz == MAT_SKIP_ALLOCATION) { 2824 skipallocation = PETSC_TRUE; 2825 nz = 0; 2826 } 2827 2828 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2829 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2830 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2831 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2832 2833 B->preallocated = PETSC_TRUE; 2834 2835 mbs = B->rmap->n/bs; 2836 nbs = B->cmap->n/bs; 2837 bs2 = bs*bs; 2838 2839 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2840 2841 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2842 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2843 if (nnz) { 2844 for (i=0; i<mbs; i++) { 2845 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2846 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2847 } 2848 } 2849 2850 b = (Mat_SeqBAIJ*)B->data; 2851 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2852 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2853 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2854 2855 if (!flg) { 2856 switch (bs) { 2857 case 1: 2858 B->ops->mult = MatMult_SeqBAIJ_1; 2859 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2860 break; 2861 case 2: 2862 B->ops->mult = MatMult_SeqBAIJ_2; 2863 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2864 break; 2865 case 3: 2866 B->ops->mult = MatMult_SeqBAIJ_3; 2867 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2868 break; 2869 case 4: 2870 B->ops->mult = MatMult_SeqBAIJ_4; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2872 break; 2873 case 5: 2874 B->ops->mult = MatMult_SeqBAIJ_5; 2875 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2876 break; 2877 case 6: 2878 B->ops->mult = MatMult_SeqBAIJ_6; 2879 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2880 break; 2881 case 7: 2882 B->ops->mult = MatMult_SeqBAIJ_7; 2883 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2884 break; 2885 case 15: 2886 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2887 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2888 break; 2889 default: 2890 B->ops->mult = MatMult_SeqBAIJ_N; 2891 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2892 break; 2893 } 2894 } 2895 B->ops->sor = MatSOR_SeqBAIJ; 2896 b->mbs = mbs; 2897 b->nbs = nbs; 2898 if (!skipallocation) { 2899 if (!b->imax) { 2900 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2901 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2902 2903 b->free_imax_ilen = PETSC_TRUE; 2904 } 2905 /* b->ilen will count nonzeros in each block row so far. */ 2906 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2907 if (!nnz) { 2908 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2909 else if (nz < 0) nz = 1; 2910 for (i=0; i<mbs; i++) b->imax[i] = nz; 2911 nz = nz*mbs; 2912 } else { 2913 nz = 0; 2914 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2915 } 2916 2917 /* allocate the matrix space */ 2918 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2919 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2920 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2921 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2922 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2923 2924 b->singlemalloc = PETSC_TRUE; 2925 b->i[0] = 0; 2926 for (i=1; i<mbs+1; i++) { 2927 b->i[i] = b->i[i-1] + b->imax[i-1]; 2928 } 2929 b->free_a = PETSC_TRUE; 2930 b->free_ij = PETSC_TRUE; 2931 } else { 2932 b->free_a = PETSC_FALSE; 2933 b->free_ij = PETSC_FALSE; 2934 } 2935 2936 b->bs2 = bs2; 2937 b->mbs = mbs; 2938 b->nz = 0; 2939 b->maxnz = nz; 2940 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2941 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2942 PetscFunctionReturn(0); 2943 } 2944 2945 #undef __FUNCT__ 2946 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2947 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2948 { 2949 PetscInt i,m,nz,nz_max=0,*nnz; 2950 PetscScalar *values=0; 2951 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2952 PetscErrorCode ierr; 2953 2954 PetscFunctionBegin; 2955 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2956 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2957 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2958 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2959 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2960 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2961 m = B->rmap->n/bs; 2962 2963 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2964 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2965 for (i=0; i<m; i++) { 2966 nz = ii[i+1]- ii[i]; 2967 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2968 nz_max = PetscMax(nz_max, nz); 2969 nnz[i] = nz; 2970 } 2971 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2972 ierr = PetscFree(nnz);CHKERRQ(ierr); 2973 2974 values = (PetscScalar*)V; 2975 if (!values) { 2976 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2977 } 2978 for (i=0; i<m; i++) { 2979 PetscInt ncols = ii[i+1] - ii[i]; 2980 const PetscInt *icols = jj + ii[i]; 2981 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2982 if (!roworiented) { 2983 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2984 } else { 2985 PetscInt j; 2986 for (j=0; j<ncols; j++) { 2987 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2988 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2989 } 2990 } 2991 } 2992 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2993 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2995 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2996 PetscFunctionReturn(0); 2997 } 2998 2999 /*MC 3000 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3001 block sparse compressed row format. 3002 3003 Options Database Keys: 3004 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3005 3006 Level: beginner 3007 3008 .seealso: MatCreateSeqBAIJ() 3009 M*/ 3010 3011 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "MatCreate_SeqBAIJ" 3015 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3016 { 3017 PetscErrorCode ierr; 3018 PetscMPIInt size; 3019 Mat_SeqBAIJ *b; 3020 3021 PetscFunctionBegin; 3022 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3023 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3024 3025 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3026 B->data = (void*)b; 3027 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3028 3029 b->row = 0; 3030 b->col = 0; 3031 b->icol = 0; 3032 b->reallocs = 0; 3033 b->saved_values = 0; 3034 3035 b->roworiented = PETSC_TRUE; 3036 b->nonew = 0; 3037 b->diag = 0; 3038 B->spptr = 0; 3039 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3040 b->keepnonzeropattern = PETSC_FALSE; 3041 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3053 PetscFunctionReturn(0); 3054 } 3055 3056 #undef __FUNCT__ 3057 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3058 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3059 { 3060 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3061 PetscErrorCode ierr; 3062 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3063 3064 PetscFunctionBegin; 3065 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3066 3067 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3068 c->imax = a->imax; 3069 c->ilen = a->ilen; 3070 c->free_imax_ilen = PETSC_FALSE; 3071 } else { 3072 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3073 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3074 for (i=0; i<mbs; i++) { 3075 c->imax[i] = a->imax[i]; 3076 c->ilen[i] = a->ilen[i]; 3077 } 3078 c->free_imax_ilen = PETSC_TRUE; 3079 } 3080 3081 /* allocate the matrix space */ 3082 if (mallocmatspace) { 3083 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3084 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3085 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3086 3087 c->i = a->i; 3088 c->j = a->j; 3089 c->singlemalloc = PETSC_FALSE; 3090 c->free_a = PETSC_TRUE; 3091 c->free_ij = PETSC_FALSE; 3092 c->parent = A; 3093 C->preallocated = PETSC_TRUE; 3094 C->assembled = PETSC_TRUE; 3095 3096 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3097 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3098 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3099 } else { 3100 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3101 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3102 3103 c->singlemalloc = PETSC_TRUE; 3104 c->free_a = PETSC_TRUE; 3105 c->free_ij = PETSC_TRUE; 3106 3107 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3108 if (mbs > 0) { 3109 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3110 if (cpvalues == MAT_COPY_VALUES) { 3111 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3112 } else { 3113 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3114 } 3115 } 3116 C->preallocated = PETSC_TRUE; 3117 C->assembled = PETSC_TRUE; 3118 } 3119 } 3120 3121 c->roworiented = a->roworiented; 3122 c->nonew = a->nonew; 3123 3124 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3125 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3126 3127 c->bs2 = a->bs2; 3128 c->mbs = a->mbs; 3129 c->nbs = a->nbs; 3130 3131 if (a->diag) { 3132 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3133 c->diag = a->diag; 3134 c->free_diag = PETSC_FALSE; 3135 } else { 3136 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3137 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3138 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3139 c->free_diag = PETSC_TRUE; 3140 } 3141 } else c->diag = 0; 3142 3143 c->nz = a->nz; 3144 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3145 c->solve_work = NULL; 3146 c->mult_work = NULL; 3147 c->sor_workt = NULL; 3148 c->sor_work = NULL; 3149 3150 c->compressedrow.use = a->compressedrow.use; 3151 c->compressedrow.nrows = a->compressedrow.nrows; 3152 if (a->compressedrow.use) { 3153 i = a->compressedrow.nrows; 3154 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3155 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3156 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3157 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3158 } else { 3159 c->compressedrow.use = PETSC_FALSE; 3160 c->compressedrow.i = NULL; 3161 c->compressedrow.rindex = NULL; 3162 } 3163 C->nonzerostate = A->nonzerostate; 3164 3165 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3166 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3167 PetscFunctionReturn(0); 3168 } 3169 3170 #undef __FUNCT__ 3171 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3172 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3173 { 3174 PetscErrorCode ierr; 3175 3176 PetscFunctionBegin; 3177 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3178 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3179 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3180 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3181 PetscFunctionReturn(0); 3182 } 3183 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "MatLoad_SeqBAIJ" 3186 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3187 { 3188 Mat_SeqBAIJ *a; 3189 PetscErrorCode ierr; 3190 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3191 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3192 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3193 PetscInt *masked,nmask,tmp,bs2,ishift; 3194 PetscMPIInt size; 3195 int fd; 3196 PetscScalar *aa; 3197 MPI_Comm comm; 3198 3199 PetscFunctionBegin; 3200 /* force binary viewer to load .info file if it has not yet done so */ 3201 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3202 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3203 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3204 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3205 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3206 if (bs < 0) bs = 1; 3207 bs2 = bs*bs; 3208 3209 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3210 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3211 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3212 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3213 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3214 M = header[1]; N = header[2]; nz = header[3]; 3215 3216 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3217 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3218 3219 /* 3220 This code adds extra rows to make sure the number of rows is 3221 divisible by the blocksize 3222 */ 3223 mbs = M/bs; 3224 extra_rows = bs - M + bs*(mbs); 3225 if (extra_rows == bs) extra_rows = 0; 3226 else mbs++; 3227 if (extra_rows) { 3228 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3229 } 3230 3231 /* Set global sizes if not already set */ 3232 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3233 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3234 } else { /* Check if the matrix global sizes are correct */ 3235 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3236 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3237 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3238 } 3239 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3240 } 3241 3242 /* read in row lengths */ 3243 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3244 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3245 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3246 3247 /* read in column indices */ 3248 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3249 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3250 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3251 3252 /* loop over row lengths determining block row lengths */ 3253 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3254 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3255 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3256 rowcount = 0; 3257 nzcount = 0; 3258 for (i=0; i<mbs; i++) { 3259 nmask = 0; 3260 for (j=0; j<bs; j++) { 3261 kmax = rowlengths[rowcount]; 3262 for (k=0; k<kmax; k++) { 3263 tmp = jj[nzcount++]/bs; 3264 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3265 } 3266 rowcount++; 3267 } 3268 browlengths[i] += nmask; 3269 /* zero out the mask elements we set */ 3270 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3271 } 3272 3273 /* Do preallocation */ 3274 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3275 a = (Mat_SeqBAIJ*)newmat->data; 3276 3277 /* set matrix "i" values */ 3278 a->i[0] = 0; 3279 for (i=1; i<= mbs; i++) { 3280 a->i[i] = a->i[i-1] + browlengths[i-1]; 3281 a->ilen[i-1] = browlengths[i-1]; 3282 } 3283 a->nz = 0; 3284 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3285 3286 /* read in nonzero values */ 3287 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3288 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3289 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3290 3291 /* set "a" and "j" values into matrix */ 3292 nzcount = 0; jcount = 0; 3293 for (i=0; i<mbs; i++) { 3294 nzcountb = nzcount; 3295 nmask = 0; 3296 for (j=0; j<bs; j++) { 3297 kmax = rowlengths[i*bs+j]; 3298 for (k=0; k<kmax; k++) { 3299 tmp = jj[nzcount++]/bs; 3300 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3301 } 3302 } 3303 /* sort the masked values */ 3304 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3305 3306 /* set "j" values into matrix */ 3307 maskcount = 1; 3308 for (j=0; j<nmask; j++) { 3309 a->j[jcount++] = masked[j]; 3310 mask[masked[j]] = maskcount++; 3311 } 3312 /* set "a" values into matrix */ 3313 ishift = bs2*a->i[i]; 3314 for (j=0; j<bs; j++) { 3315 kmax = rowlengths[i*bs+j]; 3316 for (k=0; k<kmax; k++) { 3317 tmp = jj[nzcountb]/bs; 3318 block = mask[tmp] - 1; 3319 point = jj[nzcountb] - bs*tmp; 3320 idx = ishift + bs2*block + j + bs*point; 3321 a->a[idx] = (MatScalar)aa[nzcountb++]; 3322 } 3323 } 3324 /* zero out the mask elements we set */ 3325 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3326 } 3327 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3328 3329 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3330 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3331 ierr = PetscFree(aa);CHKERRQ(ierr); 3332 ierr = PetscFree(jj);CHKERRQ(ierr); 3333 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3334 3335 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3336 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3337 PetscFunctionReturn(0); 3338 } 3339 3340 #undef __FUNCT__ 3341 #define __FUNCT__ "MatCreateSeqBAIJ" 3342 /*@C 3343 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3344 compressed row) format. For good matrix assembly performance the 3345 user should preallocate the matrix storage by setting the parameter nz 3346 (or the array nnz). By setting these parameters accurately, performance 3347 during matrix assembly can be increased by more than a factor of 50. 3348 3349 Collective on MPI_Comm 3350 3351 Input Parameters: 3352 + comm - MPI communicator, set to PETSC_COMM_SELF 3353 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3354 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3355 . m - number of rows 3356 . n - number of columns 3357 . nz - number of nonzero blocks per block row (same for all rows) 3358 - nnz - array containing the number of nonzero blocks in the various block rows 3359 (possibly different for each block row) or NULL 3360 3361 Output Parameter: 3362 . A - the matrix 3363 3364 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3365 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3366 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3367 3368 Options Database Keys: 3369 . -mat_no_unroll - uses code that does not unroll the loops in the 3370 block calculations (much slower) 3371 . -mat_block_size - size of the blocks to use 3372 3373 Level: intermediate 3374 3375 Notes: 3376 The number of rows and columns must be divisible by blocksize. 3377 3378 If the nnz parameter is given then the nz parameter is ignored 3379 3380 A nonzero block is any block that as 1 or more nonzeros in it 3381 3382 The block AIJ format is fully compatible with standard Fortran 77 3383 storage. That is, the stored row and column indices can begin at 3384 either one (as in Fortran) or zero. See the users' manual for details. 3385 3386 Specify the preallocated storage with either nz or nnz (not both). 3387 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3388 allocation. See Users-Manual: ch_mat for details. 3389 matrices. 3390 3391 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3392 @*/ 3393 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3394 { 3395 PetscErrorCode ierr; 3396 3397 PetscFunctionBegin; 3398 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3399 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3400 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3401 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3402 PetscFunctionReturn(0); 3403 } 3404 3405 #undef __FUNCT__ 3406 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3407 /*@C 3408 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3409 per row in the matrix. For good matrix assembly performance the 3410 user should preallocate the matrix storage by setting the parameter nz 3411 (or the array nnz). By setting these parameters accurately, performance 3412 during matrix assembly can be increased by more than a factor of 50. 3413 3414 Collective on MPI_Comm 3415 3416 Input Parameters: 3417 + B - the matrix 3418 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3419 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3420 . nz - number of block nonzeros per block row (same for all rows) 3421 - nnz - array containing the number of block nonzeros in the various block rows 3422 (possibly different for each block row) or NULL 3423 3424 Options Database Keys: 3425 . -mat_no_unroll - uses code that does not unroll the loops in the 3426 block calculations (much slower) 3427 . -mat_block_size - size of the blocks to use 3428 3429 Level: intermediate 3430 3431 Notes: 3432 If the nnz parameter is given then the nz parameter is ignored 3433 3434 You can call MatGetInfo() to get information on how effective the preallocation was; 3435 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3436 You can also run with the option -info and look for messages with the string 3437 malloc in them to see if additional memory allocation was needed. 3438 3439 The block AIJ format is fully compatible with standard Fortran 77 3440 storage. That is, the stored row and column indices can begin at 3441 either one (as in Fortran) or zero. See the users' manual for details. 3442 3443 Specify the preallocated storage with either nz or nnz (not both). 3444 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3445 allocation. See Users-Manual: ch_mat for details. 3446 3447 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3448 @*/ 3449 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3450 { 3451 PetscErrorCode ierr; 3452 3453 PetscFunctionBegin; 3454 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3455 PetscValidType(B,1); 3456 PetscValidLogicalCollectiveInt(B,bs,2); 3457 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3458 PetscFunctionReturn(0); 3459 } 3460 3461 #undef __FUNCT__ 3462 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3463 /*@C 3464 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3465 (the default sequential PETSc format). 3466 3467 Collective on MPI_Comm 3468 3469 Input Parameters: 3470 + B - the matrix 3471 . i - the indices into j for the start of each local row (starts with zero) 3472 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3473 - v - optional values in the matrix 3474 3475 Level: developer 3476 3477 Notes: 3478 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3479 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3480 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3481 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3482 block column and the second index is over columns within a block. 3483 3484 .keywords: matrix, aij, compressed row, sparse 3485 3486 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3487 @*/ 3488 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3489 { 3490 PetscErrorCode ierr; 3491 3492 PetscFunctionBegin; 3493 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3494 PetscValidType(B,1); 3495 PetscValidLogicalCollectiveInt(B,bs,2); 3496 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3497 PetscFunctionReturn(0); 3498 } 3499 3500 3501 #undef __FUNCT__ 3502 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3503 /*@ 3504 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3505 3506 Collective on MPI_Comm 3507 3508 Input Parameters: 3509 + comm - must be an MPI communicator of size 1 3510 . bs - size of block 3511 . m - number of rows 3512 . n - number of columns 3513 . i - row indices 3514 . j - column indices 3515 - a - matrix values 3516 3517 Output Parameter: 3518 . mat - the matrix 3519 3520 Level: advanced 3521 3522 Notes: 3523 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3524 once the matrix is destroyed 3525 3526 You cannot set new nonzero locations into this matrix, that will generate an error. 3527 3528 The i and j indices are 0 based 3529 3530 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3531 3532 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3533 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3534 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3535 with column-major ordering within blocks. 3536 3537 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3538 3539 @*/ 3540 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3541 { 3542 PetscErrorCode ierr; 3543 PetscInt ii; 3544 Mat_SeqBAIJ *baij; 3545 3546 PetscFunctionBegin; 3547 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3548 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3549 3550 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3551 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3552 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3553 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3554 baij = (Mat_SeqBAIJ*)(*mat)->data; 3555 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3556 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3557 3558 baij->i = i; 3559 baij->j = j; 3560 baij->a = a; 3561 3562 baij->singlemalloc = PETSC_FALSE; 3563 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3564 baij->free_a = PETSC_FALSE; 3565 baij->free_ij = PETSC_FALSE; 3566 3567 for (ii=0; ii<m; ii++) { 3568 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3569 #if defined(PETSC_USE_DEBUG) 3570 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3571 #endif 3572 } 3573 #if defined(PETSC_USE_DEBUG) 3574 for (ii=0; ii<baij->i[m]; ii++) { 3575 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3576 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3577 } 3578 #endif 3579 3580 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3581 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3582 PetscFunctionReturn(0); 3583 } 3584 3585 #undef __FUNCT__ 3586 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3587 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3588 { 3589 PetscErrorCode ierr; 3590 3591 PetscFunctionBegin; 3592 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3593 PetscFunctionReturn(0); 3594 } 3595