1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool allowzeropivot,zeropivotdetected; 21 22 PetscFunctionBegin; 23 allowzeropivot = PetscNot(A->erroriffailure); 24 25 if (a->idiagvalid) { 26 if (values) *values = a->idiag; 27 PetscFunctionReturn(0); 28 } 29 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 30 diag_offset = a->diag; 31 if (!a->idiag) { 32 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 33 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 34 } 35 diag = a->idiag; 36 mdiag = a->idiag+bs2*mbs; 37 if (values) *values = a->idiag; 38 /* factor and invert each block */ 39 switch (bs) { 40 case 1: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 1*diag_offset[i]; 43 diag[0] = odiag[0]; 44 mdiag[0] = odiag[0]; 45 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 46 diag += 1; 47 mdiag += 1; 48 } 49 break; 50 case 2: 51 for (i=0; i<mbs; i++) { 52 odiag = v + 4*diag_offset[i]; 53 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 54 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 55 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 56 if (zeropivotdetected) break; 57 diag += 4; 58 mdiag += 4; 59 } 60 break; 61 case 3: 62 for (i=0; i<mbs; i++) { 63 odiag = v + 9*diag_offset[i]; 64 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 65 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 66 diag[8] = odiag[8]; 67 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 68 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 69 mdiag[8] = odiag[8]; 70 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 71 if (zeropivotdetected) break; 72 diag += 9; 73 mdiag += 9; 74 } 75 break; 76 case 4: 77 for (i=0; i<mbs; i++) { 78 odiag = v + 16*diag_offset[i]; 79 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 80 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 81 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 82 if (zeropivotdetected) break; 83 diag += 16; 84 mdiag += 16; 85 } 86 break; 87 case 5: 88 for (i=0; i<mbs; i++) { 89 odiag = v + 25*diag_offset[i]; 90 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 91 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 93 if (zeropivotdetected) break; 94 diag += 25; 95 mdiag += 25; 96 } 97 break; 98 case 6: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 36*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 104 if (zeropivotdetected) break; 105 diag += 36; 106 mdiag += 36; 107 } 108 break; 109 case 7: 110 for (i=0; i<mbs; i++) { 111 odiag = v + 49*diag_offset[i]; 112 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 115 if (zeropivotdetected) break; 116 diag += 49; 117 mdiag += 49; 118 } 119 break; 120 default: 121 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 122 for (i=0; i<mbs; i++) { 123 odiag = v + bs2*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 127 diag += bs2; 128 mdiag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 if (zeropivotdetected) { 134 A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 135 } 136 PetscFunctionReturn(0); 137 } 138 139 #undef __FUNCT__ 140 #define __FUNCT__ "MatSOR_SeqBAIJ" 141 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 142 { 143 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 144 PetscScalar *x,*work,*w,*workt,*t; 145 const MatScalar *v,*aa = a->a, *idiag; 146 const PetscScalar *b,*xb; 147 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 148 PetscErrorCode ierr; 149 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 150 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 151 152 PetscFunctionBegin; 153 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 154 its = its*lits; 155 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 156 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 157 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 158 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 159 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 160 161 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 162 163 if (!m) PetscFunctionReturn(0); 164 diag = a->diag; 165 idiag = a->idiag; 166 k = PetscMax(A->rmap->n,A->cmap->n); 167 if (!a->mult_work) { 168 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 169 } 170 if (!a->sor_workt) { 171 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 172 } 173 if (!a->sor_work) { 174 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 175 } 176 work = a->mult_work; 177 t = a->sor_workt; 178 w = a->sor_work; 179 180 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 181 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 182 183 if (flag & SOR_ZERO_INITIAL_GUESS) { 184 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 185 switch (bs) { 186 case 1: 187 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 188 t[0] = b[0]; 189 i2 = 1; 190 idiag += 1; 191 for (i=1; i<m; i++) { 192 v = aa + ai[i]; 193 vi = aj + ai[i]; 194 nz = diag[i] - ai[i]; 195 s[0] = b[i2]; 196 for (j=0; j<nz; j++) { 197 xw[0] = x[vi[j]]; 198 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 199 } 200 t[i2] = s[0]; 201 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 202 x[i2] = xw[0]; 203 idiag += 1; 204 i2 += 1; 205 } 206 break; 207 case 2: 208 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 209 t[0] = b[0]; t[1] = b[1]; 210 i2 = 2; 211 idiag += 4; 212 for (i=1; i<m; i++) { 213 v = aa + 4*ai[i]; 214 vi = aj + ai[i]; 215 nz = diag[i] - ai[i]; 216 s[0] = b[i2]; s[1] = b[i2+1]; 217 for (j=0; j<nz; j++) { 218 idx = 2*vi[j]; 219 it = 4*j; 220 xw[0] = x[idx]; xw[1] = x[1+idx]; 221 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 222 } 223 t[i2] = s[0]; t[i2+1] = s[1]; 224 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 225 x[i2] = xw[0]; x[i2+1] = xw[1]; 226 idiag += 4; 227 i2 += 2; 228 } 229 break; 230 case 3: 231 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 232 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 233 i2 = 3; 234 idiag += 9; 235 for (i=1; i<m; i++) { 236 v = aa + 9*ai[i]; 237 vi = aj + ai[i]; 238 nz = diag[i] - ai[i]; 239 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 240 while (nz--) { 241 idx = 3*(*vi++); 242 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 243 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 244 v += 9; 245 } 246 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 247 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 248 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 249 idiag += 9; 250 i2 += 3; 251 } 252 break; 253 case 4: 254 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 255 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 256 i2 = 4; 257 idiag += 16; 258 for (i=1; i<m; i++) { 259 v = aa + 16*ai[i]; 260 vi = aj + ai[i]; 261 nz = diag[i] - ai[i]; 262 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 263 while (nz--) { 264 idx = 4*(*vi++); 265 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 266 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 267 v += 16; 268 } 269 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 270 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 271 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 272 idiag += 16; 273 i2 += 4; 274 } 275 break; 276 case 5: 277 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 278 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 279 i2 = 5; 280 idiag += 25; 281 for (i=1; i<m; i++) { 282 v = aa + 25*ai[i]; 283 vi = aj + ai[i]; 284 nz = diag[i] - ai[i]; 285 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 286 while (nz--) { 287 idx = 5*(*vi++); 288 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 289 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 290 v += 25; 291 } 292 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 293 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 294 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 295 idiag += 25; 296 i2 += 5; 297 } 298 break; 299 case 6: 300 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 301 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 302 i2 = 6; 303 idiag += 36; 304 for (i=1; i<m; i++) { 305 v = aa + 36*ai[i]; 306 vi = aj + ai[i]; 307 nz = diag[i] - ai[i]; 308 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 309 while (nz--) { 310 idx = 6*(*vi++); 311 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 312 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 313 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 314 v += 36; 315 } 316 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 317 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 318 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 319 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 320 idiag += 36; 321 i2 += 6; 322 } 323 break; 324 case 7: 325 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 326 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 327 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 328 i2 = 7; 329 idiag += 49; 330 for (i=1; i<m; i++) { 331 v = aa + 49*ai[i]; 332 vi = aj + ai[i]; 333 nz = diag[i] - ai[i]; 334 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 335 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 336 while (nz--) { 337 idx = 7*(*vi++); 338 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 339 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 340 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 341 v += 49; 342 } 343 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 344 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 345 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 346 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 347 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 348 idiag += 49; 349 i2 += 7; 350 } 351 break; 352 default: 353 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 354 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 i2 = bs; 356 idiag += bs2; 357 for (i=1; i<m; i++) { 358 v = aa + bs2*ai[i]; 359 vi = aj + ai[i]; 360 nz = diag[i] - ai[i]; 361 362 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 363 /* copy all rows of x that are needed into contiguous space */ 364 workt = work; 365 for (j=0; j<nz; j++) { 366 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 367 workt += bs; 368 } 369 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 370 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 371 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 372 373 idiag += bs2; 374 i2 += bs; 375 } 376 break; 377 } 378 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 379 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 380 xb = t; 381 } 382 else xb = b; 383 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 384 idiag = a->idiag+bs2*(a->mbs-1); 385 i2 = bs * (m-1); 386 switch (bs) { 387 case 1: 388 s[0] = xb[i2]; 389 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 390 x[i2] = xw[0]; 391 i2 -= 1; 392 for (i=m-2; i>=0; i--) { 393 v = aa + (diag[i]+1); 394 vi = aj + diag[i] + 1; 395 nz = ai[i+1] - diag[i] - 1; 396 s[0] = xb[i2]; 397 for (j=0; j<nz; j++) { 398 xw[0] = x[vi[j]]; 399 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 400 } 401 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 402 x[i2] = xw[0]; 403 idiag -= 1; 404 i2 -= 1; 405 } 406 break; 407 case 2: 408 s[0] = xb[i2]; s[1] = xb[i2+1]; 409 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 410 x[i2] = xw[0]; x[i2+1] = xw[1]; 411 i2 -= 2; 412 idiag -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 4*(diag[i] + 1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s[0] = xb[i2]; s[1] = xb[i2+1]; 418 for (j=0; j<nz; j++) { 419 idx = 2*vi[j]; 420 it = 4*j; 421 xw[0] = x[idx]; xw[1] = x[1+idx]; 422 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 423 } 424 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 425 x[i2] = xw[0]; x[i2+1] = xw[1]; 426 idiag -= 4; 427 i2 -= 2; 428 } 429 break; 430 case 3: 431 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 432 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 433 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 434 i2 -= 3; 435 idiag -= 9; 436 for (i=m-2; i>=0; i--) { 437 v = aa + 9*(diag[i]+1); 438 vi = aj + diag[i] + 1; 439 nz = ai[i+1] - diag[i] - 1; 440 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 441 while (nz--) { 442 idx = 3*(*vi++); 443 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 444 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 445 v += 9; 446 } 447 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 448 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 449 idiag -= 9; 450 i2 -= 3; 451 } 452 break; 453 case 4: 454 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 455 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 456 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 457 i2 -= 4; 458 idiag -= 16; 459 for (i=m-2; i>=0; i--) { 460 v = aa + 16*(diag[i]+1); 461 vi = aj + diag[i] + 1; 462 nz = ai[i+1] - diag[i] - 1; 463 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 464 while (nz--) { 465 idx = 4*(*vi++); 466 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 467 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 468 v += 16; 469 } 470 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 471 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 472 idiag -= 16; 473 i2 -= 4; 474 } 475 break; 476 case 5: 477 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 478 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 479 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 480 i2 -= 5; 481 idiag -= 25; 482 for (i=m-2; i>=0; i--) { 483 v = aa + 25*(diag[i]+1); 484 vi = aj + diag[i] + 1; 485 nz = ai[i+1] - diag[i] - 1; 486 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 487 while (nz--) { 488 idx = 5*(*vi++); 489 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 490 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 491 v += 25; 492 } 493 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 494 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 495 idiag -= 25; 496 i2 -= 5; 497 } 498 break; 499 case 6: 500 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 501 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 503 i2 -= 6; 504 idiag -= 36; 505 for (i=m-2; i>=0; i--) { 506 v = aa + 36*(diag[i]+1); 507 vi = aj + diag[i] + 1; 508 nz = ai[i+1] - diag[i] - 1; 509 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 510 while (nz--) { 511 idx = 6*(*vi++); 512 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 513 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 514 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 515 v += 36; 516 } 517 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 518 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 519 idiag -= 36; 520 i2 -= 6; 521 } 522 break; 523 case 7: 524 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 525 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 526 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 527 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 528 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 529 i2 -= 7; 530 idiag -= 49; 531 for (i=m-2; i>=0; i--) { 532 v = aa + 49*(diag[i]+1); 533 vi = aj + diag[i] + 1; 534 nz = ai[i+1] - diag[i] - 1; 535 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 536 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 537 while (nz--) { 538 idx = 7*(*vi++); 539 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 540 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 541 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 542 v += 49; 543 } 544 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 545 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 546 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 547 idiag -= 49; 548 i2 -= 7; 549 } 550 break; 551 default: 552 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 553 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 554 i2 -= bs; 555 idiag -= bs2; 556 for (i=m-2; i>=0; i--) { 557 v = aa + bs2*(diag[i]+1); 558 vi = aj + diag[i] + 1; 559 nz = ai[i+1] - diag[i] - 1; 560 561 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 562 /* copy all rows of x that are needed into contiguous space */ 563 workt = work; 564 for (j=0; j<nz; j++) { 565 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 566 workt += bs; 567 } 568 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 569 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 570 571 idiag -= bs2; 572 i2 -= bs; 573 } 574 break; 575 } 576 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 577 } 578 its--; 579 } 580 while (its--) { 581 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 582 idiag = a->idiag; 583 i2 = 0; 584 switch (bs) { 585 case 1: 586 for (i=0; i<m; i++) { 587 v = aa + ai[i]; 588 vi = aj + ai[i]; 589 nz = ai[i+1] - ai[i]; 590 s[0] = b[i2]; 591 for (j=0; j<nz; j++) { 592 xw[0] = x[vi[j]]; 593 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 594 } 595 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 596 x[i2] += xw[0]; 597 idiag += 1; 598 i2 += 1; 599 } 600 break; 601 case 2: 602 for (i=0; i<m; i++) { 603 v = aa + 4*ai[i]; 604 vi = aj + ai[i]; 605 nz = ai[i+1] - ai[i]; 606 s[0] = b[i2]; s[1] = b[i2+1]; 607 for (j=0; j<nz; j++) { 608 idx = 2*vi[j]; 609 it = 4*j; 610 xw[0] = x[idx]; xw[1] = x[1+idx]; 611 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 612 } 613 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 614 x[i2] += xw[0]; x[i2+1] += xw[1]; 615 idiag += 4; 616 i2 += 2; 617 } 618 break; 619 case 3: 620 for (i=0; i<m; i++) { 621 v = aa + 9*ai[i]; 622 vi = aj + ai[i]; 623 nz = ai[i+1] - ai[i]; 624 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 625 while (nz--) { 626 idx = 3*(*vi++); 627 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 628 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 629 v += 9; 630 } 631 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 632 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 633 idiag += 9; 634 i2 += 3; 635 } 636 break; 637 case 4: 638 for (i=0; i<m; i++) { 639 v = aa + 16*ai[i]; 640 vi = aj + ai[i]; 641 nz = ai[i+1] - ai[i]; 642 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 643 while (nz--) { 644 idx = 4*(*vi++); 645 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 646 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 647 v += 16; 648 } 649 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 650 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 651 idiag += 16; 652 i2 += 4; 653 } 654 break; 655 case 5: 656 for (i=0; i<m; i++) { 657 v = aa + 25*ai[i]; 658 vi = aj + ai[i]; 659 nz = ai[i+1] - ai[i]; 660 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 661 while (nz--) { 662 idx = 5*(*vi++); 663 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 664 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 665 v += 25; 666 } 667 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 668 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 669 idiag += 25; 670 i2 += 5; 671 } 672 break; 673 case 6: 674 for (i=0; i<m; i++) { 675 v = aa + 36*ai[i]; 676 vi = aj + ai[i]; 677 nz = ai[i+1] - ai[i]; 678 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 679 while (nz--) { 680 idx = 6*(*vi++); 681 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 682 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 683 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 684 v += 36; 685 } 686 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 687 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 688 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 689 idiag += 36; 690 i2 += 6; 691 } 692 break; 693 case 7: 694 for (i=0; i<m; i++) { 695 v = aa + 49*ai[i]; 696 vi = aj + ai[i]; 697 nz = ai[i+1] - ai[i]; 698 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 699 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 700 while (nz--) { 701 idx = 7*(*vi++); 702 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 703 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 704 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 705 v += 49; 706 } 707 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 708 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 709 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 710 idiag += 49; 711 i2 += 7; 712 } 713 break; 714 default: 715 for (i=0; i<m; i++) { 716 v = aa + bs2*ai[i]; 717 vi = aj + ai[i]; 718 nz = ai[i+1] - ai[i]; 719 720 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 721 /* copy all rows of x that are needed into contiguous space */ 722 workt = work; 723 for (j=0; j<nz; j++) { 724 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 725 workt += bs; 726 } 727 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 728 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 729 730 idiag += bs2; 731 i2 += bs; 732 } 733 break; 734 } 735 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 736 } 737 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 738 idiag = a->idiag+bs2*(a->mbs-1); 739 i2 = bs * (m-1); 740 switch (bs) { 741 case 1: 742 for (i=m-1; i>=0; i--) { 743 v = aa + ai[i]; 744 vi = aj + ai[i]; 745 nz = ai[i+1] - ai[i]; 746 s[0] = b[i2]; 747 for (j=0; j<nz; j++) { 748 xw[0] = x[vi[j]]; 749 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 750 } 751 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 752 x[i2] += xw[0]; 753 idiag -= 1; 754 i2 -= 1; 755 } 756 break; 757 case 2: 758 for (i=m-1; i>=0; i--) { 759 v = aa + 4*ai[i]; 760 vi = aj + ai[i]; 761 nz = ai[i+1] - ai[i]; 762 s[0] = b[i2]; s[1] = b[i2+1]; 763 for (j=0; j<nz; j++) { 764 idx = 2*vi[j]; 765 it = 4*j; 766 xw[0] = x[idx]; xw[1] = x[1+idx]; 767 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 768 } 769 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 770 x[i2] += xw[0]; x[i2+1] += xw[1]; 771 idiag -= 4; 772 i2 -= 2; 773 } 774 break; 775 case 3: 776 for (i=m-1; i>=0; i--) { 777 v = aa + 9*ai[i]; 778 vi = aj + ai[i]; 779 nz = ai[i+1] - ai[i]; 780 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 781 while (nz--) { 782 idx = 3*(*vi++); 783 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 784 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 785 v += 9; 786 } 787 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 788 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 789 idiag -= 9; 790 i2 -= 3; 791 } 792 break; 793 case 4: 794 for (i=m-1; i>=0; i--) { 795 v = aa + 16*ai[i]; 796 vi = aj + ai[i]; 797 nz = ai[i+1] - ai[i]; 798 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 799 while (nz--) { 800 idx = 4*(*vi++); 801 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 802 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 803 v += 16; 804 } 805 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 806 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 807 idiag -= 16; 808 i2 -= 4; 809 } 810 break; 811 case 5: 812 for (i=m-1; i>=0; i--) { 813 v = aa + 25*ai[i]; 814 vi = aj + ai[i]; 815 nz = ai[i+1] - ai[i]; 816 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 817 while (nz--) { 818 idx = 5*(*vi++); 819 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 820 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 821 v += 25; 822 } 823 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 824 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 825 idiag -= 25; 826 i2 -= 5; 827 } 828 break; 829 case 6: 830 for (i=m-1; i>=0; i--) { 831 v = aa + 36*ai[i]; 832 vi = aj + ai[i]; 833 nz = ai[i+1] - ai[i]; 834 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 835 while (nz--) { 836 idx = 6*(*vi++); 837 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 838 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 839 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 840 v += 36; 841 } 842 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 843 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 844 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 845 idiag -= 36; 846 i2 -= 6; 847 } 848 break; 849 case 7: 850 for (i=m-1; i>=0; i--) { 851 v = aa + 49*ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i+1] - ai[i]; 854 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 855 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 856 while (nz--) { 857 idx = 7*(*vi++); 858 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 859 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 860 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 861 v += 49; 862 } 863 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 864 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 865 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 866 idiag -= 49; 867 i2 -= 7; 868 } 869 break; 870 default: 871 for (i=m-1; i>=0; i--) { 872 v = aa + bs2*ai[i]; 873 vi = aj + ai[i]; 874 nz = ai[i+1] - ai[i]; 875 876 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 877 /* copy all rows of x that are needed into contiguous space */ 878 workt = work; 879 for (j=0; j<nz; j++) { 880 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 881 workt += bs; 882 } 883 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 884 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 885 886 idiag -= bs2; 887 i2 -= bs; 888 } 889 break; 890 } 891 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 892 } 893 } 894 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 895 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 896 PetscFunctionReturn(0); 897 } 898 899 900 /* 901 Special version for direct calls from Fortran (Used in PETSc-fun3d) 902 */ 903 #if defined(PETSC_HAVE_FORTRAN_CAPS) 904 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 905 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 906 #define matsetvaluesblocked4_ matsetvaluesblocked4 907 #endif 908 909 #undef __FUNCT__ 910 #define __FUNCT__ "matsetvaluesblocked4_" 911 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 912 { 913 Mat A = *AA; 914 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 915 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 916 PetscInt *ai =a->i,*ailen=a->ilen; 917 PetscInt *aj =a->j,stepval,lastcol = -1; 918 const PetscScalar *value = v; 919 MatScalar *ap,*aa = a->a,*bap; 920 921 PetscFunctionBegin; 922 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 923 stepval = (n-1)*4; 924 for (k=0; k<m; k++) { /* loop over added rows */ 925 row = im[k]; 926 rp = aj + ai[row]; 927 ap = aa + 16*ai[row]; 928 nrow = ailen[row]; 929 low = 0; 930 high = nrow; 931 for (l=0; l<n; l++) { /* loop over added columns */ 932 col = in[l]; 933 if (col <= lastcol) low = 0; 934 else high = nrow; 935 lastcol = col; 936 value = v + k*(stepval+4 + l)*4; 937 while (high-low > 7) { 938 t = (low+high)/2; 939 if (rp[t] > col) high = t; 940 else low = t; 941 } 942 for (i=low; i<high; i++) { 943 if (rp[i] > col) break; 944 if (rp[i] == col) { 945 bap = ap + 16*i; 946 for (ii=0; ii<4; ii++,value+=stepval) { 947 for (jj=ii; jj<16; jj+=4) { 948 bap[jj] += *value++; 949 } 950 } 951 goto noinsert2; 952 } 953 } 954 N = nrow++ - 1; 955 high++; /* added new column index thus must search to one higher than before */ 956 /* shift up all the later entries in this row */ 957 for (ii=N; ii>=i; ii--) { 958 rp[ii+1] = rp[ii]; 959 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 960 } 961 if (N >= i) { 962 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 963 } 964 rp[i] = col; 965 bap = ap + 16*i; 966 for (ii=0; ii<4; ii++,value+=stepval) { 967 for (jj=ii; jj<16; jj+=4) { 968 bap[jj] = *value++; 969 } 970 } 971 noinsert2:; 972 low = i; 973 } 974 ailen[row] = nrow; 975 } 976 PetscFunctionReturnVoid(); 977 } 978 979 #if defined(PETSC_HAVE_FORTRAN_CAPS) 980 #define matsetvalues4_ MATSETVALUES4 981 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 982 #define matsetvalues4_ matsetvalues4 983 #endif 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatSetValues4_" 987 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 988 { 989 Mat A = *AA; 990 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 991 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 992 PetscInt *ai=a->i,*ailen=a->ilen; 993 PetscInt *aj=a->j,brow,bcol; 994 PetscInt ridx,cidx,lastcol = -1; 995 MatScalar *ap,value,*aa=a->a,*bap; 996 997 PetscFunctionBegin; 998 for (k=0; k<m; k++) { /* loop over added rows */ 999 row = im[k]; brow = row/4; 1000 rp = aj + ai[brow]; 1001 ap = aa + 16*ai[brow]; 1002 nrow = ailen[brow]; 1003 low = 0; 1004 high = nrow; 1005 for (l=0; l<n; l++) { /* loop over added columns */ 1006 col = in[l]; bcol = col/4; 1007 ridx = row % 4; cidx = col % 4; 1008 value = v[l + k*n]; 1009 if (col <= lastcol) low = 0; 1010 else high = nrow; 1011 lastcol = col; 1012 while (high-low > 7) { 1013 t = (low+high)/2; 1014 if (rp[t] > bcol) high = t; 1015 else low = t; 1016 } 1017 for (i=low; i<high; i++) { 1018 if (rp[i] > bcol) break; 1019 if (rp[i] == bcol) { 1020 bap = ap + 16*i + 4*cidx + ridx; 1021 *bap += value; 1022 goto noinsert1; 1023 } 1024 } 1025 N = nrow++ - 1; 1026 high++; /* added new column thus must search to one higher than before */ 1027 /* shift up all the later entries in this row */ 1028 for (ii=N; ii>=i; ii--) { 1029 rp[ii+1] = rp[ii]; 1030 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1031 } 1032 if (N>=i) { 1033 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1034 } 1035 rp[i] = bcol; 1036 ap[16*i + 4*cidx + ridx] = value; 1037 noinsert1:; 1038 low = i; 1039 } 1040 ailen[brow] = nrow; 1041 } 1042 PetscFunctionReturnVoid(); 1043 } 1044 1045 /* 1046 Checks for missing diagonals 1047 */ 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1050 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1051 { 1052 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1053 PetscErrorCode ierr; 1054 PetscInt *diag,*ii = a->i,i; 1055 1056 PetscFunctionBegin; 1057 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1058 *missing = PETSC_FALSE; 1059 if (A->rmap->n > 0 && !ii) { 1060 *missing = PETSC_TRUE; 1061 if (d) *d = 0; 1062 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1063 } else { 1064 diag = a->diag; 1065 for (i=0; i<a->mbs; i++) { 1066 if (diag[i] >= ii[i+1]) { 1067 *missing = PETSC_TRUE; 1068 if (d) *d = i; 1069 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1070 break; 1071 } 1072 } 1073 } 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1080 { 1081 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1082 PetscErrorCode ierr; 1083 PetscInt i,j,m = a->mbs; 1084 1085 PetscFunctionBegin; 1086 if (!a->diag) { 1087 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1088 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1089 a->free_diag = PETSC_TRUE; 1090 } 1091 for (i=0; i<m; i++) { 1092 a->diag[i] = a->i[i+1]; 1093 for (j=a->i[i]; j<a->i[i+1]; j++) { 1094 if (a->j[j] == i) { 1095 a->diag[i] = j; 1096 break; 1097 } 1098 } 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1106 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1107 { 1108 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1109 PetscErrorCode ierr; 1110 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1111 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1112 1113 PetscFunctionBegin; 1114 *nn = n; 1115 if (!ia) PetscFunctionReturn(0); 1116 if (symmetric) { 1117 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1118 nz = tia[n]; 1119 } else { 1120 tia = a->i; tja = a->j; 1121 } 1122 1123 if (!blockcompressed && bs > 1) { 1124 (*nn) *= bs; 1125 /* malloc & create the natural set of indices */ 1126 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1127 if (n) { 1128 (*ia)[0] = 0; 1129 for (j=1; j<bs; j++) { 1130 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1131 } 1132 } 1133 1134 for (i=1; i<n; i++) { 1135 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1136 for (j=1; j<bs; j++) { 1137 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1138 } 1139 } 1140 if (n) { 1141 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1142 } 1143 1144 if (inja) { 1145 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1146 cnt = 0; 1147 for (i=0; i<n; i++) { 1148 for (j=0; j<bs; j++) { 1149 for (k=tia[i]; k<tia[i+1]; k++) { 1150 for (l=0; l<bs; l++) { 1151 (*ja)[cnt++] = bs*tja[k] + l; 1152 } 1153 } 1154 } 1155 } 1156 } 1157 1158 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1159 ierr = PetscFree(tia);CHKERRQ(ierr); 1160 ierr = PetscFree(tja);CHKERRQ(ierr); 1161 } 1162 } else if (oshift == 1) { 1163 if (symmetric) { 1164 nz = tia[A->rmap->n/bs]; 1165 /* add 1 to i and j indices */ 1166 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1167 *ia = tia; 1168 if (ja) { 1169 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1170 *ja = tja; 1171 } 1172 } else { 1173 nz = a->i[A->rmap->n/bs]; 1174 /* malloc space and add 1 to i and j indices */ 1175 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1176 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1177 if (ja) { 1178 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1179 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1180 } 1181 } 1182 } else { 1183 *ia = tia; 1184 if (ja) *ja = tja; 1185 } 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1191 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1192 { 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 if (!ia) PetscFunctionReturn(0); 1197 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1198 ierr = PetscFree(*ia);CHKERRQ(ierr); 1199 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1200 } 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1206 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1207 { 1208 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1209 PetscErrorCode ierr; 1210 1211 PetscFunctionBegin; 1212 #if defined(PETSC_USE_LOG) 1213 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1214 #endif 1215 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1216 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1217 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1218 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1219 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1220 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1221 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1222 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1223 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1224 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1225 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1226 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1227 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1228 1229 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1230 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1231 ierr = PetscFree(A->data);CHKERRQ(ierr); 1232 1233 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1249 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1250 { 1251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 switch (op) { 1256 case MAT_ROW_ORIENTED: 1257 a->roworiented = flg; 1258 break; 1259 case MAT_KEEP_NONZERO_PATTERN: 1260 a->keepnonzeropattern = flg; 1261 break; 1262 case MAT_NEW_NONZERO_LOCATIONS: 1263 a->nonew = (flg ? 0 : 1); 1264 break; 1265 case MAT_NEW_NONZERO_LOCATION_ERR: 1266 a->nonew = (flg ? -1 : 0); 1267 break; 1268 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1269 a->nonew = (flg ? -2 : 0); 1270 break; 1271 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1272 a->nounused = (flg ? -1 : 0); 1273 break; 1274 case MAT_NEW_DIAGONALS: 1275 case MAT_IGNORE_OFF_PROC_ENTRIES: 1276 case MAT_USE_HASH_TABLE: 1277 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1278 break; 1279 case MAT_SPD: 1280 case MAT_SYMMETRIC: 1281 case MAT_STRUCTURALLY_SYMMETRIC: 1282 case MAT_HERMITIAN: 1283 case MAT_SYMMETRY_ETERNAL: 1284 /* These options are handled directly by MatSetOption() */ 1285 break; 1286 default: 1287 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1288 } 1289 PetscFunctionReturn(0); 1290 } 1291 1292 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1295 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1296 { 1297 PetscErrorCode ierr; 1298 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1299 MatScalar *aa_i; 1300 PetscScalar *v_i; 1301 1302 PetscFunctionBegin; 1303 bs = A->rmap->bs; 1304 bs2 = bs*bs; 1305 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1306 1307 bn = row/bs; /* Block number */ 1308 bp = row % bs; /* Block Position */ 1309 M = ai[bn+1] - ai[bn]; 1310 *nz = bs*M; 1311 1312 if (v) { 1313 *v = 0; 1314 if (*nz) { 1315 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1316 for (i=0; i<M; i++) { /* for each block in the block row */ 1317 v_i = *v + i*bs; 1318 aa_i = aa + bs2*(ai[bn] + i); 1319 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1320 } 1321 } 1322 } 1323 1324 if (idx) { 1325 *idx = 0; 1326 if (*nz) { 1327 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1328 for (i=0; i<M; i++) { /* for each block in the block row */ 1329 idx_i = *idx + i*bs; 1330 itmp = bs*aj[ai[bn] + i]; 1331 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1332 } 1333 } 1334 } 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1340 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1341 { 1342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1343 PetscErrorCode ierr; 1344 1345 PetscFunctionBegin; 1346 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1352 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1353 { 1354 PetscErrorCode ierr; 1355 1356 PetscFunctionBegin; 1357 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1358 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1359 PetscFunctionReturn(0); 1360 } 1361 1362 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1366 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1367 { 1368 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1369 Mat C; 1370 PetscErrorCode ierr; 1371 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1372 PetscInt *rows,*cols,bs2=a->bs2; 1373 MatScalar *array; 1374 1375 PetscFunctionBegin; 1376 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1377 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1378 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1379 1380 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1381 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1382 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1383 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1384 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1385 ierr = PetscFree(col);CHKERRQ(ierr); 1386 } else { 1387 C = *B; 1388 } 1389 1390 array = a->a; 1391 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1392 for (i=0; i<mbs; i++) { 1393 cols[0] = i*bs; 1394 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1395 len = ai[i+1] - ai[i]; 1396 for (j=0; j<len; j++) { 1397 rows[0] = (*aj++)*bs; 1398 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1399 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1400 array += bs2; 1401 } 1402 } 1403 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1404 1405 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1406 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1407 1408 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1409 *B = C; 1410 } else { 1411 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1412 } 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1418 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1419 { 1420 PetscErrorCode ierr; 1421 Mat Btrans; 1422 1423 PetscFunctionBegin; 1424 *f = PETSC_FALSE; 1425 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1426 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1427 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1433 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1434 { 1435 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1436 PetscErrorCode ierr; 1437 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1438 int fd; 1439 PetscScalar *aa; 1440 FILE *file; 1441 1442 PetscFunctionBegin; 1443 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1444 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1445 col_lens[0] = MAT_FILE_CLASSID; 1446 1447 col_lens[1] = A->rmap->N; 1448 col_lens[2] = A->cmap->n; 1449 col_lens[3] = a->nz*bs2; 1450 1451 /* store lengths of each row and write (including header) to file */ 1452 count = 0; 1453 for (i=0; i<a->mbs; i++) { 1454 for (j=0; j<bs; j++) { 1455 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1456 } 1457 } 1458 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1459 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1460 1461 /* store column indices (zero start index) */ 1462 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1463 count = 0; 1464 for (i=0; i<a->mbs; i++) { 1465 for (j=0; j<bs; j++) { 1466 for (k=a->i[i]; k<a->i[i+1]; k++) { 1467 for (l=0; l<bs; l++) { 1468 jj[count++] = bs*a->j[k] + l; 1469 } 1470 } 1471 } 1472 } 1473 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1474 ierr = PetscFree(jj);CHKERRQ(ierr); 1475 1476 /* store nonzero values */ 1477 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1478 count = 0; 1479 for (i=0; i<a->mbs; i++) { 1480 for (j=0; j<bs; j++) { 1481 for (k=a->i[i]; k<a->i[i+1]; k++) { 1482 for (l=0; l<bs; l++) { 1483 aa[count++] = a->a[bs2*k + l*bs + j]; 1484 } 1485 } 1486 } 1487 } 1488 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1489 ierr = PetscFree(aa);CHKERRQ(ierr); 1490 1491 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1492 if (file) { 1493 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1494 } 1495 PetscFunctionReturn(0); 1496 } 1497 1498 #undef __FUNCT__ 1499 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1500 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1501 { 1502 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1503 PetscErrorCode ierr; 1504 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1505 PetscViewerFormat format; 1506 1507 PetscFunctionBegin; 1508 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1509 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1510 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1511 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1512 const char *matname; 1513 Mat aij; 1514 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1515 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1516 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1517 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1518 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1519 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1520 PetscFunctionReturn(0); 1521 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1522 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1523 for (i=0; i<a->mbs; i++) { 1524 for (j=0; j<bs; j++) { 1525 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1526 for (k=a->i[i]; k<a->i[i+1]; k++) { 1527 for (l=0; l<bs; l++) { 1528 #if defined(PETSC_USE_COMPLEX) 1529 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1530 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1531 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1532 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1533 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1534 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1535 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1536 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1537 } 1538 #else 1539 if (a->a[bs2*k + l*bs + j] != 0.0) { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1541 } 1542 #endif 1543 } 1544 } 1545 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1546 } 1547 } 1548 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1549 } else { 1550 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1551 for (i=0; i<a->mbs; i++) { 1552 for (j=0; j<bs; j++) { 1553 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1554 for (k=a->i[i]; k<a->i[i+1]; k++) { 1555 for (l=0; l<bs; l++) { 1556 #if defined(PETSC_USE_COMPLEX) 1557 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1558 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1559 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1560 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1561 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1562 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1563 } else { 1564 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1565 } 1566 #else 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1568 #endif 1569 } 1570 } 1571 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1572 } 1573 } 1574 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1575 } 1576 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #include <petscdraw.h> 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1583 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1584 { 1585 Mat A = (Mat) Aa; 1586 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1587 PetscErrorCode ierr; 1588 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1589 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1590 MatScalar *aa; 1591 PetscViewer viewer; 1592 PetscViewerFormat format; 1593 1594 PetscFunctionBegin; 1595 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1596 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1597 1598 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1599 1600 /* loop over matrix elements drawing boxes */ 1601 1602 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1603 color = PETSC_DRAW_BLUE; 1604 for (i=0,row=0; i<mbs; i++,row+=bs) { 1605 for (j=a->i[i]; j<a->i[i+1]; j++) { 1606 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1607 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1608 aa = a->a + j*bs2; 1609 for (k=0; k<bs; k++) { 1610 for (l=0; l<bs; l++) { 1611 if (PetscRealPart(*aa++) >= 0.) continue; 1612 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1613 } 1614 } 1615 } 1616 } 1617 color = PETSC_DRAW_CYAN; 1618 for (i=0,row=0; i<mbs; i++,row+=bs) { 1619 for (j=a->i[i]; j<a->i[i+1]; j++) { 1620 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1621 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1622 aa = a->a + j*bs2; 1623 for (k=0; k<bs; k++) { 1624 for (l=0; l<bs; l++) { 1625 if (PetscRealPart(*aa++) != 0.) continue; 1626 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1627 } 1628 } 1629 } 1630 } 1631 color = PETSC_DRAW_RED; 1632 for (i=0,row=0; i<mbs; i++,row+=bs) { 1633 for (j=a->i[i]; j<a->i[i+1]; j++) { 1634 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1635 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1636 aa = a->a + j*bs2; 1637 for (k=0; k<bs; k++) { 1638 for (l=0; l<bs; l++) { 1639 if (PetscRealPart(*aa++) <= 0.) continue; 1640 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1641 } 1642 } 1643 } 1644 } 1645 } else { 1646 /* use contour shading to indicate magnitude of values */ 1647 /* first determine max of all nonzero values */ 1648 PetscDraw popup; 1649 PetscReal scale,maxv = 0.0; 1650 1651 for (i=0; i<a->nz*a->bs2; i++) { 1652 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1653 } 1654 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1655 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1656 if (popup) { 1657 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1658 } 1659 for (i=0,row=0; i<mbs; i++,row+=bs) { 1660 for (j=a->i[i]; j<a->i[i+1]; j++) { 1661 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1662 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1663 aa = a->a + j*bs2; 1664 for (k=0; k<bs; k++) { 1665 for (l=0; l<bs; l++) { 1666 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1667 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1668 } 1669 } 1670 } 1671 } 1672 } 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1678 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1679 { 1680 PetscErrorCode ierr; 1681 PetscReal xl,yl,xr,yr,w,h; 1682 PetscDraw draw; 1683 PetscBool isnull; 1684 1685 PetscFunctionBegin; 1686 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1687 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1688 1689 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1690 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1691 xr += w; yr += h; xl = -w; yl = -h; 1692 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1693 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1694 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatView_SeqBAIJ" 1700 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1701 { 1702 PetscErrorCode ierr; 1703 PetscBool iascii,isbinary,isdraw; 1704 1705 PetscFunctionBegin; 1706 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1707 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1708 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1709 if (iascii) { 1710 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1711 } else if (isbinary) { 1712 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1713 } else if (isdraw) { 1714 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1715 } else { 1716 Mat B; 1717 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1718 ierr = MatView(B,viewer);CHKERRQ(ierr); 1719 ierr = MatDestroy(&B);CHKERRQ(ierr); 1720 } 1721 PetscFunctionReturn(0); 1722 } 1723 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1727 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1728 { 1729 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1730 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1731 PetscInt *ai = a->i,*ailen = a->ilen; 1732 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1733 MatScalar *ap,*aa = a->a; 1734 1735 PetscFunctionBegin; 1736 for (k=0; k<m; k++) { /* loop over rows */ 1737 row = im[k]; brow = row/bs; 1738 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1739 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1740 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1741 nrow = ailen[brow]; 1742 for (l=0; l<n; l++) { /* loop over columns */ 1743 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1744 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1745 col = in[l]; 1746 bcol = col/bs; 1747 cidx = col%bs; 1748 ridx = row%bs; 1749 high = nrow; 1750 low = 0; /* assume unsorted */ 1751 while (high-low > 5) { 1752 t = (low+high)/2; 1753 if (rp[t] > bcol) high = t; 1754 else low = t; 1755 } 1756 for (i=low; i<high; i++) { 1757 if (rp[i] > bcol) break; 1758 if (rp[i] == bcol) { 1759 *v++ = ap[bs2*i+bs*cidx+ridx]; 1760 goto finished; 1761 } 1762 } 1763 *v++ = 0.0; 1764 finished:; 1765 } 1766 } 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1772 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1773 { 1774 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1775 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1776 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1777 PetscErrorCode ierr; 1778 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1779 PetscBool roworiented=a->roworiented; 1780 const PetscScalar *value = v; 1781 MatScalar *ap,*aa = a->a,*bap; 1782 1783 PetscFunctionBegin; 1784 if (roworiented) { 1785 stepval = (n-1)*bs; 1786 } else { 1787 stepval = (m-1)*bs; 1788 } 1789 for (k=0; k<m; k++) { /* loop over added rows */ 1790 row = im[k]; 1791 if (row < 0) continue; 1792 #if defined(PETSC_USE_DEBUG) 1793 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1794 #endif 1795 rp = aj + ai[row]; 1796 ap = aa + bs2*ai[row]; 1797 rmax = imax[row]; 1798 nrow = ailen[row]; 1799 low = 0; 1800 high = nrow; 1801 for (l=0; l<n; l++) { /* loop over added columns */ 1802 if (in[l] < 0) continue; 1803 #if defined(PETSC_USE_DEBUG) 1804 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1805 #endif 1806 col = in[l]; 1807 if (roworiented) { 1808 value = v + (k*(stepval+bs) + l)*bs; 1809 } else { 1810 value = v + (l*(stepval+bs) + k)*bs; 1811 } 1812 if (col <= lastcol) low = 0; 1813 else high = nrow; 1814 lastcol = col; 1815 while (high-low > 7) { 1816 t = (low+high)/2; 1817 if (rp[t] > col) high = t; 1818 else low = t; 1819 } 1820 for (i=low; i<high; i++) { 1821 if (rp[i] > col) break; 1822 if (rp[i] == col) { 1823 bap = ap + bs2*i; 1824 if (roworiented) { 1825 if (is == ADD_VALUES) { 1826 for (ii=0; ii<bs; ii++,value+=stepval) { 1827 for (jj=ii; jj<bs2; jj+=bs) { 1828 bap[jj] += *value++; 1829 } 1830 } 1831 } else { 1832 for (ii=0; ii<bs; ii++,value+=stepval) { 1833 for (jj=ii; jj<bs2; jj+=bs) { 1834 bap[jj] = *value++; 1835 } 1836 } 1837 } 1838 } else { 1839 if (is == ADD_VALUES) { 1840 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1841 for (jj=0; jj<bs; jj++) { 1842 bap[jj] += value[jj]; 1843 } 1844 bap += bs; 1845 } 1846 } else { 1847 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1848 for (jj=0; jj<bs; jj++) { 1849 bap[jj] = value[jj]; 1850 } 1851 bap += bs; 1852 } 1853 } 1854 } 1855 goto noinsert2; 1856 } 1857 } 1858 if (nonew == 1) goto noinsert2; 1859 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1860 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1861 N = nrow++ - 1; high++; 1862 /* shift up all the later entries in this row */ 1863 for (ii=N; ii>=i; ii--) { 1864 rp[ii+1] = rp[ii]; 1865 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1866 } 1867 if (N >= i) { 1868 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1869 } 1870 rp[i] = col; 1871 bap = ap + bs2*i; 1872 if (roworiented) { 1873 for (ii=0; ii<bs; ii++,value+=stepval) { 1874 for (jj=ii; jj<bs2; jj+=bs) { 1875 bap[jj] = *value++; 1876 } 1877 } 1878 } else { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=0; jj<bs; jj++) { 1881 *bap++ = *value++; 1882 } 1883 } 1884 } 1885 noinsert2:; 1886 low = i; 1887 } 1888 ailen[row] = nrow; 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1895 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1896 { 1897 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1898 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1899 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1900 PetscErrorCode ierr; 1901 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1902 MatScalar *aa = a->a,*ap; 1903 PetscReal ratio=0.6; 1904 1905 PetscFunctionBegin; 1906 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1907 1908 if (m) rmax = ailen[0]; 1909 for (i=1; i<mbs; i++) { 1910 /* move each row back by the amount of empty slots (fshift) before it*/ 1911 fshift += imax[i-1] - ailen[i-1]; 1912 rmax = PetscMax(rmax,ailen[i]); 1913 if (fshift) { 1914 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1915 N = ailen[i]; 1916 for (j=0; j<N; j++) { 1917 ip[j-fshift] = ip[j]; 1918 1919 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1920 } 1921 } 1922 ai[i] = ai[i-1] + ailen[i-1]; 1923 } 1924 if (mbs) { 1925 fshift += imax[mbs-1] - ailen[mbs-1]; 1926 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1927 } 1928 1929 /* reset ilen and imax for each row */ 1930 a->nonzerorowcnt = 0; 1931 for (i=0; i<mbs; i++) { 1932 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1933 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1934 } 1935 a->nz = ai[mbs]; 1936 1937 /* diagonals may have moved, so kill the diagonal pointers */ 1938 a->idiagvalid = PETSC_FALSE; 1939 if (fshift && a->diag) { 1940 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1941 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1942 a->diag = 0; 1943 } 1944 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1945 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1946 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1947 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1948 1949 A->info.mallocs += a->reallocs; 1950 a->reallocs = 0; 1951 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1952 a->rmax = rmax; 1953 1954 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1955 PetscFunctionReturn(0); 1956 } 1957 1958 /* 1959 This function returns an array of flags which indicate the locations of contiguous 1960 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1961 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1962 Assume: sizes should be long enough to hold all the values. 1963 */ 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1966 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1967 { 1968 PetscInt i,j,k,row; 1969 PetscBool flg; 1970 1971 PetscFunctionBegin; 1972 for (i=0,j=0; i<n; j++) { 1973 row = idx[i]; 1974 if (row%bs!=0) { /* Not the begining of a block */ 1975 sizes[j] = 1; 1976 i++; 1977 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1978 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1979 i++; 1980 } else { /* Begining of the block, so check if the complete block exists */ 1981 flg = PETSC_TRUE; 1982 for (k=1; k<bs; k++) { 1983 if (row+k != idx[i+k]) { /* break in the block */ 1984 flg = PETSC_FALSE; 1985 break; 1986 } 1987 } 1988 if (flg) { /* No break in the bs */ 1989 sizes[j] = bs; 1990 i += bs; 1991 } else { 1992 sizes[j] = 1; 1993 i++; 1994 } 1995 } 1996 } 1997 *bs_max = j; 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2003 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2004 { 2005 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2006 PetscErrorCode ierr; 2007 PetscInt i,j,k,count,*rows; 2008 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2009 PetscScalar zero = 0.0; 2010 MatScalar *aa; 2011 const PetscScalar *xx; 2012 PetscScalar *bb; 2013 2014 PetscFunctionBegin; 2015 /* fix right hand side if needed */ 2016 if (x && b) { 2017 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2018 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2019 for (i=0; i<is_n; i++) { 2020 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2021 } 2022 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2023 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2024 } 2025 2026 /* Make a copy of the IS and sort it */ 2027 /* allocate memory for rows,sizes */ 2028 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2029 2030 /* copy IS values to rows, and sort them */ 2031 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2032 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2033 2034 if (baij->keepnonzeropattern) { 2035 for (i=0; i<is_n; i++) sizes[i] = 1; 2036 bs_max = is_n; 2037 } else { 2038 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2039 A->nonzerostate++; 2040 } 2041 2042 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2043 row = rows[j]; 2044 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2045 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2046 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2047 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2048 if (diag != (PetscScalar)0.0) { 2049 if (baij->ilen[row/bs] > 0) { 2050 baij->ilen[row/bs] = 1; 2051 baij->j[baij->i[row/bs]] = row/bs; 2052 2053 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2054 } 2055 /* Now insert all the diagonal values for this bs */ 2056 for (k=0; k<bs; k++) { 2057 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2058 } 2059 } else { /* (diag == 0.0) */ 2060 baij->ilen[row/bs] = 0; 2061 } /* end (diag == 0.0) */ 2062 } else { /* (sizes[i] != bs) */ 2063 #if defined(PETSC_USE_DEBUG) 2064 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2065 #endif 2066 for (k=0; k<count; k++) { 2067 aa[0] = zero; 2068 aa += bs; 2069 } 2070 if (diag != (PetscScalar)0.0) { 2071 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2072 } 2073 } 2074 } 2075 2076 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2077 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 PetscFunctionReturn(0); 2079 } 2080 2081 #undef __FUNCT__ 2082 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2083 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2084 { 2085 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2086 PetscErrorCode ierr; 2087 PetscInt i,j,k,count; 2088 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2089 PetscScalar zero = 0.0; 2090 MatScalar *aa; 2091 const PetscScalar *xx; 2092 PetscScalar *bb; 2093 PetscBool *zeroed,vecs = PETSC_FALSE; 2094 2095 PetscFunctionBegin; 2096 /* fix right hand side if needed */ 2097 if (x && b) { 2098 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2099 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2100 vecs = PETSC_TRUE; 2101 } 2102 2103 /* zero the columns */ 2104 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2105 for (i=0; i<is_n; i++) { 2106 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2107 zeroed[is_idx[i]] = PETSC_TRUE; 2108 } 2109 for (i=0; i<A->rmap->N; i++) { 2110 if (!zeroed[i]) { 2111 row = i/bs; 2112 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2113 for (k=0; k<bs; k++) { 2114 col = bs*baij->j[j] + k; 2115 if (zeroed[col]) { 2116 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2117 if (vecs) bb[i] -= aa[0]*xx[col]; 2118 aa[0] = 0.0; 2119 } 2120 } 2121 } 2122 } else if (vecs) bb[i] = diag*xx[i]; 2123 } 2124 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2125 if (vecs) { 2126 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2127 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2128 } 2129 2130 /* zero the rows */ 2131 for (i=0; i<is_n; i++) { 2132 row = is_idx[i]; 2133 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2134 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2135 for (k=0; k<count; k++) { 2136 aa[0] = zero; 2137 aa += bs; 2138 } 2139 if (diag != (PetscScalar)0.0) { 2140 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2141 } 2142 } 2143 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 #undef __FUNCT__ 2148 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2149 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2150 { 2151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2152 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2153 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2154 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2155 PetscErrorCode ierr; 2156 PetscInt ridx,cidx,bs2=a->bs2; 2157 PetscBool roworiented=a->roworiented; 2158 MatScalar *ap,value,*aa=a->a,*bap; 2159 2160 PetscFunctionBegin; 2161 for (k=0; k<m; k++) { /* loop over added rows */ 2162 row = im[k]; 2163 brow = row/bs; 2164 if (row < 0) continue; 2165 #if defined(PETSC_USE_DEBUG) 2166 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2167 #endif 2168 rp = aj + ai[brow]; 2169 ap = aa + bs2*ai[brow]; 2170 rmax = imax[brow]; 2171 nrow = ailen[brow]; 2172 low = 0; 2173 high = nrow; 2174 for (l=0; l<n; l++) { /* loop over added columns */ 2175 if (in[l] < 0) continue; 2176 #if defined(PETSC_USE_DEBUG) 2177 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2178 #endif 2179 col = in[l]; bcol = col/bs; 2180 ridx = row % bs; cidx = col % bs; 2181 if (roworiented) { 2182 value = v[l + k*n]; 2183 } else { 2184 value = v[k + l*m]; 2185 } 2186 if (col <= lastcol) low = 0; else high = nrow; 2187 lastcol = col; 2188 while (high-low > 7) { 2189 t = (low+high)/2; 2190 if (rp[t] > bcol) high = t; 2191 else low = t; 2192 } 2193 for (i=low; i<high; i++) { 2194 if (rp[i] > bcol) break; 2195 if (rp[i] == bcol) { 2196 bap = ap + bs2*i + bs*cidx + ridx; 2197 if (is == ADD_VALUES) *bap += value; 2198 else *bap = value; 2199 goto noinsert1; 2200 } 2201 } 2202 if (nonew == 1) goto noinsert1; 2203 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2204 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2205 N = nrow++ - 1; high++; 2206 /* shift up all the later entries in this row */ 2207 for (ii=N; ii>=i; ii--) { 2208 rp[ii+1] = rp[ii]; 2209 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2210 } 2211 if (N>=i) { 2212 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2213 } 2214 rp[i] = bcol; 2215 ap[bs2*i + bs*cidx + ridx] = value; 2216 a->nz++; 2217 A->nonzerostate++; 2218 noinsert1:; 2219 low = i; 2220 } 2221 ailen[brow] = nrow; 2222 } 2223 PetscFunctionReturn(0); 2224 } 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2228 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2229 { 2230 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2231 Mat outA; 2232 PetscErrorCode ierr; 2233 PetscBool row_identity,col_identity; 2234 2235 PetscFunctionBegin; 2236 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2237 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2238 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2239 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2240 2241 outA = inA; 2242 inA->factortype = MAT_FACTOR_LU; 2243 2244 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2245 2246 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2247 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2248 a->row = row; 2249 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2250 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2251 a->col = col; 2252 2253 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2254 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2255 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2256 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2257 2258 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2259 if (!a->solve_work) { 2260 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2261 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2262 } 2263 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2269 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2270 { 2271 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2272 PetscInt i,nz,mbs; 2273 2274 PetscFunctionBegin; 2275 nz = baij->maxnz; 2276 mbs = baij->mbs; 2277 for (i=0; i<nz; i++) { 2278 baij->j[i] = indices[i]; 2279 } 2280 baij->nz = nz; 2281 for (i=0; i<mbs; i++) { 2282 baij->ilen[i] = baij->imax[i]; 2283 } 2284 PetscFunctionReturn(0); 2285 } 2286 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2289 /*@ 2290 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2291 in the matrix. 2292 2293 Input Parameters: 2294 + mat - the SeqBAIJ matrix 2295 - indices - the column indices 2296 2297 Level: advanced 2298 2299 Notes: 2300 This can be called if you have precomputed the nonzero structure of the 2301 matrix and want to provide it to the matrix object to improve the performance 2302 of the MatSetValues() operation. 2303 2304 You MUST have set the correct numbers of nonzeros per row in the call to 2305 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2306 2307 MUST be called before any calls to MatSetValues(); 2308 2309 @*/ 2310 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2311 { 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2316 PetscValidPointer(indices,2); 2317 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2323 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2324 { 2325 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2326 PetscErrorCode ierr; 2327 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2328 PetscReal atmp; 2329 PetscScalar *x,zero = 0.0; 2330 MatScalar *aa; 2331 PetscInt ncols,brow,krow,kcol; 2332 2333 PetscFunctionBegin; 2334 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2335 bs = A->rmap->bs; 2336 aa = a->a; 2337 ai = a->i; 2338 aj = a->j; 2339 mbs = a->mbs; 2340 2341 ierr = VecSet(v,zero);CHKERRQ(ierr); 2342 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2343 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2344 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2345 for (i=0; i<mbs; i++) { 2346 ncols = ai[1] - ai[0]; ai++; 2347 brow = bs*i; 2348 for (j=0; j<ncols; j++) { 2349 for (kcol=0; kcol<bs; kcol++) { 2350 for (krow=0; krow<bs; krow++) { 2351 atmp = PetscAbsScalar(*aa);aa++; 2352 row = brow + krow; /* row index */ 2353 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2354 } 2355 } 2356 aj++; 2357 } 2358 } 2359 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2360 PetscFunctionReturn(0); 2361 } 2362 2363 #undef __FUNCT__ 2364 #define __FUNCT__ "MatCopy_SeqBAIJ" 2365 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2366 { 2367 PetscErrorCode ierr; 2368 2369 PetscFunctionBegin; 2370 /* If the two matrices have the same copy implementation, use fast copy. */ 2371 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2372 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2373 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2374 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2375 2376 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2377 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2378 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2379 } else { 2380 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2387 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2388 { 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2398 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2399 { 2400 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2401 2402 PetscFunctionBegin; 2403 *array = a->a; 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2409 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2410 { 2411 PetscFunctionBegin; 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2417 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2418 { 2419 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2420 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2421 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 /* Set the number of nonzeros in the new matrix */ 2426 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2432 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2433 { 2434 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2435 PetscErrorCode ierr; 2436 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2437 PetscBLASInt one=1; 2438 2439 PetscFunctionBegin; 2440 if (str == SAME_NONZERO_PATTERN) { 2441 PetscScalar alpha = a; 2442 PetscBLASInt bnz; 2443 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2444 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2445 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2446 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2447 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2448 } else { 2449 Mat B; 2450 PetscInt *nnz; 2451 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2452 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2453 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2454 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2455 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2456 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2457 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2458 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2459 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2460 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2461 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2462 ierr = PetscFree(nnz);CHKERRQ(ierr); 2463 } 2464 PetscFunctionReturn(0); 2465 } 2466 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2469 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2470 { 2471 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2472 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2473 MatScalar *aa = a->a; 2474 2475 PetscFunctionBegin; 2476 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2482 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2483 { 2484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2485 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2486 MatScalar *aa = a->a; 2487 2488 PetscFunctionBegin; 2489 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2490 PetscFunctionReturn(0); 2491 } 2492 2493 #undef __FUNCT__ 2494 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2495 /* 2496 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2497 */ 2498 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2499 { 2500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2501 PetscErrorCode ierr; 2502 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2503 PetscInt nz = a->i[m],row,*jj,mr,col; 2504 2505 PetscFunctionBegin; 2506 *nn = n; 2507 if (!ia) PetscFunctionReturn(0); 2508 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2509 else { 2510 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2511 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2512 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2513 jj = a->j; 2514 for (i=0; i<nz; i++) { 2515 collengths[jj[i]]++; 2516 } 2517 cia[0] = oshift; 2518 for (i=0; i<n; i++) { 2519 cia[i+1] = cia[i] + collengths[i]; 2520 } 2521 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2522 jj = a->j; 2523 for (row=0; row<m; row++) { 2524 mr = a->i[row+1] - a->i[row]; 2525 for (i=0; i<mr; i++) { 2526 col = *jj++; 2527 2528 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2529 } 2530 } 2531 ierr = PetscFree(collengths);CHKERRQ(ierr); 2532 *ia = cia; *ja = cja; 2533 } 2534 PetscFunctionReturn(0); 2535 } 2536 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2539 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2540 { 2541 PetscErrorCode ierr; 2542 2543 PetscFunctionBegin; 2544 if (!ia) PetscFunctionReturn(0); 2545 ierr = PetscFree(*ia);CHKERRQ(ierr); 2546 ierr = PetscFree(*ja);CHKERRQ(ierr); 2547 PetscFunctionReturn(0); 2548 } 2549 2550 /* 2551 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2552 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2553 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2554 */ 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2557 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2558 { 2559 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2560 PetscErrorCode ierr; 2561 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2562 PetscInt nz = a->i[m],row,*jj,mr,col; 2563 PetscInt *cspidx; 2564 2565 PetscFunctionBegin; 2566 *nn = n; 2567 if (!ia) PetscFunctionReturn(0); 2568 2569 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2570 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2571 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2572 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2573 jj = a->j; 2574 for (i=0; i<nz; i++) { 2575 collengths[jj[i]]++; 2576 } 2577 cia[0] = oshift; 2578 for (i=0; i<n; i++) { 2579 cia[i+1] = cia[i] + collengths[i]; 2580 } 2581 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2582 jj = a->j; 2583 for (row=0; row<m; row++) { 2584 mr = a->i[row+1] - a->i[row]; 2585 for (i=0; i<mr; i++) { 2586 col = *jj++; 2587 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2588 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2589 } 2590 } 2591 ierr = PetscFree(collengths);CHKERRQ(ierr); 2592 *ia = cia; *ja = cja; 2593 *spidx = cspidx; 2594 PetscFunctionReturn(0); 2595 } 2596 2597 #undef __FUNCT__ 2598 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2599 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2600 { 2601 PetscErrorCode ierr; 2602 2603 PetscFunctionBegin; 2604 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2605 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2606 PetscFunctionReturn(0); 2607 } 2608 2609 #undef __FUNCT__ 2610 #define __FUNCT__ "MatShift_SeqBAIJ" 2611 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2612 { 2613 PetscErrorCode ierr; 2614 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2615 2616 PetscFunctionBegin; 2617 if (!Y->preallocated || !aij->nz) { 2618 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2619 } 2620 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2621 PetscFunctionReturn(0); 2622 } 2623 2624 /* -------------------------------------------------------------------*/ 2625 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2626 MatGetRow_SeqBAIJ, 2627 MatRestoreRow_SeqBAIJ, 2628 MatMult_SeqBAIJ_N, 2629 /* 4*/ MatMultAdd_SeqBAIJ_N, 2630 MatMultTranspose_SeqBAIJ, 2631 MatMultTransposeAdd_SeqBAIJ, 2632 0, 2633 0, 2634 0, 2635 /* 10*/ 0, 2636 MatLUFactor_SeqBAIJ, 2637 0, 2638 0, 2639 MatTranspose_SeqBAIJ, 2640 /* 15*/ MatGetInfo_SeqBAIJ, 2641 MatEqual_SeqBAIJ, 2642 MatGetDiagonal_SeqBAIJ, 2643 MatDiagonalScale_SeqBAIJ, 2644 MatNorm_SeqBAIJ, 2645 /* 20*/ 0, 2646 MatAssemblyEnd_SeqBAIJ, 2647 MatSetOption_SeqBAIJ, 2648 MatZeroEntries_SeqBAIJ, 2649 /* 24*/ MatZeroRows_SeqBAIJ, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /* 29*/ MatSetUp_SeqBAIJ, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /* 34*/ MatDuplicate_SeqBAIJ, 2660 0, 2661 0, 2662 MatILUFactor_SeqBAIJ, 2663 0, 2664 /* 39*/ MatAXPY_SeqBAIJ, 2665 MatGetSubMatrices_SeqBAIJ, 2666 MatIncreaseOverlap_SeqBAIJ, 2667 MatGetValues_SeqBAIJ, 2668 MatCopy_SeqBAIJ, 2669 /* 44*/ 0, 2670 MatScale_SeqBAIJ, 2671 MatShift_SeqBAIJ, 2672 0, 2673 MatZeroRowsColumns_SeqBAIJ, 2674 /* 49*/ 0, 2675 MatGetRowIJ_SeqBAIJ, 2676 MatRestoreRowIJ_SeqBAIJ, 2677 MatGetColumnIJ_SeqBAIJ, 2678 MatRestoreColumnIJ_SeqBAIJ, 2679 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2680 0, 2681 0, 2682 0, 2683 MatSetValuesBlocked_SeqBAIJ, 2684 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2685 MatDestroy_SeqBAIJ, 2686 MatView_SeqBAIJ, 2687 0, 2688 0, 2689 /* 64*/ 0, 2690 0, 2691 0, 2692 0, 2693 0, 2694 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2695 0, 2696 MatConvert_Basic, 2697 0, 2698 0, 2699 /* 74*/ 0, 2700 MatFDColoringApply_BAIJ, 2701 0, 2702 0, 2703 0, 2704 /* 79*/ 0, 2705 0, 2706 0, 2707 0, 2708 MatLoad_SeqBAIJ, 2709 /* 84*/ 0, 2710 0, 2711 0, 2712 0, 2713 0, 2714 /* 89*/ 0, 2715 0, 2716 0, 2717 0, 2718 0, 2719 /* 94*/ 0, 2720 0, 2721 0, 2722 0, 2723 0, 2724 /* 99*/ 0, 2725 0, 2726 0, 2727 0, 2728 0, 2729 /*104*/ 0, 2730 MatRealPart_SeqBAIJ, 2731 MatImaginaryPart_SeqBAIJ, 2732 0, 2733 0, 2734 /*109*/ 0, 2735 0, 2736 0, 2737 0, 2738 MatMissingDiagonal_SeqBAIJ, 2739 /*114*/ 0, 2740 0, 2741 0, 2742 0, 2743 0, 2744 /*119*/ 0, 2745 0, 2746 MatMultHermitianTranspose_SeqBAIJ, 2747 MatMultHermitianTransposeAdd_SeqBAIJ, 2748 0, 2749 /*124*/ 0, 2750 0, 2751 MatInvertBlockDiagonal_SeqBAIJ, 2752 0, 2753 0, 2754 /*129*/ 0, 2755 0, 2756 0, 2757 0, 2758 0, 2759 /*134*/ 0, 2760 0, 2761 0, 2762 0, 2763 0, 2764 /*139*/ 0, 2765 0, 2766 0, 2767 MatFDColoringSetUp_SeqXAIJ, 2768 0, 2769 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2770 }; 2771 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2774 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2775 { 2776 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2777 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2778 PetscErrorCode ierr; 2779 2780 PetscFunctionBegin; 2781 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2782 2783 /* allocate space for values if not already there */ 2784 if (!aij->saved_values) { 2785 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2786 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2787 } 2788 2789 /* copy values over */ 2790 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2791 PetscFunctionReturn(0); 2792 } 2793 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2796 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2797 { 2798 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2799 PetscErrorCode ierr; 2800 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2801 2802 PetscFunctionBegin; 2803 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2804 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2805 2806 /* copy values over */ 2807 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2808 PetscFunctionReturn(0); 2809 } 2810 2811 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2812 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2813 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2816 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2817 { 2818 Mat_SeqBAIJ *b; 2819 PetscErrorCode ierr; 2820 PetscInt i,mbs,nbs,bs2; 2821 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2822 2823 PetscFunctionBegin; 2824 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2825 if (nz == MAT_SKIP_ALLOCATION) { 2826 skipallocation = PETSC_TRUE; 2827 nz = 0; 2828 } 2829 2830 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2831 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2832 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2833 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2834 2835 B->preallocated = PETSC_TRUE; 2836 2837 mbs = B->rmap->n/bs; 2838 nbs = B->cmap->n/bs; 2839 bs2 = bs*bs; 2840 2841 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2842 2843 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2844 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2845 if (nnz) { 2846 for (i=0; i<mbs; i++) { 2847 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2848 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2849 } 2850 } 2851 2852 b = (Mat_SeqBAIJ*)B->data; 2853 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2854 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2855 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2856 2857 if (!flg) { 2858 switch (bs) { 2859 case 1: 2860 B->ops->mult = MatMult_SeqBAIJ_1; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2862 break; 2863 case 2: 2864 B->ops->mult = MatMult_SeqBAIJ_2; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2866 break; 2867 case 3: 2868 B->ops->mult = MatMult_SeqBAIJ_3; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2870 break; 2871 case 4: 2872 B->ops->mult = MatMult_SeqBAIJ_4; 2873 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2874 break; 2875 case 5: 2876 B->ops->mult = MatMult_SeqBAIJ_5; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2878 break; 2879 case 6: 2880 B->ops->mult = MatMult_SeqBAIJ_6; 2881 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2882 break; 2883 case 7: 2884 B->ops->mult = MatMult_SeqBAIJ_7; 2885 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2886 break; 2887 case 15: 2888 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2889 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2890 break; 2891 default: 2892 B->ops->mult = MatMult_SeqBAIJ_N; 2893 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2894 break; 2895 } 2896 } 2897 B->ops->sor = MatSOR_SeqBAIJ; 2898 b->mbs = mbs; 2899 b->nbs = nbs; 2900 if (!skipallocation) { 2901 if (!b->imax) { 2902 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2903 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2904 2905 b->free_imax_ilen = PETSC_TRUE; 2906 } 2907 /* b->ilen will count nonzeros in each block row so far. */ 2908 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2909 if (!nnz) { 2910 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2911 else if (nz < 0) nz = 1; 2912 for (i=0; i<mbs; i++) b->imax[i] = nz; 2913 nz = nz*mbs; 2914 } else { 2915 nz = 0; 2916 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2917 } 2918 2919 /* allocate the matrix space */ 2920 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2921 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2922 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2923 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2924 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2925 2926 b->singlemalloc = PETSC_TRUE; 2927 b->i[0] = 0; 2928 for (i=1; i<mbs+1; i++) { 2929 b->i[i] = b->i[i-1] + b->imax[i-1]; 2930 } 2931 b->free_a = PETSC_TRUE; 2932 b->free_ij = PETSC_TRUE; 2933 } else { 2934 b->free_a = PETSC_FALSE; 2935 b->free_ij = PETSC_FALSE; 2936 } 2937 2938 b->bs2 = bs2; 2939 b->mbs = mbs; 2940 b->nz = 0; 2941 b->maxnz = nz; 2942 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2943 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2944 PetscFunctionReturn(0); 2945 } 2946 2947 #undef __FUNCT__ 2948 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2949 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2950 { 2951 PetscInt i,m,nz,nz_max=0,*nnz; 2952 PetscScalar *values=0; 2953 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2954 PetscErrorCode ierr; 2955 2956 PetscFunctionBegin; 2957 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2958 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2959 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2960 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2961 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2962 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2963 m = B->rmap->n/bs; 2964 2965 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2966 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2967 for (i=0; i<m; i++) { 2968 nz = ii[i+1]- ii[i]; 2969 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2970 nz_max = PetscMax(nz_max, nz); 2971 nnz[i] = nz; 2972 } 2973 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2974 ierr = PetscFree(nnz);CHKERRQ(ierr); 2975 2976 values = (PetscScalar*)V; 2977 if (!values) { 2978 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2979 } 2980 for (i=0; i<m; i++) { 2981 PetscInt ncols = ii[i+1] - ii[i]; 2982 const PetscInt *icols = jj + ii[i]; 2983 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2984 if (!roworiented) { 2985 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2986 } else { 2987 PetscInt j; 2988 for (j=0; j<ncols; j++) { 2989 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2990 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2991 } 2992 } 2993 } 2994 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2995 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2996 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2997 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2998 PetscFunctionReturn(0); 2999 } 3000 3001 /*MC 3002 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3003 block sparse compressed row format. 3004 3005 Options Database Keys: 3006 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3007 3008 Level: beginner 3009 3010 .seealso: MatCreateSeqBAIJ() 3011 M*/ 3012 3013 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3014 3015 #undef __FUNCT__ 3016 #define __FUNCT__ "MatCreate_SeqBAIJ" 3017 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3018 { 3019 PetscErrorCode ierr; 3020 PetscMPIInt size; 3021 Mat_SeqBAIJ *b; 3022 3023 PetscFunctionBegin; 3024 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3025 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3026 3027 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3028 B->data = (void*)b; 3029 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3030 3031 b->row = 0; 3032 b->col = 0; 3033 b->icol = 0; 3034 b->reallocs = 0; 3035 b->saved_values = 0; 3036 3037 b->roworiented = PETSC_TRUE; 3038 b->nonew = 0; 3039 b->diag = 0; 3040 B->spptr = 0; 3041 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3042 b->keepnonzeropattern = PETSC_FALSE; 3043 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3055 PetscFunctionReturn(0); 3056 } 3057 3058 #undef __FUNCT__ 3059 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3060 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3061 { 3062 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3063 PetscErrorCode ierr; 3064 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3065 3066 PetscFunctionBegin; 3067 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3068 3069 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3070 c->imax = a->imax; 3071 c->ilen = a->ilen; 3072 c->free_imax_ilen = PETSC_FALSE; 3073 } else { 3074 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3075 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3076 for (i=0; i<mbs; i++) { 3077 c->imax[i] = a->imax[i]; 3078 c->ilen[i] = a->ilen[i]; 3079 } 3080 c->free_imax_ilen = PETSC_TRUE; 3081 } 3082 3083 /* allocate the matrix space */ 3084 if (mallocmatspace) { 3085 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3086 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3087 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3088 3089 c->i = a->i; 3090 c->j = a->j; 3091 c->singlemalloc = PETSC_FALSE; 3092 c->free_a = PETSC_TRUE; 3093 c->free_ij = PETSC_FALSE; 3094 c->parent = A; 3095 C->preallocated = PETSC_TRUE; 3096 C->assembled = PETSC_TRUE; 3097 3098 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3099 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3100 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3101 } else { 3102 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3103 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3104 3105 c->singlemalloc = PETSC_TRUE; 3106 c->free_a = PETSC_TRUE; 3107 c->free_ij = PETSC_TRUE; 3108 3109 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3110 if (mbs > 0) { 3111 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3112 if (cpvalues == MAT_COPY_VALUES) { 3113 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3114 } else { 3115 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3116 } 3117 } 3118 C->preallocated = PETSC_TRUE; 3119 C->assembled = PETSC_TRUE; 3120 } 3121 } 3122 3123 c->roworiented = a->roworiented; 3124 c->nonew = a->nonew; 3125 3126 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3127 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3128 3129 c->bs2 = a->bs2; 3130 c->mbs = a->mbs; 3131 c->nbs = a->nbs; 3132 3133 if (a->diag) { 3134 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3135 c->diag = a->diag; 3136 c->free_diag = PETSC_FALSE; 3137 } else { 3138 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3139 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3140 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3141 c->free_diag = PETSC_TRUE; 3142 } 3143 } else c->diag = 0; 3144 3145 c->nz = a->nz; 3146 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3147 c->solve_work = NULL; 3148 c->mult_work = NULL; 3149 c->sor_workt = NULL; 3150 c->sor_work = NULL; 3151 3152 c->compressedrow.use = a->compressedrow.use; 3153 c->compressedrow.nrows = a->compressedrow.nrows; 3154 if (a->compressedrow.use) { 3155 i = a->compressedrow.nrows; 3156 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3157 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3158 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3159 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3160 } else { 3161 c->compressedrow.use = PETSC_FALSE; 3162 c->compressedrow.i = NULL; 3163 c->compressedrow.rindex = NULL; 3164 } 3165 C->nonzerostate = A->nonzerostate; 3166 3167 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3168 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3169 PetscFunctionReturn(0); 3170 } 3171 3172 #undef __FUNCT__ 3173 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3174 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3175 { 3176 PetscErrorCode ierr; 3177 3178 PetscFunctionBegin; 3179 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3180 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3181 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3182 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3183 PetscFunctionReturn(0); 3184 } 3185 3186 #undef __FUNCT__ 3187 #define __FUNCT__ "MatLoad_SeqBAIJ" 3188 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3189 { 3190 Mat_SeqBAIJ *a; 3191 PetscErrorCode ierr; 3192 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3193 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3194 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3195 PetscInt *masked,nmask,tmp,bs2,ishift; 3196 PetscMPIInt size; 3197 int fd; 3198 PetscScalar *aa; 3199 MPI_Comm comm; 3200 3201 PetscFunctionBegin; 3202 /* force binary viewer to load .info file if it has not yet done so */ 3203 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3204 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3205 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3206 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3207 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3208 if (bs < 0) bs = 1; 3209 bs2 = bs*bs; 3210 3211 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3212 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3213 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3214 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3215 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3216 M = header[1]; N = header[2]; nz = header[3]; 3217 3218 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3219 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3220 3221 /* 3222 This code adds extra rows to make sure the number of rows is 3223 divisible by the blocksize 3224 */ 3225 mbs = M/bs; 3226 extra_rows = bs - M + bs*(mbs); 3227 if (extra_rows == bs) extra_rows = 0; 3228 else mbs++; 3229 if (extra_rows) { 3230 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3231 } 3232 3233 /* Set global sizes if not already set */ 3234 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3235 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3236 } else { /* Check if the matrix global sizes are correct */ 3237 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3238 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3239 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3240 } 3241 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3242 } 3243 3244 /* read in row lengths */ 3245 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3246 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3247 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3248 3249 /* read in column indices */ 3250 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3251 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3252 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3253 3254 /* loop over row lengths determining block row lengths */ 3255 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3256 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3257 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3258 rowcount = 0; 3259 nzcount = 0; 3260 for (i=0; i<mbs; i++) { 3261 nmask = 0; 3262 for (j=0; j<bs; j++) { 3263 kmax = rowlengths[rowcount]; 3264 for (k=0; k<kmax; k++) { 3265 tmp = jj[nzcount++]/bs; 3266 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3267 } 3268 rowcount++; 3269 } 3270 browlengths[i] += nmask; 3271 /* zero out the mask elements we set */ 3272 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3273 } 3274 3275 /* Do preallocation */ 3276 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3277 a = (Mat_SeqBAIJ*)newmat->data; 3278 3279 /* set matrix "i" values */ 3280 a->i[0] = 0; 3281 for (i=1; i<= mbs; i++) { 3282 a->i[i] = a->i[i-1] + browlengths[i-1]; 3283 a->ilen[i-1] = browlengths[i-1]; 3284 } 3285 a->nz = 0; 3286 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3287 3288 /* read in nonzero values */ 3289 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3290 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3291 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3292 3293 /* set "a" and "j" values into matrix */ 3294 nzcount = 0; jcount = 0; 3295 for (i=0; i<mbs; i++) { 3296 nzcountb = nzcount; 3297 nmask = 0; 3298 for (j=0; j<bs; j++) { 3299 kmax = rowlengths[i*bs+j]; 3300 for (k=0; k<kmax; k++) { 3301 tmp = jj[nzcount++]/bs; 3302 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3303 } 3304 } 3305 /* sort the masked values */ 3306 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3307 3308 /* set "j" values into matrix */ 3309 maskcount = 1; 3310 for (j=0; j<nmask; j++) { 3311 a->j[jcount++] = masked[j]; 3312 mask[masked[j]] = maskcount++; 3313 } 3314 /* set "a" values into matrix */ 3315 ishift = bs2*a->i[i]; 3316 for (j=0; j<bs; j++) { 3317 kmax = rowlengths[i*bs+j]; 3318 for (k=0; k<kmax; k++) { 3319 tmp = jj[nzcountb]/bs; 3320 block = mask[tmp] - 1; 3321 point = jj[nzcountb] - bs*tmp; 3322 idx = ishift + bs2*block + j + bs*point; 3323 a->a[idx] = (MatScalar)aa[nzcountb++]; 3324 } 3325 } 3326 /* zero out the mask elements we set */ 3327 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3328 } 3329 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3330 3331 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3332 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3333 ierr = PetscFree(aa);CHKERRQ(ierr); 3334 ierr = PetscFree(jj);CHKERRQ(ierr); 3335 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3336 3337 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3338 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3339 PetscFunctionReturn(0); 3340 } 3341 3342 #undef __FUNCT__ 3343 #define __FUNCT__ "MatCreateSeqBAIJ" 3344 /*@C 3345 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3346 compressed row) format. For good matrix assembly performance the 3347 user should preallocate the matrix storage by setting the parameter nz 3348 (or the array nnz). By setting these parameters accurately, performance 3349 during matrix assembly can be increased by more than a factor of 50. 3350 3351 Collective on MPI_Comm 3352 3353 Input Parameters: 3354 + comm - MPI communicator, set to PETSC_COMM_SELF 3355 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3356 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3357 . m - number of rows 3358 . n - number of columns 3359 . nz - number of nonzero blocks per block row (same for all rows) 3360 - nnz - array containing the number of nonzero blocks in the various block rows 3361 (possibly different for each block row) or NULL 3362 3363 Output Parameter: 3364 . A - the matrix 3365 3366 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3367 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3368 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3369 3370 Options Database Keys: 3371 . -mat_no_unroll - uses code that does not unroll the loops in the 3372 block calculations (much slower) 3373 . -mat_block_size - size of the blocks to use 3374 3375 Level: intermediate 3376 3377 Notes: 3378 The number of rows and columns must be divisible by blocksize. 3379 3380 If the nnz parameter is given then the nz parameter is ignored 3381 3382 A nonzero block is any block that as 1 or more nonzeros in it 3383 3384 The block AIJ format is fully compatible with standard Fortran 77 3385 storage. That is, the stored row and column indices can begin at 3386 either one (as in Fortran) or zero. See the users' manual for details. 3387 3388 Specify the preallocated storage with either nz or nnz (not both). 3389 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3390 allocation. See Users-Manual: ch_mat for details. 3391 matrices. 3392 3393 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3394 @*/ 3395 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3396 { 3397 PetscErrorCode ierr; 3398 3399 PetscFunctionBegin; 3400 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3401 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3402 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3403 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3404 PetscFunctionReturn(0); 3405 } 3406 3407 #undef __FUNCT__ 3408 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3409 /*@C 3410 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3411 per row in the matrix. For good matrix assembly performance the 3412 user should preallocate the matrix storage by setting the parameter nz 3413 (or the array nnz). By setting these parameters accurately, performance 3414 during matrix assembly can be increased by more than a factor of 50. 3415 3416 Collective on MPI_Comm 3417 3418 Input Parameters: 3419 + B - the matrix 3420 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3421 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3422 . nz - number of block nonzeros per block row (same for all rows) 3423 - nnz - array containing the number of block nonzeros in the various block rows 3424 (possibly different for each block row) or NULL 3425 3426 Options Database Keys: 3427 . -mat_no_unroll - uses code that does not unroll the loops in the 3428 block calculations (much slower) 3429 . -mat_block_size - size of the blocks to use 3430 3431 Level: intermediate 3432 3433 Notes: 3434 If the nnz parameter is given then the nz parameter is ignored 3435 3436 You can call MatGetInfo() to get information on how effective the preallocation was; 3437 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3438 You can also run with the option -info and look for messages with the string 3439 malloc in them to see if additional memory allocation was needed. 3440 3441 The block AIJ format is fully compatible with standard Fortran 77 3442 storage. That is, the stored row and column indices can begin at 3443 either one (as in Fortran) or zero. See the users' manual for details. 3444 3445 Specify the preallocated storage with either nz or nnz (not both). 3446 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3447 allocation. See Users-Manual: ch_mat for details. 3448 3449 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3450 @*/ 3451 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3452 { 3453 PetscErrorCode ierr; 3454 3455 PetscFunctionBegin; 3456 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3457 PetscValidType(B,1); 3458 PetscValidLogicalCollectiveInt(B,bs,2); 3459 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3460 PetscFunctionReturn(0); 3461 } 3462 3463 #undef __FUNCT__ 3464 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3465 /*@C 3466 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3467 (the default sequential PETSc format). 3468 3469 Collective on MPI_Comm 3470 3471 Input Parameters: 3472 + B - the matrix 3473 . i - the indices into j for the start of each local row (starts with zero) 3474 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3475 - v - optional values in the matrix 3476 3477 Level: developer 3478 3479 Notes: 3480 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3481 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3482 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3483 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3484 block column and the second index is over columns within a block. 3485 3486 .keywords: matrix, aij, compressed row, sparse 3487 3488 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3489 @*/ 3490 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3491 { 3492 PetscErrorCode ierr; 3493 3494 PetscFunctionBegin; 3495 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3496 PetscValidType(B,1); 3497 PetscValidLogicalCollectiveInt(B,bs,2); 3498 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3499 PetscFunctionReturn(0); 3500 } 3501 3502 3503 #undef __FUNCT__ 3504 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3505 /*@ 3506 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3507 3508 Collective on MPI_Comm 3509 3510 Input Parameters: 3511 + comm - must be an MPI communicator of size 1 3512 . bs - size of block 3513 . m - number of rows 3514 . n - number of columns 3515 . i - row indices 3516 . j - column indices 3517 - a - matrix values 3518 3519 Output Parameter: 3520 . mat - the matrix 3521 3522 Level: advanced 3523 3524 Notes: 3525 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3526 once the matrix is destroyed 3527 3528 You cannot set new nonzero locations into this matrix, that will generate an error. 3529 3530 The i and j indices are 0 based 3531 3532 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3533 3534 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3535 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3536 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3537 with column-major ordering within blocks. 3538 3539 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3540 3541 @*/ 3542 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3543 { 3544 PetscErrorCode ierr; 3545 PetscInt ii; 3546 Mat_SeqBAIJ *baij; 3547 3548 PetscFunctionBegin; 3549 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3550 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3551 3552 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3553 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3554 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3555 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3556 baij = (Mat_SeqBAIJ*)(*mat)->data; 3557 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3558 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3559 3560 baij->i = i; 3561 baij->j = j; 3562 baij->a = a; 3563 3564 baij->singlemalloc = PETSC_FALSE; 3565 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3566 baij->free_a = PETSC_FALSE; 3567 baij->free_ij = PETSC_FALSE; 3568 3569 for (ii=0; ii<m; ii++) { 3570 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3571 #if defined(PETSC_USE_DEBUG) 3572 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3573 #endif 3574 } 3575 #if defined(PETSC_USE_DEBUG) 3576 for (ii=0; ii<baij->i[m]; ii++) { 3577 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3578 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3579 } 3580 #endif 3581 3582 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3583 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3584 PetscFunctionReturn(0); 3585 } 3586 3587 #undef __FUNCT__ 3588 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3589 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3590 { 3591 PetscErrorCode ierr; 3592 3593 PetscFunctionBegin; 3594 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3595 PetscFunctionReturn(0); 3596 } 3597