1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 allowzeropivot = PetscNot(A->erroriffailure); 24 25 if (a->idiagvalid) { 26 if (values) *values = a->idiag; 27 PetscFunctionReturn(0); 28 } 29 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 30 diag_offset = a->diag; 31 if (!a->idiag) { 32 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 33 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 34 } 35 diag = a->idiag; 36 mdiag = a->idiag+bs2*mbs; 37 if (values) *values = a->idiag; 38 /* factor and invert each block */ 39 switch (bs) { 40 case 1: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 1*diag_offset[i]; 43 diag[0] = odiag[0]; 44 mdiag[0] = odiag[0]; 45 46 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 47 if (allowzeropivot) { 48 A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 49 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 50 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i); 51 } 52 53 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 54 diag += 1; 55 mdiag += 1; 56 } 57 break; 58 case 2: 59 for (i=0; i<mbs; i++) { 60 odiag = v + 4*diag_offset[i]; 61 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 62 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 63 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 64 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 65 diag += 4; 66 mdiag += 4; 67 } 68 break; 69 case 3: 70 for (i=0; i<mbs; i++) { 71 odiag = v + 9*diag_offset[i]; 72 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 73 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 74 diag[8] = odiag[8]; 75 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 76 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 77 mdiag[8] = odiag[8]; 78 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 79 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 80 diag += 9; 81 mdiag += 9; 82 } 83 break; 84 case 4: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 16*diag_offset[i]; 87 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 88 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 mdiag += 16; 93 } 94 break; 95 case 5: 96 for (i=0; i<mbs; i++) { 97 odiag = v + 25*diag_offset[i]; 98 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 99 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 100 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 101 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 102 diag += 25; 103 mdiag += 25; 104 } 105 break; 106 case 6: 107 for (i=0; i<mbs; i++) { 108 odiag = v + 36*diag_offset[i]; 109 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 110 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 111 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 112 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 113 diag += 36; 114 mdiag += 36; 115 } 116 break; 117 case 7: 118 for (i=0; i<mbs; i++) { 119 odiag = v + 49*diag_offset[i]; 120 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 121 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 123 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 124 diag += 49; 125 mdiag += 49; 126 } 127 break; 128 default: 129 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 130 for (i=0; i<mbs; i++) { 131 odiag = v + bs2*diag_offset[i]; 132 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 133 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 134 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 135 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 136 diag += bs2; 137 mdiag += bs2; 138 } 139 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 140 } 141 a->idiagvalid = PETSC_TRUE; 142 PetscFunctionReturn(0); 143 } 144 145 #undef __FUNCT__ 146 #define __FUNCT__ "MatSOR_SeqBAIJ" 147 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 148 { 149 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 150 PetscScalar *x,*work,*w,*workt,*t; 151 const MatScalar *v,*aa = a->a, *idiag; 152 const PetscScalar *b,*xb; 153 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 154 PetscErrorCode ierr; 155 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 156 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 157 158 PetscFunctionBegin; 159 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 160 its = its*lits; 161 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 162 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 163 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 164 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 165 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 166 167 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 168 169 if (!m) PetscFunctionReturn(0); 170 diag = a->diag; 171 idiag = a->idiag; 172 k = PetscMax(A->rmap->n,A->cmap->n); 173 if (!a->mult_work) { 174 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 175 } 176 if (!a->sor_workt) { 177 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 178 } 179 if (!a->sor_work) { 180 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 181 } 182 work = a->mult_work; 183 t = a->sor_workt; 184 w = a->sor_work; 185 186 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 187 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 188 189 if (flag & SOR_ZERO_INITIAL_GUESS) { 190 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 191 switch (bs) { 192 case 1: 193 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 194 t[0] = b[0]; 195 i2 = 1; 196 idiag += 1; 197 for (i=1; i<m; i++) { 198 v = aa + ai[i]; 199 vi = aj + ai[i]; 200 nz = diag[i] - ai[i]; 201 s[0] = b[i2]; 202 for (j=0; j<nz; j++) { 203 xw[0] = x[vi[j]]; 204 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 205 } 206 t[i2] = s[0]; 207 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 208 x[i2] = xw[0]; 209 idiag += 1; 210 i2 += 1; 211 } 212 break; 213 case 2: 214 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 215 t[0] = b[0]; t[1] = b[1]; 216 i2 = 2; 217 idiag += 4; 218 for (i=1; i<m; i++) { 219 v = aa + 4*ai[i]; 220 vi = aj + ai[i]; 221 nz = diag[i] - ai[i]; 222 s[0] = b[i2]; s[1] = b[i2+1]; 223 for (j=0; j<nz; j++) { 224 idx = 2*vi[j]; 225 it = 4*j; 226 xw[0] = x[idx]; xw[1] = x[1+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 228 } 229 t[i2] = s[0]; t[i2+1] = s[1]; 230 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 231 x[i2] = xw[0]; x[i2+1] = xw[1]; 232 idiag += 4; 233 i2 += 2; 234 } 235 break; 236 case 3: 237 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 238 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 239 i2 = 3; 240 idiag += 9; 241 for (i=1; i<m; i++) { 242 v = aa + 9*ai[i]; 243 vi = aj + ai[i]; 244 nz = diag[i] - ai[i]; 245 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 246 while (nz--) { 247 idx = 3*(*vi++); 248 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 249 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 250 v += 9; 251 } 252 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 253 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 254 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 255 idiag += 9; 256 i2 += 3; 257 } 258 break; 259 case 4: 260 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 261 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 262 i2 = 4; 263 idiag += 16; 264 for (i=1; i<m; i++) { 265 v = aa + 16*ai[i]; 266 vi = aj + ai[i]; 267 nz = diag[i] - ai[i]; 268 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 269 while (nz--) { 270 idx = 4*(*vi++); 271 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 272 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 273 v += 16; 274 } 275 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 276 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 277 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 278 idiag += 16; 279 i2 += 4; 280 } 281 break; 282 case 5: 283 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 284 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 285 i2 = 5; 286 idiag += 25; 287 for (i=1; i<m; i++) { 288 v = aa + 25*ai[i]; 289 vi = aj + ai[i]; 290 nz = diag[i] - ai[i]; 291 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 292 while (nz--) { 293 idx = 5*(*vi++); 294 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 295 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 296 v += 25; 297 } 298 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 299 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 300 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 301 idiag += 25; 302 i2 += 5; 303 } 304 break; 305 case 6: 306 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 307 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 308 i2 = 6; 309 idiag += 36; 310 for (i=1; i<m; i++) { 311 v = aa + 36*ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 315 while (nz--) { 316 idx = 6*(*vi++); 317 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 318 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 319 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 320 v += 36; 321 } 322 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 323 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 324 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 325 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 326 idiag += 36; 327 i2 += 6; 328 } 329 break; 330 case 7: 331 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 332 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 333 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 334 i2 = 7; 335 idiag += 49; 336 for (i=1; i<m; i++) { 337 v = aa + 49*ai[i]; 338 vi = aj + ai[i]; 339 nz = diag[i] - ai[i]; 340 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 341 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 342 while (nz--) { 343 idx = 7*(*vi++); 344 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 345 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 346 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 347 v += 49; 348 } 349 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 350 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 351 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 352 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 353 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 354 idiag += 49; 355 i2 += 7; 356 } 357 break; 358 default: 359 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 360 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 361 i2 = bs; 362 idiag += bs2; 363 for (i=1; i<m; i++) { 364 v = aa + bs2*ai[i]; 365 vi = aj + ai[i]; 366 nz = diag[i] - ai[i]; 367 368 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 369 /* copy all rows of x that are needed into contiguous space */ 370 workt = work; 371 for (j=0; j<nz; j++) { 372 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 373 workt += bs; 374 } 375 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 376 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 377 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 378 379 idiag += bs2; 380 i2 += bs; 381 } 382 break; 383 } 384 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 385 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 386 xb = t; 387 } 388 else xb = b; 389 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 390 idiag = a->idiag+bs2*(a->mbs-1); 391 i2 = bs * (m-1); 392 switch (bs) { 393 case 1: 394 s[0] = xb[i2]; 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 i2 -= 1; 398 for (i=m-2; i>=0; i--) { 399 v = aa + (diag[i]+1); 400 vi = aj + diag[i] + 1; 401 nz = ai[i+1] - diag[i] - 1; 402 s[0] = xb[i2]; 403 for (j=0; j<nz; j++) { 404 xw[0] = x[vi[j]]; 405 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 406 } 407 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 408 x[i2] = xw[0]; 409 idiag -= 1; 410 i2 -= 1; 411 } 412 break; 413 case 2: 414 s[0] = xb[i2]; s[1] = xb[i2+1]; 415 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 416 x[i2] = xw[0]; x[i2+1] = xw[1]; 417 i2 -= 2; 418 idiag -= 4; 419 for (i=m-2; i>=0; i--) { 420 v = aa + 4*(diag[i] + 1); 421 vi = aj + diag[i] + 1; 422 nz = ai[i+1] - diag[i] - 1; 423 s[0] = xb[i2]; s[1] = xb[i2+1]; 424 for (j=0; j<nz; j++) { 425 idx = 2*vi[j]; 426 it = 4*j; 427 xw[0] = x[idx]; xw[1] = x[1+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 429 } 430 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 431 x[i2] = xw[0]; x[i2+1] = xw[1]; 432 idiag -= 4; 433 i2 -= 2; 434 } 435 break; 436 case 3: 437 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 438 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 439 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 440 i2 -= 3; 441 idiag -= 9; 442 for (i=m-2; i>=0; i--) { 443 v = aa + 9*(diag[i]+1); 444 vi = aj + diag[i] + 1; 445 nz = ai[i+1] - diag[i] - 1; 446 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 447 while (nz--) { 448 idx = 3*(*vi++); 449 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 450 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 451 v += 9; 452 } 453 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 454 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 455 idiag -= 9; 456 i2 -= 3; 457 } 458 break; 459 case 4: 460 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 461 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 462 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 463 i2 -= 4; 464 idiag -= 16; 465 for (i=m-2; i>=0; i--) { 466 v = aa + 16*(diag[i]+1); 467 vi = aj + diag[i] + 1; 468 nz = ai[i+1] - diag[i] - 1; 469 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 473 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 474 v += 16; 475 } 476 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 477 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 478 idiag -= 16; 479 i2 -= 4; 480 } 481 break; 482 case 5: 483 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 484 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 485 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 486 i2 -= 5; 487 idiag -= 25; 488 for (i=m-2; i>=0; i--) { 489 v = aa + 25*(diag[i]+1); 490 vi = aj + diag[i] + 1; 491 nz = ai[i+1] - diag[i] - 1; 492 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 493 while (nz--) { 494 idx = 5*(*vi++); 495 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 497 v += 25; 498 } 499 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 500 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 501 idiag -= 25; 502 i2 -= 5; 503 } 504 break; 505 case 6: 506 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 507 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 508 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 509 i2 -= 6; 510 idiag -= 36; 511 for (i=m-2; i>=0; i--) { 512 v = aa + 36*(diag[i]+1); 513 vi = aj + diag[i] + 1; 514 nz = ai[i+1] - diag[i] - 1; 515 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 516 while (nz--) { 517 idx = 6*(*vi++); 518 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 519 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 520 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 521 v += 36; 522 } 523 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 524 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 525 idiag -= 36; 526 i2 -= 6; 527 } 528 break; 529 case 7: 530 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 531 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 532 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 533 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 534 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 535 i2 -= 7; 536 idiag -= 49; 537 for (i=m-2; i>=0; i--) { 538 v = aa + 49*(diag[i]+1); 539 vi = aj + diag[i] + 1; 540 nz = ai[i+1] - diag[i] - 1; 541 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 542 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 543 while (nz--) { 544 idx = 7*(*vi++); 545 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 546 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 547 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 548 v += 49; 549 } 550 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 551 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 552 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 553 idiag -= 49; 554 i2 -= 7; 555 } 556 break; 557 default: 558 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 559 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 560 i2 -= bs; 561 idiag -= bs2; 562 for (i=m-2; i>=0; i--) { 563 v = aa + bs2*(diag[i]+1); 564 vi = aj + diag[i] + 1; 565 nz = ai[i+1] - diag[i] - 1; 566 567 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 568 /* copy all rows of x that are needed into contiguous space */ 569 workt = work; 570 for (j=0; j<nz; j++) { 571 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 572 workt += bs; 573 } 574 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 575 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 576 577 idiag -= bs2; 578 i2 -= bs; 579 } 580 break; 581 } 582 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 583 } 584 its--; 585 } 586 while (its--) { 587 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 588 idiag = a->idiag; 589 i2 = 0; 590 switch (bs) { 591 case 1: 592 for (i=0; i<m; i++) { 593 v = aa + ai[i]; 594 vi = aj + ai[i]; 595 nz = ai[i+1] - ai[i]; 596 s[0] = b[i2]; 597 for (j=0; j<nz; j++) { 598 xw[0] = x[vi[j]]; 599 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 600 } 601 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 602 x[i2] += xw[0]; 603 idiag += 1; 604 i2 += 1; 605 } 606 break; 607 case 2: 608 for (i=0; i<m; i++) { 609 v = aa + 4*ai[i]; 610 vi = aj + ai[i]; 611 nz = ai[i+1] - ai[i]; 612 s[0] = b[i2]; s[1] = b[i2+1]; 613 for (j=0; j<nz; j++) { 614 idx = 2*vi[j]; 615 it = 4*j; 616 xw[0] = x[idx]; xw[1] = x[1+idx]; 617 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 618 } 619 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 620 x[i2] += xw[0]; x[i2+1] += xw[1]; 621 idiag += 4; 622 i2 += 2; 623 } 624 break; 625 case 3: 626 for (i=0; i<m; i++) { 627 v = aa + 9*ai[i]; 628 vi = aj + ai[i]; 629 nz = ai[i+1] - ai[i]; 630 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 631 while (nz--) { 632 idx = 3*(*vi++); 633 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 634 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 635 v += 9; 636 } 637 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 638 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 639 idiag += 9; 640 i2 += 3; 641 } 642 break; 643 case 4: 644 for (i=0; i<m; i++) { 645 v = aa + 16*ai[i]; 646 vi = aj + ai[i]; 647 nz = ai[i+1] - ai[i]; 648 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 649 while (nz--) { 650 idx = 4*(*vi++); 651 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 652 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 653 v += 16; 654 } 655 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 656 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 657 idiag += 16; 658 i2 += 4; 659 } 660 break; 661 case 5: 662 for (i=0; i<m; i++) { 663 v = aa + 25*ai[i]; 664 vi = aj + ai[i]; 665 nz = ai[i+1] - ai[i]; 666 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 667 while (nz--) { 668 idx = 5*(*vi++); 669 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 670 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 671 v += 25; 672 } 673 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 674 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 675 idiag += 25; 676 i2 += 5; 677 } 678 break; 679 case 6: 680 for (i=0; i<m; i++) { 681 v = aa + 36*ai[i]; 682 vi = aj + ai[i]; 683 nz = ai[i+1] - ai[i]; 684 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 685 while (nz--) { 686 idx = 6*(*vi++); 687 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 688 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 689 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 690 v += 36; 691 } 692 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 693 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 694 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 695 idiag += 36; 696 i2 += 6; 697 } 698 break; 699 case 7: 700 for (i=0; i<m; i++) { 701 v = aa + 49*ai[i]; 702 vi = aj + ai[i]; 703 nz = ai[i+1] - ai[i]; 704 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 705 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 706 while (nz--) { 707 idx = 7*(*vi++); 708 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 709 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 710 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 711 v += 49; 712 } 713 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 714 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 715 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 716 idiag += 49; 717 i2 += 7; 718 } 719 break; 720 default: 721 for (i=0; i<m; i++) { 722 v = aa + bs2*ai[i]; 723 vi = aj + ai[i]; 724 nz = ai[i+1] - ai[i]; 725 726 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 727 /* copy all rows of x that are needed into contiguous space */ 728 workt = work; 729 for (j=0; j<nz; j++) { 730 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 731 workt += bs; 732 } 733 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 734 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 735 736 idiag += bs2; 737 i2 += bs; 738 } 739 break; 740 } 741 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 742 } 743 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 744 idiag = a->idiag+bs2*(a->mbs-1); 745 i2 = bs * (m-1); 746 switch (bs) { 747 case 1: 748 for (i=m-1; i>=0; i--) { 749 v = aa + ai[i]; 750 vi = aj + ai[i]; 751 nz = ai[i+1] - ai[i]; 752 s[0] = b[i2]; 753 for (j=0; j<nz; j++) { 754 xw[0] = x[vi[j]]; 755 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 756 } 757 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 758 x[i2] += xw[0]; 759 idiag -= 1; 760 i2 -= 1; 761 } 762 break; 763 case 2: 764 for (i=m-1; i>=0; i--) { 765 v = aa + 4*ai[i]; 766 vi = aj + ai[i]; 767 nz = ai[i+1] - ai[i]; 768 s[0] = b[i2]; s[1] = b[i2+1]; 769 for (j=0; j<nz; j++) { 770 idx = 2*vi[j]; 771 it = 4*j; 772 xw[0] = x[idx]; xw[1] = x[1+idx]; 773 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 774 } 775 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 776 x[i2] += xw[0]; x[i2+1] += xw[1]; 777 idiag -= 4; 778 i2 -= 2; 779 } 780 break; 781 case 3: 782 for (i=m-1; i>=0; i--) { 783 v = aa + 9*ai[i]; 784 vi = aj + ai[i]; 785 nz = ai[i+1] - ai[i]; 786 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 787 while (nz--) { 788 idx = 3*(*vi++); 789 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 790 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 791 v += 9; 792 } 793 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 794 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 795 idiag -= 9; 796 i2 -= 3; 797 } 798 break; 799 case 4: 800 for (i=m-1; i>=0; i--) { 801 v = aa + 16*ai[i]; 802 vi = aj + ai[i]; 803 nz = ai[i+1] - ai[i]; 804 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 805 while (nz--) { 806 idx = 4*(*vi++); 807 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 808 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 809 v += 16; 810 } 811 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 812 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 813 idiag -= 16; 814 i2 -= 4; 815 } 816 break; 817 case 5: 818 for (i=m-1; i>=0; i--) { 819 v = aa + 25*ai[i]; 820 vi = aj + ai[i]; 821 nz = ai[i+1] - ai[i]; 822 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 823 while (nz--) { 824 idx = 5*(*vi++); 825 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 826 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 827 v += 25; 828 } 829 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 830 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 831 idiag -= 25; 832 i2 -= 5; 833 } 834 break; 835 case 6: 836 for (i=m-1; i>=0; i--) { 837 v = aa + 36*ai[i]; 838 vi = aj + ai[i]; 839 nz = ai[i+1] - ai[i]; 840 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 841 while (nz--) { 842 idx = 6*(*vi++); 843 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 844 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 845 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 846 v += 36; 847 } 848 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 849 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 850 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 851 idiag -= 36; 852 i2 -= 6; 853 } 854 break; 855 case 7: 856 for (i=m-1; i>=0; i--) { 857 v = aa + 49*ai[i]; 858 vi = aj + ai[i]; 859 nz = ai[i+1] - ai[i]; 860 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 861 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 862 while (nz--) { 863 idx = 7*(*vi++); 864 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 865 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 866 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 867 v += 49; 868 } 869 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 870 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 871 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 872 idiag -= 49; 873 i2 -= 7; 874 } 875 break; 876 default: 877 for (i=m-1; i>=0; i--) { 878 v = aa + bs2*ai[i]; 879 vi = aj + ai[i]; 880 nz = ai[i+1] - ai[i]; 881 882 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 883 /* copy all rows of x that are needed into contiguous space */ 884 workt = work; 885 for (j=0; j<nz; j++) { 886 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 887 workt += bs; 888 } 889 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 890 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 891 892 idiag -= bs2; 893 i2 -= bs; 894 } 895 break; 896 } 897 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 898 } 899 } 900 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 901 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 906 /* 907 Special version for direct calls from Fortran (Used in PETSc-fun3d) 908 */ 909 #if defined(PETSC_HAVE_FORTRAN_CAPS) 910 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 911 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 912 #define matsetvaluesblocked4_ matsetvaluesblocked4 913 #endif 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "matsetvaluesblocked4_" 917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 918 { 919 Mat A = *AA; 920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 921 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 922 PetscInt *ai =a->i,*ailen=a->ilen; 923 PetscInt *aj =a->j,stepval,lastcol = -1; 924 const PetscScalar *value = v; 925 MatScalar *ap,*aa = a->a,*bap; 926 927 PetscFunctionBegin; 928 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 929 stepval = (n-1)*4; 930 for (k=0; k<m; k++) { /* loop over added rows */ 931 row = im[k]; 932 rp = aj + ai[row]; 933 ap = aa + 16*ai[row]; 934 nrow = ailen[row]; 935 low = 0; 936 high = nrow; 937 for (l=0; l<n; l++) { /* loop over added columns */ 938 col = in[l]; 939 if (col <= lastcol) low = 0; 940 else high = nrow; 941 lastcol = col; 942 value = v + k*(stepval+4 + l)*4; 943 while (high-low > 7) { 944 t = (low+high)/2; 945 if (rp[t] > col) high = t; 946 else low = t; 947 } 948 for (i=low; i<high; i++) { 949 if (rp[i] > col) break; 950 if (rp[i] == col) { 951 bap = ap + 16*i; 952 for (ii=0; ii<4; ii++,value+=stepval) { 953 for (jj=ii; jj<16; jj+=4) { 954 bap[jj] += *value++; 955 } 956 } 957 goto noinsert2; 958 } 959 } 960 N = nrow++ - 1; 961 high++; /* added new column index thus must search to one higher than before */ 962 /* shift up all the later entries in this row */ 963 for (ii=N; ii>=i; ii--) { 964 rp[ii+1] = rp[ii]; 965 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 966 } 967 if (N >= i) { 968 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 969 } 970 rp[i] = col; 971 bap = ap + 16*i; 972 for (ii=0; ii<4; ii++,value+=stepval) { 973 for (jj=ii; jj<16; jj+=4) { 974 bap[jj] = *value++; 975 } 976 } 977 noinsert2:; 978 low = i; 979 } 980 ailen[row] = nrow; 981 } 982 PetscFunctionReturnVoid(); 983 } 984 985 #if defined(PETSC_HAVE_FORTRAN_CAPS) 986 #define matsetvalues4_ MATSETVALUES4 987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 988 #define matsetvalues4_ matsetvalues4 989 #endif 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatSetValues4_" 993 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 994 { 995 Mat A = *AA; 996 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 997 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 998 PetscInt *ai=a->i,*ailen=a->ilen; 999 PetscInt *aj=a->j,brow,bcol; 1000 PetscInt ridx,cidx,lastcol = -1; 1001 MatScalar *ap,value,*aa=a->a,*bap; 1002 1003 PetscFunctionBegin; 1004 for (k=0; k<m; k++) { /* loop over added rows */ 1005 row = im[k]; brow = row/4; 1006 rp = aj + ai[brow]; 1007 ap = aa + 16*ai[brow]; 1008 nrow = ailen[brow]; 1009 low = 0; 1010 high = nrow; 1011 for (l=0; l<n; l++) { /* loop over added columns */ 1012 col = in[l]; bcol = col/4; 1013 ridx = row % 4; cidx = col % 4; 1014 value = v[l + k*n]; 1015 if (col <= lastcol) low = 0; 1016 else high = nrow; 1017 lastcol = col; 1018 while (high-low > 7) { 1019 t = (low+high)/2; 1020 if (rp[t] > bcol) high = t; 1021 else low = t; 1022 } 1023 for (i=low; i<high; i++) { 1024 if (rp[i] > bcol) break; 1025 if (rp[i] == bcol) { 1026 bap = ap + 16*i + 4*cidx + ridx; 1027 *bap += value; 1028 goto noinsert1; 1029 } 1030 } 1031 N = nrow++ - 1; 1032 high++; /* added new column thus must search to one higher than before */ 1033 /* shift up all the later entries in this row */ 1034 for (ii=N; ii>=i; ii--) { 1035 rp[ii+1] = rp[ii]; 1036 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1037 } 1038 if (N>=i) { 1039 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1040 } 1041 rp[i] = bcol; 1042 ap[16*i + 4*cidx + ridx] = value; 1043 noinsert1:; 1044 low = i; 1045 } 1046 ailen[brow] = nrow; 1047 } 1048 PetscFunctionReturnVoid(); 1049 } 1050 1051 /* 1052 Checks for missing diagonals 1053 */ 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1057 { 1058 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1059 PetscErrorCode ierr; 1060 PetscInt *diag,*ii = a->i,i; 1061 1062 PetscFunctionBegin; 1063 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1064 *missing = PETSC_FALSE; 1065 if (A->rmap->n > 0 && !ii) { 1066 *missing = PETSC_TRUE; 1067 if (d) *d = 0; 1068 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1069 } else { 1070 diag = a->diag; 1071 for (i=0; i<a->mbs; i++) { 1072 if (diag[i] >= ii[i+1]) { 1073 *missing = PETSC_TRUE; 1074 if (d) *d = i; 1075 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1076 break; 1077 } 1078 } 1079 } 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1085 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1086 { 1087 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1088 PetscErrorCode ierr; 1089 PetscInt i,j,m = a->mbs; 1090 1091 PetscFunctionBegin; 1092 if (!a->diag) { 1093 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1094 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1095 a->free_diag = PETSC_TRUE; 1096 } 1097 for (i=0; i<m; i++) { 1098 a->diag[i] = a->i[i+1]; 1099 for (j=a->i[i]; j<a->i[i+1]; j++) { 1100 if (a->j[j] == i) { 1101 a->diag[i] = j; 1102 break; 1103 } 1104 } 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1112 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1113 { 1114 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1115 PetscErrorCode ierr; 1116 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1117 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1118 1119 PetscFunctionBegin; 1120 *nn = n; 1121 if (!ia) PetscFunctionReturn(0); 1122 if (symmetric) { 1123 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1124 nz = tia[n]; 1125 } else { 1126 tia = a->i; tja = a->j; 1127 } 1128 1129 if (!blockcompressed && bs > 1) { 1130 (*nn) *= bs; 1131 /* malloc & create the natural set of indices */ 1132 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1133 if (n) { 1134 (*ia)[0] = 0; 1135 for (j=1; j<bs; j++) { 1136 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1137 } 1138 } 1139 1140 for (i=1; i<n; i++) { 1141 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1142 for (j=1; j<bs; j++) { 1143 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1144 } 1145 } 1146 if (n) { 1147 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1148 } 1149 1150 if (inja) { 1151 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1152 cnt = 0; 1153 for (i=0; i<n; i++) { 1154 for (j=0; j<bs; j++) { 1155 for (k=tia[i]; k<tia[i+1]; k++) { 1156 for (l=0; l<bs; l++) { 1157 (*ja)[cnt++] = bs*tja[k] + l; 1158 } 1159 } 1160 } 1161 } 1162 } 1163 1164 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1165 ierr = PetscFree(tia);CHKERRQ(ierr); 1166 ierr = PetscFree(tja);CHKERRQ(ierr); 1167 } 1168 } else if (oshift == 1) { 1169 if (symmetric) { 1170 nz = tia[A->rmap->n/bs]; 1171 /* add 1 to i and j indices */ 1172 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1173 *ia = tia; 1174 if (ja) { 1175 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1176 *ja = tja; 1177 } 1178 } else { 1179 nz = a->i[A->rmap->n/bs]; 1180 /* malloc space and add 1 to i and j indices */ 1181 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1182 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1183 if (ja) { 1184 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1185 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1186 } 1187 } 1188 } else { 1189 *ia = tia; 1190 if (ja) *ja = tja; 1191 } 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1197 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1198 { 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 if (!ia) PetscFunctionReturn(0); 1203 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1204 ierr = PetscFree(*ia);CHKERRQ(ierr); 1205 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1212 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1213 { 1214 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 #if defined(PETSC_USE_LOG) 1219 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1220 #endif 1221 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1222 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1223 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1224 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1225 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1226 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1227 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1228 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1229 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1230 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1231 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1232 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1233 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1234 1235 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1236 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1237 ierr = PetscFree(A->data);CHKERRQ(ierr); 1238 1239 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1255 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1256 { 1257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 switch (op) { 1262 case MAT_ROW_ORIENTED: 1263 a->roworiented = flg; 1264 break; 1265 case MAT_KEEP_NONZERO_PATTERN: 1266 a->keepnonzeropattern = flg; 1267 break; 1268 case MAT_NEW_NONZERO_LOCATIONS: 1269 a->nonew = (flg ? 0 : 1); 1270 break; 1271 case MAT_NEW_NONZERO_LOCATION_ERR: 1272 a->nonew = (flg ? -1 : 0); 1273 break; 1274 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1275 a->nonew = (flg ? -2 : 0); 1276 break; 1277 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1278 a->nounused = (flg ? -1 : 0); 1279 break; 1280 case MAT_NEW_DIAGONALS: 1281 case MAT_IGNORE_OFF_PROC_ENTRIES: 1282 case MAT_USE_HASH_TABLE: 1283 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1284 break; 1285 case MAT_SPD: 1286 case MAT_SYMMETRIC: 1287 case MAT_STRUCTURALLY_SYMMETRIC: 1288 case MAT_HERMITIAN: 1289 case MAT_SYMMETRY_ETERNAL: 1290 /* These options are handled directly by MatSetOption() */ 1291 break; 1292 default: 1293 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1294 } 1295 PetscFunctionReturn(0); 1296 } 1297 1298 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1301 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1302 { 1303 PetscErrorCode ierr; 1304 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1305 MatScalar *aa_i; 1306 PetscScalar *v_i; 1307 1308 PetscFunctionBegin; 1309 bs = A->rmap->bs; 1310 bs2 = bs*bs; 1311 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1312 1313 bn = row/bs; /* Block number */ 1314 bp = row % bs; /* Block Position */ 1315 M = ai[bn+1] - ai[bn]; 1316 *nz = bs*M; 1317 1318 if (v) { 1319 *v = 0; 1320 if (*nz) { 1321 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1322 for (i=0; i<M; i++) { /* for each block in the block row */ 1323 v_i = *v + i*bs; 1324 aa_i = aa + bs2*(ai[bn] + i); 1325 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1326 } 1327 } 1328 } 1329 1330 if (idx) { 1331 *idx = 0; 1332 if (*nz) { 1333 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1334 for (i=0; i<M; i++) { /* for each block in the block row */ 1335 idx_i = *idx + i*bs; 1336 itmp = bs*aj[ai[bn] + i]; 1337 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1338 } 1339 } 1340 } 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1346 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1347 { 1348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1349 PetscErrorCode ierr; 1350 1351 PetscFunctionBegin; 1352 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1358 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1359 { 1360 PetscErrorCode ierr; 1361 1362 PetscFunctionBegin; 1363 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1364 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1365 PetscFunctionReturn(0); 1366 } 1367 1368 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1372 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1373 { 1374 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1375 Mat C; 1376 PetscErrorCode ierr; 1377 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1378 PetscInt *rows,*cols,bs2=a->bs2; 1379 MatScalar *array; 1380 1381 PetscFunctionBegin; 1382 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1383 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1384 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1385 1386 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1387 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1388 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1389 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1390 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1391 ierr = PetscFree(col);CHKERRQ(ierr); 1392 } else { 1393 C = *B; 1394 } 1395 1396 array = a->a; 1397 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1398 for (i=0; i<mbs; i++) { 1399 cols[0] = i*bs; 1400 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1401 len = ai[i+1] - ai[i]; 1402 for (j=0; j<len; j++) { 1403 rows[0] = (*aj++)*bs; 1404 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1405 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1406 array += bs2; 1407 } 1408 } 1409 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1410 1411 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1413 1414 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1415 *B = C; 1416 } else { 1417 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1418 } 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1424 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1425 { 1426 PetscErrorCode ierr; 1427 Mat Btrans; 1428 1429 PetscFunctionBegin; 1430 *f = PETSC_FALSE; 1431 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1432 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1433 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1439 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1440 { 1441 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1442 PetscErrorCode ierr; 1443 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1444 int fd; 1445 PetscScalar *aa; 1446 FILE *file; 1447 1448 PetscFunctionBegin; 1449 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1450 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1451 col_lens[0] = MAT_FILE_CLASSID; 1452 1453 col_lens[1] = A->rmap->N; 1454 col_lens[2] = A->cmap->n; 1455 col_lens[3] = a->nz*bs2; 1456 1457 /* store lengths of each row and write (including header) to file */ 1458 count = 0; 1459 for (i=0; i<a->mbs; i++) { 1460 for (j=0; j<bs; j++) { 1461 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1462 } 1463 } 1464 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1465 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1466 1467 /* store column indices (zero start index) */ 1468 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1469 count = 0; 1470 for (i=0; i<a->mbs; i++) { 1471 for (j=0; j<bs; j++) { 1472 for (k=a->i[i]; k<a->i[i+1]; k++) { 1473 for (l=0; l<bs; l++) { 1474 jj[count++] = bs*a->j[k] + l; 1475 } 1476 } 1477 } 1478 } 1479 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscFree(jj);CHKERRQ(ierr); 1481 1482 /* store nonzero values */ 1483 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1484 count = 0; 1485 for (i=0; i<a->mbs; i++) { 1486 for (j=0; j<bs; j++) { 1487 for (k=a->i[i]; k<a->i[i+1]; k++) { 1488 for (l=0; l<bs; l++) { 1489 aa[count++] = a->a[bs2*k + l*bs + j]; 1490 } 1491 } 1492 } 1493 } 1494 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1495 ierr = PetscFree(aa);CHKERRQ(ierr); 1496 1497 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1498 if (file) { 1499 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1506 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1507 { 1508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1509 PetscErrorCode ierr; 1510 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1511 PetscViewerFormat format; 1512 1513 PetscFunctionBegin; 1514 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1515 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1516 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1517 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1518 const char *matname; 1519 Mat aij; 1520 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1521 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1522 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1523 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1524 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1525 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1526 PetscFunctionReturn(0); 1527 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1528 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1529 for (i=0; i<a->mbs; i++) { 1530 for (j=0; j<bs; j++) { 1531 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1532 for (k=a->i[i]; k<a->i[i+1]; k++) { 1533 for (l=0; l<bs; l++) { 1534 #if defined(PETSC_USE_COMPLEX) 1535 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1536 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1537 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1538 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1540 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1542 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1543 } 1544 #else 1545 if (a->a[bs2*k + l*bs + j] != 0.0) { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1547 } 1548 #endif 1549 } 1550 } 1551 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1552 } 1553 } 1554 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1555 } else { 1556 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1557 for (i=0; i<a->mbs; i++) { 1558 for (j=0; j<bs; j++) { 1559 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1560 for (k=a->i[i]; k<a->i[i+1]; k++) { 1561 for (l=0; l<bs; l++) { 1562 #if defined(PETSC_USE_COMPLEX) 1563 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1564 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1565 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1566 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1568 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } else { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } 1572 #else 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1574 #endif 1575 } 1576 } 1577 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1578 } 1579 } 1580 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1581 } 1582 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #include <petscdraw.h> 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1589 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1590 { 1591 Mat A = (Mat) Aa; 1592 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1595 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1596 MatScalar *aa; 1597 PetscViewer viewer; 1598 PetscViewerFormat format; 1599 1600 PetscFunctionBegin; 1601 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1602 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1603 1604 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1605 1606 /* loop over matrix elements drawing boxes */ 1607 1608 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1609 color = PETSC_DRAW_BLUE; 1610 for (i=0,row=0; i<mbs; i++,row+=bs) { 1611 for (j=a->i[i]; j<a->i[i+1]; j++) { 1612 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1613 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1614 aa = a->a + j*bs2; 1615 for (k=0; k<bs; k++) { 1616 for (l=0; l<bs; l++) { 1617 if (PetscRealPart(*aa++) >= 0.) continue; 1618 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1619 } 1620 } 1621 } 1622 } 1623 color = PETSC_DRAW_CYAN; 1624 for (i=0,row=0; i<mbs; i++,row+=bs) { 1625 for (j=a->i[i]; j<a->i[i+1]; j++) { 1626 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1627 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1628 aa = a->a + j*bs2; 1629 for (k=0; k<bs; k++) { 1630 for (l=0; l<bs; l++) { 1631 if (PetscRealPart(*aa++) != 0.) continue; 1632 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1633 } 1634 } 1635 } 1636 } 1637 color = PETSC_DRAW_RED; 1638 for (i=0,row=0; i<mbs; i++,row+=bs) { 1639 for (j=a->i[i]; j<a->i[i+1]; j++) { 1640 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1641 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1642 aa = a->a + j*bs2; 1643 for (k=0; k<bs; k++) { 1644 for (l=0; l<bs; l++) { 1645 if (PetscRealPart(*aa++) <= 0.) continue; 1646 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1647 } 1648 } 1649 } 1650 } 1651 } else { 1652 /* use contour shading to indicate magnitude of values */ 1653 /* first determine max of all nonzero values */ 1654 PetscReal minv = 0.0, maxv = 0.0; 1655 PetscDraw popup; 1656 1657 for (i=0; i<a->nz*a->bs2; i++) { 1658 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1659 } 1660 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1661 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1662 for (i=0,row=0; i<mbs; i++,row+=bs) { 1663 for (j=a->i[i]; j<a->i[i+1]; j++) { 1664 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1665 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1666 aa = a->a + j*bs2; 1667 for (k=0; k<bs; k++) { 1668 for (l=0; l<bs; l++) { 1669 color = PetscDrawRealToColor(PetscAbsScalar(*aa++),minv,maxv); 1670 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1671 } 1672 } 1673 } 1674 } 1675 } 1676 PetscFunctionReturn(0); 1677 } 1678 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1681 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1682 { 1683 PetscErrorCode ierr; 1684 PetscReal xl,yl,xr,yr,w,h; 1685 PetscDraw draw; 1686 PetscBool isnull; 1687 1688 PetscFunctionBegin; 1689 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1690 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1691 1692 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1693 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1694 xr += w; yr += h; xl = -w; yl = -h; 1695 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1696 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1697 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatView_SeqBAIJ" 1703 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1704 { 1705 PetscErrorCode ierr; 1706 PetscBool iascii,isbinary,isdraw; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1710 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1711 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1712 if (iascii) { 1713 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1714 } else if (isbinary) { 1715 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1716 } else if (isdraw) { 1717 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1718 } else { 1719 Mat B; 1720 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1721 ierr = MatView(B,viewer);CHKERRQ(ierr); 1722 ierr = MatDestroy(&B);CHKERRQ(ierr); 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1730 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1731 { 1732 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1733 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1734 PetscInt *ai = a->i,*ailen = a->ilen; 1735 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1736 MatScalar *ap,*aa = a->a; 1737 1738 PetscFunctionBegin; 1739 for (k=0; k<m; k++) { /* loop over rows */ 1740 row = im[k]; brow = row/bs; 1741 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1742 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1743 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1744 nrow = ailen[brow]; 1745 for (l=0; l<n; l++) { /* loop over columns */ 1746 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1747 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1748 col = in[l]; 1749 bcol = col/bs; 1750 cidx = col%bs; 1751 ridx = row%bs; 1752 high = nrow; 1753 low = 0; /* assume unsorted */ 1754 while (high-low > 5) { 1755 t = (low+high)/2; 1756 if (rp[t] > bcol) high = t; 1757 else low = t; 1758 } 1759 for (i=low; i<high; i++) { 1760 if (rp[i] > bcol) break; 1761 if (rp[i] == bcol) { 1762 *v++ = ap[bs2*i+bs*cidx+ridx]; 1763 goto finished; 1764 } 1765 } 1766 *v++ = 0.0; 1767 finished:; 1768 } 1769 } 1770 PetscFunctionReturn(0); 1771 } 1772 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1775 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1776 { 1777 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1778 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1779 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1780 PetscErrorCode ierr; 1781 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1782 PetscBool roworiented=a->roworiented; 1783 const PetscScalar *value = v; 1784 MatScalar *ap,*aa = a->a,*bap; 1785 1786 PetscFunctionBegin; 1787 if (roworiented) { 1788 stepval = (n-1)*bs; 1789 } else { 1790 stepval = (m-1)*bs; 1791 } 1792 for (k=0; k<m; k++) { /* loop over added rows */ 1793 row = im[k]; 1794 if (row < 0) continue; 1795 #if defined(PETSC_USE_DEBUG) 1796 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1797 #endif 1798 rp = aj + ai[row]; 1799 ap = aa + bs2*ai[row]; 1800 rmax = imax[row]; 1801 nrow = ailen[row]; 1802 low = 0; 1803 high = nrow; 1804 for (l=0; l<n; l++) { /* loop over added columns */ 1805 if (in[l] < 0) continue; 1806 #if defined(PETSC_USE_DEBUG) 1807 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1808 #endif 1809 col = in[l]; 1810 if (roworiented) { 1811 value = v + (k*(stepval+bs) + l)*bs; 1812 } else { 1813 value = v + (l*(stepval+bs) + k)*bs; 1814 } 1815 if (col <= lastcol) low = 0; 1816 else high = nrow; 1817 lastcol = col; 1818 while (high-low > 7) { 1819 t = (low+high)/2; 1820 if (rp[t] > col) high = t; 1821 else low = t; 1822 } 1823 for (i=low; i<high; i++) { 1824 if (rp[i] > col) break; 1825 if (rp[i] == col) { 1826 bap = ap + bs2*i; 1827 if (roworiented) { 1828 if (is == ADD_VALUES) { 1829 for (ii=0; ii<bs; ii++,value+=stepval) { 1830 for (jj=ii; jj<bs2; jj+=bs) { 1831 bap[jj] += *value++; 1832 } 1833 } 1834 } else { 1835 for (ii=0; ii<bs; ii++,value+=stepval) { 1836 for (jj=ii; jj<bs2; jj+=bs) { 1837 bap[jj] = *value++; 1838 } 1839 } 1840 } 1841 } else { 1842 if (is == ADD_VALUES) { 1843 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1844 for (jj=0; jj<bs; jj++) { 1845 bap[jj] += value[jj]; 1846 } 1847 bap += bs; 1848 } 1849 } else { 1850 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1851 for (jj=0; jj<bs; jj++) { 1852 bap[jj] = value[jj]; 1853 } 1854 bap += bs; 1855 } 1856 } 1857 } 1858 goto noinsert2; 1859 } 1860 } 1861 if (nonew == 1) goto noinsert2; 1862 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1863 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1864 N = nrow++ - 1; high++; 1865 /* shift up all the later entries in this row */ 1866 for (ii=N; ii>=i; ii--) { 1867 rp[ii+1] = rp[ii]; 1868 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1869 } 1870 if (N >= i) { 1871 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1872 } 1873 rp[i] = col; 1874 bap = ap + bs2*i; 1875 if (roworiented) { 1876 for (ii=0; ii<bs; ii++,value+=stepval) { 1877 for (jj=ii; jj<bs2; jj+=bs) { 1878 bap[jj] = *value++; 1879 } 1880 } 1881 } else { 1882 for (ii=0; ii<bs; ii++,value+=stepval) { 1883 for (jj=0; jj<bs; jj++) { 1884 *bap++ = *value++; 1885 } 1886 } 1887 } 1888 noinsert2:; 1889 low = i; 1890 } 1891 ailen[row] = nrow; 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1898 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1899 { 1900 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1901 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1902 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1903 PetscErrorCode ierr; 1904 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1905 MatScalar *aa = a->a,*ap; 1906 PetscReal ratio=0.6; 1907 1908 PetscFunctionBegin; 1909 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1910 1911 if (m) rmax = ailen[0]; 1912 for (i=1; i<mbs; i++) { 1913 /* move each row back by the amount of empty slots (fshift) before it*/ 1914 fshift += imax[i-1] - ailen[i-1]; 1915 rmax = PetscMax(rmax,ailen[i]); 1916 if (fshift) { 1917 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1918 N = ailen[i]; 1919 for (j=0; j<N; j++) { 1920 ip[j-fshift] = ip[j]; 1921 1922 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1923 } 1924 } 1925 ai[i] = ai[i-1] + ailen[i-1]; 1926 } 1927 if (mbs) { 1928 fshift += imax[mbs-1] - ailen[mbs-1]; 1929 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1930 } 1931 1932 /* reset ilen and imax for each row */ 1933 a->nonzerorowcnt = 0; 1934 for (i=0; i<mbs; i++) { 1935 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1936 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1937 } 1938 a->nz = ai[mbs]; 1939 1940 /* diagonals may have moved, so kill the diagonal pointers */ 1941 a->idiagvalid = PETSC_FALSE; 1942 if (fshift && a->diag) { 1943 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1944 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1945 a->diag = 0; 1946 } 1947 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1948 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1949 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1950 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1951 1952 A->info.mallocs += a->reallocs; 1953 a->reallocs = 0; 1954 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1955 a->rmax = rmax; 1956 1957 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 /* 1962 This function returns an array of flags which indicate the locations of contiguous 1963 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1964 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1965 Assume: sizes should be long enough to hold all the values. 1966 */ 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1969 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1970 { 1971 PetscInt i,j,k,row; 1972 PetscBool flg; 1973 1974 PetscFunctionBegin; 1975 for (i=0,j=0; i<n; j++) { 1976 row = idx[i]; 1977 if (row%bs!=0) { /* Not the begining of a block */ 1978 sizes[j] = 1; 1979 i++; 1980 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1981 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1982 i++; 1983 } else { /* Begining of the block, so check if the complete block exists */ 1984 flg = PETSC_TRUE; 1985 for (k=1; k<bs; k++) { 1986 if (row+k != idx[i+k]) { /* break in the block */ 1987 flg = PETSC_FALSE; 1988 break; 1989 } 1990 } 1991 if (flg) { /* No break in the bs */ 1992 sizes[j] = bs; 1993 i += bs; 1994 } else { 1995 sizes[j] = 1; 1996 i++; 1997 } 1998 } 1999 } 2000 *bs_max = j; 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2006 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2007 { 2008 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2009 PetscErrorCode ierr; 2010 PetscInt i,j,k,count,*rows; 2011 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2012 PetscScalar zero = 0.0; 2013 MatScalar *aa; 2014 const PetscScalar *xx; 2015 PetscScalar *bb; 2016 2017 PetscFunctionBegin; 2018 /* fix right hand side if needed */ 2019 if (x && b) { 2020 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2021 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2022 for (i=0; i<is_n; i++) { 2023 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2024 } 2025 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2026 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2027 } 2028 2029 /* Make a copy of the IS and sort it */ 2030 /* allocate memory for rows,sizes */ 2031 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2032 2033 /* copy IS values to rows, and sort them */ 2034 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2035 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2036 2037 if (baij->keepnonzeropattern) { 2038 for (i=0; i<is_n; i++) sizes[i] = 1; 2039 bs_max = is_n; 2040 } else { 2041 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2042 A->nonzerostate++; 2043 } 2044 2045 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2046 row = rows[j]; 2047 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2048 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2049 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2050 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2051 if (diag != (PetscScalar)0.0) { 2052 if (baij->ilen[row/bs] > 0) { 2053 baij->ilen[row/bs] = 1; 2054 baij->j[baij->i[row/bs]] = row/bs; 2055 2056 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2057 } 2058 /* Now insert all the diagonal values for this bs */ 2059 for (k=0; k<bs; k++) { 2060 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2061 } 2062 } else { /* (diag == 0.0) */ 2063 baij->ilen[row/bs] = 0; 2064 } /* end (diag == 0.0) */ 2065 } else { /* (sizes[i] != bs) */ 2066 #if defined(PETSC_USE_DEBUG) 2067 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2068 #endif 2069 for (k=0; k<count; k++) { 2070 aa[0] = zero; 2071 aa += bs; 2072 } 2073 if (diag != (PetscScalar)0.0) { 2074 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2075 } 2076 } 2077 } 2078 2079 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2080 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 PetscFunctionReturn(0); 2082 } 2083 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2086 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2087 { 2088 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2089 PetscErrorCode ierr; 2090 PetscInt i,j,k,count; 2091 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2092 PetscScalar zero = 0.0; 2093 MatScalar *aa; 2094 const PetscScalar *xx; 2095 PetscScalar *bb; 2096 PetscBool *zeroed,vecs = PETSC_FALSE; 2097 2098 PetscFunctionBegin; 2099 /* fix right hand side if needed */ 2100 if (x && b) { 2101 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2102 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2103 vecs = PETSC_TRUE; 2104 } 2105 2106 /* zero the columns */ 2107 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2108 for (i=0; i<is_n; i++) { 2109 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2110 zeroed[is_idx[i]] = PETSC_TRUE; 2111 } 2112 for (i=0; i<A->rmap->N; i++) { 2113 if (!zeroed[i]) { 2114 row = i/bs; 2115 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2116 for (k=0; k<bs; k++) { 2117 col = bs*baij->j[j] + k; 2118 if (zeroed[col]) { 2119 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2120 if (vecs) bb[i] -= aa[0]*xx[col]; 2121 aa[0] = 0.0; 2122 } 2123 } 2124 } 2125 } else if (vecs) bb[i] = diag*xx[i]; 2126 } 2127 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2128 if (vecs) { 2129 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2130 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2131 } 2132 2133 /* zero the rows */ 2134 for (i=0; i<is_n; i++) { 2135 row = is_idx[i]; 2136 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2137 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2138 for (k=0; k<count; k++) { 2139 aa[0] = zero; 2140 aa += bs; 2141 } 2142 if (diag != (PetscScalar)0.0) { 2143 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2144 } 2145 } 2146 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2152 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2153 { 2154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2155 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2156 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2157 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2158 PetscErrorCode ierr; 2159 PetscInt ridx,cidx,bs2=a->bs2; 2160 PetscBool roworiented=a->roworiented; 2161 MatScalar *ap,value,*aa=a->a,*bap; 2162 2163 PetscFunctionBegin; 2164 for (k=0; k<m; k++) { /* loop over added rows */ 2165 row = im[k]; 2166 brow = row/bs; 2167 if (row < 0) continue; 2168 #if defined(PETSC_USE_DEBUG) 2169 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2170 #endif 2171 rp = aj + ai[brow]; 2172 ap = aa + bs2*ai[brow]; 2173 rmax = imax[brow]; 2174 nrow = ailen[brow]; 2175 low = 0; 2176 high = nrow; 2177 for (l=0; l<n; l++) { /* loop over added columns */ 2178 if (in[l] < 0) continue; 2179 #if defined(PETSC_USE_DEBUG) 2180 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2181 #endif 2182 col = in[l]; bcol = col/bs; 2183 ridx = row % bs; cidx = col % bs; 2184 if (roworiented) { 2185 value = v[l + k*n]; 2186 } else { 2187 value = v[k + l*m]; 2188 } 2189 if (col <= lastcol) low = 0; else high = nrow; 2190 lastcol = col; 2191 while (high-low > 7) { 2192 t = (low+high)/2; 2193 if (rp[t] > bcol) high = t; 2194 else low = t; 2195 } 2196 for (i=low; i<high; i++) { 2197 if (rp[i] > bcol) break; 2198 if (rp[i] == bcol) { 2199 bap = ap + bs2*i + bs*cidx + ridx; 2200 if (is == ADD_VALUES) *bap += value; 2201 else *bap = value; 2202 goto noinsert1; 2203 } 2204 } 2205 if (nonew == 1) goto noinsert1; 2206 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2207 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2208 N = nrow++ - 1; high++; 2209 /* shift up all the later entries in this row */ 2210 for (ii=N; ii>=i; ii--) { 2211 rp[ii+1] = rp[ii]; 2212 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2213 } 2214 if (N>=i) { 2215 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2216 } 2217 rp[i] = bcol; 2218 ap[bs2*i + bs*cidx + ridx] = value; 2219 a->nz++; 2220 A->nonzerostate++; 2221 noinsert1:; 2222 low = i; 2223 } 2224 ailen[brow] = nrow; 2225 } 2226 PetscFunctionReturn(0); 2227 } 2228 2229 #undef __FUNCT__ 2230 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2231 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2232 { 2233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2234 Mat outA; 2235 PetscErrorCode ierr; 2236 PetscBool row_identity,col_identity; 2237 2238 PetscFunctionBegin; 2239 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2240 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2241 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2242 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2243 2244 outA = inA; 2245 inA->factortype = MAT_FACTOR_LU; 2246 2247 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2248 2249 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2250 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2251 a->row = row; 2252 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2253 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2254 a->col = col; 2255 2256 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2257 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2258 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2259 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2260 2261 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2262 if (!a->solve_work) { 2263 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2264 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2265 } 2266 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2272 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2273 { 2274 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2275 PetscInt i,nz,mbs; 2276 2277 PetscFunctionBegin; 2278 nz = baij->maxnz; 2279 mbs = baij->mbs; 2280 for (i=0; i<nz; i++) { 2281 baij->j[i] = indices[i]; 2282 } 2283 baij->nz = nz; 2284 for (i=0; i<mbs; i++) { 2285 baij->ilen[i] = baij->imax[i]; 2286 } 2287 PetscFunctionReturn(0); 2288 } 2289 2290 #undef __FUNCT__ 2291 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2292 /*@ 2293 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2294 in the matrix. 2295 2296 Input Parameters: 2297 + mat - the SeqBAIJ matrix 2298 - indices - the column indices 2299 2300 Level: advanced 2301 2302 Notes: 2303 This can be called if you have precomputed the nonzero structure of the 2304 matrix and want to provide it to the matrix object to improve the performance 2305 of the MatSetValues() operation. 2306 2307 You MUST have set the correct numbers of nonzeros per row in the call to 2308 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2309 2310 MUST be called before any calls to MatSetValues(); 2311 2312 @*/ 2313 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2314 { 2315 PetscErrorCode ierr; 2316 2317 PetscFunctionBegin; 2318 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2319 PetscValidPointer(indices,2); 2320 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2321 PetscFunctionReturn(0); 2322 } 2323 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2326 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2327 { 2328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2329 PetscErrorCode ierr; 2330 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2331 PetscReal atmp; 2332 PetscScalar *x,zero = 0.0; 2333 MatScalar *aa; 2334 PetscInt ncols,brow,krow,kcol; 2335 2336 PetscFunctionBegin; 2337 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2338 bs = A->rmap->bs; 2339 aa = a->a; 2340 ai = a->i; 2341 aj = a->j; 2342 mbs = a->mbs; 2343 2344 ierr = VecSet(v,zero);CHKERRQ(ierr); 2345 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2346 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2347 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2348 for (i=0; i<mbs; i++) { 2349 ncols = ai[1] - ai[0]; ai++; 2350 brow = bs*i; 2351 for (j=0; j<ncols; j++) { 2352 for (kcol=0; kcol<bs; kcol++) { 2353 for (krow=0; krow<bs; krow++) { 2354 atmp = PetscAbsScalar(*aa);aa++; 2355 row = brow + krow; /* row index */ 2356 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2357 } 2358 } 2359 aj++; 2360 } 2361 } 2362 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatCopy_SeqBAIJ" 2368 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2369 { 2370 PetscErrorCode ierr; 2371 2372 PetscFunctionBegin; 2373 /* If the two matrices have the same copy implementation, use fast copy. */ 2374 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2375 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2376 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2377 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2378 2379 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2380 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2381 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2382 } else { 2383 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2384 } 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2390 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2391 { 2392 PetscErrorCode ierr; 2393 2394 PetscFunctionBegin; 2395 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2401 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2402 { 2403 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2404 2405 PetscFunctionBegin; 2406 *array = a->a; 2407 PetscFunctionReturn(0); 2408 } 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2412 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2413 { 2414 PetscFunctionBegin; 2415 PetscFunctionReturn(0); 2416 } 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2420 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2421 { 2422 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2423 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2424 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2425 PetscErrorCode ierr; 2426 2427 PetscFunctionBegin; 2428 /* Set the number of nonzeros in the new matrix */ 2429 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2435 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2436 { 2437 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2438 PetscErrorCode ierr; 2439 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2440 PetscBLASInt one=1; 2441 2442 PetscFunctionBegin; 2443 if (str == SAME_NONZERO_PATTERN) { 2444 PetscScalar alpha = a; 2445 PetscBLASInt bnz; 2446 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2447 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2448 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2449 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2450 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2451 } else { 2452 Mat B; 2453 PetscInt *nnz; 2454 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2455 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2456 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2457 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2458 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2459 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2460 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2461 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2462 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2463 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2464 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2465 ierr = PetscFree(nnz);CHKERRQ(ierr); 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2472 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2473 { 2474 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2475 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2476 MatScalar *aa = a->a; 2477 2478 PetscFunctionBegin; 2479 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2485 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2486 { 2487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2488 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2489 MatScalar *aa = a->a; 2490 2491 PetscFunctionBegin; 2492 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2498 /* 2499 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2500 */ 2501 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2502 { 2503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2504 PetscErrorCode ierr; 2505 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2506 PetscInt nz = a->i[m],row,*jj,mr,col; 2507 2508 PetscFunctionBegin; 2509 *nn = n; 2510 if (!ia) PetscFunctionReturn(0); 2511 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2512 else { 2513 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2514 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2515 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2516 jj = a->j; 2517 for (i=0; i<nz; i++) { 2518 collengths[jj[i]]++; 2519 } 2520 cia[0] = oshift; 2521 for (i=0; i<n; i++) { 2522 cia[i+1] = cia[i] + collengths[i]; 2523 } 2524 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2525 jj = a->j; 2526 for (row=0; row<m; row++) { 2527 mr = a->i[row+1] - a->i[row]; 2528 for (i=0; i<mr; i++) { 2529 col = *jj++; 2530 2531 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2532 } 2533 } 2534 ierr = PetscFree(collengths);CHKERRQ(ierr); 2535 *ia = cia; *ja = cja; 2536 } 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2542 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2543 { 2544 PetscErrorCode ierr; 2545 2546 PetscFunctionBegin; 2547 if (!ia) PetscFunctionReturn(0); 2548 ierr = PetscFree(*ia);CHKERRQ(ierr); 2549 ierr = PetscFree(*ja);CHKERRQ(ierr); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 /* 2554 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2555 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2556 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2557 */ 2558 #undef __FUNCT__ 2559 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2560 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2561 { 2562 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2563 PetscErrorCode ierr; 2564 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2565 PetscInt nz = a->i[m],row,*jj,mr,col; 2566 PetscInt *cspidx; 2567 2568 PetscFunctionBegin; 2569 *nn = n; 2570 if (!ia) PetscFunctionReturn(0); 2571 2572 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2573 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2574 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2575 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2576 jj = a->j; 2577 for (i=0; i<nz; i++) { 2578 collengths[jj[i]]++; 2579 } 2580 cia[0] = oshift; 2581 for (i=0; i<n; i++) { 2582 cia[i+1] = cia[i] + collengths[i]; 2583 } 2584 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2585 jj = a->j; 2586 for (row=0; row<m; row++) { 2587 mr = a->i[row+1] - a->i[row]; 2588 for (i=0; i<mr; i++) { 2589 col = *jj++; 2590 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2591 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2592 } 2593 } 2594 ierr = PetscFree(collengths);CHKERRQ(ierr); 2595 *ia = cia; *ja = cja; 2596 *spidx = cspidx; 2597 PetscFunctionReturn(0); 2598 } 2599 2600 #undef __FUNCT__ 2601 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2602 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2603 { 2604 PetscErrorCode ierr; 2605 2606 PetscFunctionBegin; 2607 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2608 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2609 PetscFunctionReturn(0); 2610 } 2611 2612 #undef __FUNCT__ 2613 #define __FUNCT__ "MatShift_SeqBAIJ" 2614 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2615 { 2616 PetscErrorCode ierr; 2617 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2618 2619 PetscFunctionBegin; 2620 if (!Y->preallocated || !aij->nz) { 2621 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2622 } 2623 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 /* -------------------------------------------------------------------*/ 2628 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2629 MatGetRow_SeqBAIJ, 2630 MatRestoreRow_SeqBAIJ, 2631 MatMult_SeqBAIJ_N, 2632 /* 4*/ MatMultAdd_SeqBAIJ_N, 2633 MatMultTranspose_SeqBAIJ, 2634 MatMultTransposeAdd_SeqBAIJ, 2635 0, 2636 0, 2637 0, 2638 /* 10*/ 0, 2639 MatLUFactor_SeqBAIJ, 2640 0, 2641 0, 2642 MatTranspose_SeqBAIJ, 2643 /* 15*/ MatGetInfo_SeqBAIJ, 2644 MatEqual_SeqBAIJ, 2645 MatGetDiagonal_SeqBAIJ, 2646 MatDiagonalScale_SeqBAIJ, 2647 MatNorm_SeqBAIJ, 2648 /* 20*/ 0, 2649 MatAssemblyEnd_SeqBAIJ, 2650 MatSetOption_SeqBAIJ, 2651 MatZeroEntries_SeqBAIJ, 2652 /* 24*/ MatZeroRows_SeqBAIJ, 2653 0, 2654 0, 2655 0, 2656 0, 2657 /* 29*/ MatSetUp_SeqBAIJ, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /* 34*/ MatDuplicate_SeqBAIJ, 2663 0, 2664 0, 2665 MatILUFactor_SeqBAIJ, 2666 0, 2667 /* 39*/ MatAXPY_SeqBAIJ, 2668 MatGetSubMatrices_SeqBAIJ, 2669 MatIncreaseOverlap_SeqBAIJ, 2670 MatGetValues_SeqBAIJ, 2671 MatCopy_SeqBAIJ, 2672 /* 44*/ 0, 2673 MatScale_SeqBAIJ, 2674 MatShift_SeqBAIJ, 2675 0, 2676 MatZeroRowsColumns_SeqBAIJ, 2677 /* 49*/ 0, 2678 MatGetRowIJ_SeqBAIJ, 2679 MatRestoreRowIJ_SeqBAIJ, 2680 MatGetColumnIJ_SeqBAIJ, 2681 MatRestoreColumnIJ_SeqBAIJ, 2682 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2683 0, 2684 0, 2685 0, 2686 MatSetValuesBlocked_SeqBAIJ, 2687 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2688 MatDestroy_SeqBAIJ, 2689 MatView_SeqBAIJ, 2690 0, 2691 0, 2692 /* 64*/ 0, 2693 0, 2694 0, 2695 0, 2696 0, 2697 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2698 0, 2699 MatConvert_Basic, 2700 0, 2701 0, 2702 /* 74*/ 0, 2703 MatFDColoringApply_BAIJ, 2704 0, 2705 0, 2706 0, 2707 /* 79*/ 0, 2708 0, 2709 0, 2710 0, 2711 MatLoad_SeqBAIJ, 2712 /* 84*/ 0, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /* 89*/ 0, 2718 0, 2719 0, 2720 0, 2721 0, 2722 /* 94*/ 0, 2723 0, 2724 0, 2725 0, 2726 0, 2727 /* 99*/ 0, 2728 0, 2729 0, 2730 0, 2731 0, 2732 /*104*/ 0, 2733 MatRealPart_SeqBAIJ, 2734 MatImaginaryPart_SeqBAIJ, 2735 0, 2736 0, 2737 /*109*/ 0, 2738 0, 2739 0, 2740 0, 2741 MatMissingDiagonal_SeqBAIJ, 2742 /*114*/ 0, 2743 0, 2744 0, 2745 0, 2746 0, 2747 /*119*/ 0, 2748 0, 2749 MatMultHermitianTranspose_SeqBAIJ, 2750 MatMultHermitianTransposeAdd_SeqBAIJ, 2751 0, 2752 /*124*/ 0, 2753 0, 2754 MatInvertBlockDiagonal_SeqBAIJ, 2755 0, 2756 0, 2757 /*129*/ 0, 2758 0, 2759 0, 2760 0, 2761 0, 2762 /*134*/ 0, 2763 0, 2764 0, 2765 0, 2766 0, 2767 /*139*/ 0, 2768 0, 2769 0, 2770 MatFDColoringSetUp_SeqXAIJ, 2771 0, 2772 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2773 }; 2774 2775 #undef __FUNCT__ 2776 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2777 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2778 { 2779 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2780 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2781 PetscErrorCode ierr; 2782 2783 PetscFunctionBegin; 2784 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2785 2786 /* allocate space for values if not already there */ 2787 if (!aij->saved_values) { 2788 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2789 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2790 } 2791 2792 /* copy values over */ 2793 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2794 PetscFunctionReturn(0); 2795 } 2796 2797 #undef __FUNCT__ 2798 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2799 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2800 { 2801 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2802 PetscErrorCode ierr; 2803 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2804 2805 PetscFunctionBegin; 2806 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2807 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2808 2809 /* copy values over */ 2810 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2811 PetscFunctionReturn(0); 2812 } 2813 2814 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2815 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2816 2817 #undef __FUNCT__ 2818 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2819 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2820 { 2821 Mat_SeqBAIJ *b; 2822 PetscErrorCode ierr; 2823 PetscInt i,mbs,nbs,bs2; 2824 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2825 2826 PetscFunctionBegin; 2827 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2828 if (nz == MAT_SKIP_ALLOCATION) { 2829 skipallocation = PETSC_TRUE; 2830 nz = 0; 2831 } 2832 2833 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2834 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2835 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2836 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2837 2838 B->preallocated = PETSC_TRUE; 2839 2840 mbs = B->rmap->n/bs; 2841 nbs = B->cmap->n/bs; 2842 bs2 = bs*bs; 2843 2844 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2845 2846 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2847 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2848 if (nnz) { 2849 for (i=0; i<mbs; i++) { 2850 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2851 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2852 } 2853 } 2854 2855 b = (Mat_SeqBAIJ*)B->data; 2856 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2857 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2858 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2859 2860 if (!flg) { 2861 switch (bs) { 2862 case 1: 2863 B->ops->mult = MatMult_SeqBAIJ_1; 2864 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2865 break; 2866 case 2: 2867 B->ops->mult = MatMult_SeqBAIJ_2; 2868 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2869 break; 2870 case 3: 2871 B->ops->mult = MatMult_SeqBAIJ_3; 2872 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2873 break; 2874 case 4: 2875 B->ops->mult = MatMult_SeqBAIJ_4; 2876 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2877 break; 2878 case 5: 2879 B->ops->mult = MatMult_SeqBAIJ_5; 2880 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2881 break; 2882 case 6: 2883 B->ops->mult = MatMult_SeqBAIJ_6; 2884 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2885 break; 2886 case 7: 2887 B->ops->mult = MatMult_SeqBAIJ_7; 2888 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2889 break; 2890 case 15: 2891 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2892 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2893 break; 2894 default: 2895 B->ops->mult = MatMult_SeqBAIJ_N; 2896 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2897 break; 2898 } 2899 } 2900 B->ops->sor = MatSOR_SeqBAIJ; 2901 b->mbs = mbs; 2902 b->nbs = nbs; 2903 if (!skipallocation) { 2904 if (!b->imax) { 2905 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2906 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2907 2908 b->free_imax_ilen = PETSC_TRUE; 2909 } 2910 /* b->ilen will count nonzeros in each block row so far. */ 2911 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2912 if (!nnz) { 2913 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2914 else if (nz < 0) nz = 1; 2915 for (i=0; i<mbs; i++) b->imax[i] = nz; 2916 nz = nz*mbs; 2917 } else { 2918 nz = 0; 2919 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2920 } 2921 2922 /* allocate the matrix space */ 2923 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2924 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2925 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2926 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2927 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2928 2929 b->singlemalloc = PETSC_TRUE; 2930 b->i[0] = 0; 2931 for (i=1; i<mbs+1; i++) { 2932 b->i[i] = b->i[i-1] + b->imax[i-1]; 2933 } 2934 b->free_a = PETSC_TRUE; 2935 b->free_ij = PETSC_TRUE; 2936 } else { 2937 b->free_a = PETSC_FALSE; 2938 b->free_ij = PETSC_FALSE; 2939 } 2940 2941 b->bs2 = bs2; 2942 b->mbs = mbs; 2943 b->nz = 0; 2944 b->maxnz = nz; 2945 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2946 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2947 PetscFunctionReturn(0); 2948 } 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2952 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2953 { 2954 PetscInt i,m,nz,nz_max=0,*nnz; 2955 PetscScalar *values=0; 2956 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2957 PetscErrorCode ierr; 2958 2959 PetscFunctionBegin; 2960 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2961 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2962 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2963 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2964 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2965 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2966 m = B->rmap->n/bs; 2967 2968 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2969 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2970 for (i=0; i<m; i++) { 2971 nz = ii[i+1]- ii[i]; 2972 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2973 nz_max = PetscMax(nz_max, nz); 2974 nnz[i] = nz; 2975 } 2976 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2977 ierr = PetscFree(nnz);CHKERRQ(ierr); 2978 2979 values = (PetscScalar*)V; 2980 if (!values) { 2981 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2982 } 2983 for (i=0; i<m; i++) { 2984 PetscInt ncols = ii[i+1] - ii[i]; 2985 const PetscInt *icols = jj + ii[i]; 2986 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2987 if (!roworiented) { 2988 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2989 } else { 2990 PetscInt j; 2991 for (j=0; j<ncols; j++) { 2992 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2993 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2994 } 2995 } 2996 } 2997 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2998 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3001 PetscFunctionReturn(0); 3002 } 3003 3004 /*MC 3005 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3006 block sparse compressed row format. 3007 3008 Options Database Keys: 3009 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3010 3011 Level: beginner 3012 3013 .seealso: MatCreateSeqBAIJ() 3014 M*/ 3015 3016 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3017 3018 #undef __FUNCT__ 3019 #define __FUNCT__ "MatCreate_SeqBAIJ" 3020 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3021 { 3022 PetscErrorCode ierr; 3023 PetscMPIInt size; 3024 Mat_SeqBAIJ *b; 3025 3026 PetscFunctionBegin; 3027 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3028 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3029 3030 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3031 B->data = (void*)b; 3032 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3033 3034 b->row = 0; 3035 b->col = 0; 3036 b->icol = 0; 3037 b->reallocs = 0; 3038 b->saved_values = 0; 3039 3040 b->roworiented = PETSC_TRUE; 3041 b->nonew = 0; 3042 b->diag = 0; 3043 B->spptr = 0; 3044 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3045 b->keepnonzeropattern = PETSC_FALSE; 3046 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3057 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3058 PetscFunctionReturn(0); 3059 } 3060 3061 #undef __FUNCT__ 3062 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3063 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3064 { 3065 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3066 PetscErrorCode ierr; 3067 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3068 3069 PetscFunctionBegin; 3070 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3071 3072 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3073 c->imax = a->imax; 3074 c->ilen = a->ilen; 3075 c->free_imax_ilen = PETSC_FALSE; 3076 } else { 3077 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3078 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3079 for (i=0; i<mbs; i++) { 3080 c->imax[i] = a->imax[i]; 3081 c->ilen[i] = a->ilen[i]; 3082 } 3083 c->free_imax_ilen = PETSC_TRUE; 3084 } 3085 3086 /* allocate the matrix space */ 3087 if (mallocmatspace) { 3088 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3089 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3090 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3091 3092 c->i = a->i; 3093 c->j = a->j; 3094 c->singlemalloc = PETSC_FALSE; 3095 c->free_a = PETSC_TRUE; 3096 c->free_ij = PETSC_FALSE; 3097 c->parent = A; 3098 C->preallocated = PETSC_TRUE; 3099 C->assembled = PETSC_TRUE; 3100 3101 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3102 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3103 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3104 } else { 3105 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3106 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3107 3108 c->singlemalloc = PETSC_TRUE; 3109 c->free_a = PETSC_TRUE; 3110 c->free_ij = PETSC_TRUE; 3111 3112 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3113 if (mbs > 0) { 3114 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3115 if (cpvalues == MAT_COPY_VALUES) { 3116 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3117 } else { 3118 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3119 } 3120 } 3121 C->preallocated = PETSC_TRUE; 3122 C->assembled = PETSC_TRUE; 3123 } 3124 } 3125 3126 c->roworiented = a->roworiented; 3127 c->nonew = a->nonew; 3128 3129 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3130 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3131 3132 c->bs2 = a->bs2; 3133 c->mbs = a->mbs; 3134 c->nbs = a->nbs; 3135 3136 if (a->diag) { 3137 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3138 c->diag = a->diag; 3139 c->free_diag = PETSC_FALSE; 3140 } else { 3141 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3142 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3143 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3144 c->free_diag = PETSC_TRUE; 3145 } 3146 } else c->diag = 0; 3147 3148 c->nz = a->nz; 3149 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3150 c->solve_work = NULL; 3151 c->mult_work = NULL; 3152 c->sor_workt = NULL; 3153 c->sor_work = NULL; 3154 3155 c->compressedrow.use = a->compressedrow.use; 3156 c->compressedrow.nrows = a->compressedrow.nrows; 3157 if (a->compressedrow.use) { 3158 i = a->compressedrow.nrows; 3159 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3160 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3161 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3162 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3163 } else { 3164 c->compressedrow.use = PETSC_FALSE; 3165 c->compressedrow.i = NULL; 3166 c->compressedrow.rindex = NULL; 3167 } 3168 C->nonzerostate = A->nonzerostate; 3169 3170 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3171 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3177 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3178 { 3179 PetscErrorCode ierr; 3180 3181 PetscFunctionBegin; 3182 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3183 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3184 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3185 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3186 PetscFunctionReturn(0); 3187 } 3188 3189 #undef __FUNCT__ 3190 #define __FUNCT__ "MatLoad_SeqBAIJ" 3191 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3192 { 3193 Mat_SeqBAIJ *a; 3194 PetscErrorCode ierr; 3195 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3196 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3197 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3198 PetscInt *masked,nmask,tmp,bs2,ishift; 3199 PetscMPIInt size; 3200 int fd; 3201 PetscScalar *aa; 3202 MPI_Comm comm; 3203 3204 PetscFunctionBegin; 3205 /* force binary viewer to load .info file if it has not yet done so */ 3206 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3207 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3208 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3209 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3210 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3211 if (bs < 0) bs = 1; 3212 bs2 = bs*bs; 3213 3214 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3215 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3216 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3217 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3218 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3219 M = header[1]; N = header[2]; nz = header[3]; 3220 3221 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3222 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3223 3224 /* 3225 This code adds extra rows to make sure the number of rows is 3226 divisible by the blocksize 3227 */ 3228 mbs = M/bs; 3229 extra_rows = bs - M + bs*(mbs); 3230 if (extra_rows == bs) extra_rows = 0; 3231 else mbs++; 3232 if (extra_rows) { 3233 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3234 } 3235 3236 /* Set global sizes if not already set */ 3237 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3238 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3239 } else { /* Check if the matrix global sizes are correct */ 3240 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3241 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3242 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3243 } 3244 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3245 } 3246 3247 /* read in row lengths */ 3248 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3249 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3250 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3251 3252 /* read in column indices */ 3253 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3254 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3255 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3256 3257 /* loop over row lengths determining block row lengths */ 3258 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3259 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3260 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3261 rowcount = 0; 3262 nzcount = 0; 3263 for (i=0; i<mbs; i++) { 3264 nmask = 0; 3265 for (j=0; j<bs; j++) { 3266 kmax = rowlengths[rowcount]; 3267 for (k=0; k<kmax; k++) { 3268 tmp = jj[nzcount++]/bs; 3269 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3270 } 3271 rowcount++; 3272 } 3273 browlengths[i] += nmask; 3274 /* zero out the mask elements we set */ 3275 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3276 } 3277 3278 /* Do preallocation */ 3279 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3280 a = (Mat_SeqBAIJ*)newmat->data; 3281 3282 /* set matrix "i" values */ 3283 a->i[0] = 0; 3284 for (i=1; i<= mbs; i++) { 3285 a->i[i] = a->i[i-1] + browlengths[i-1]; 3286 a->ilen[i-1] = browlengths[i-1]; 3287 } 3288 a->nz = 0; 3289 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3290 3291 /* read in nonzero values */ 3292 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3293 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3294 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3295 3296 /* set "a" and "j" values into matrix */ 3297 nzcount = 0; jcount = 0; 3298 for (i=0; i<mbs; i++) { 3299 nzcountb = nzcount; 3300 nmask = 0; 3301 for (j=0; j<bs; j++) { 3302 kmax = rowlengths[i*bs+j]; 3303 for (k=0; k<kmax; k++) { 3304 tmp = jj[nzcount++]/bs; 3305 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3306 } 3307 } 3308 /* sort the masked values */ 3309 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3310 3311 /* set "j" values into matrix */ 3312 maskcount = 1; 3313 for (j=0; j<nmask; j++) { 3314 a->j[jcount++] = masked[j]; 3315 mask[masked[j]] = maskcount++; 3316 } 3317 /* set "a" values into matrix */ 3318 ishift = bs2*a->i[i]; 3319 for (j=0; j<bs; j++) { 3320 kmax = rowlengths[i*bs+j]; 3321 for (k=0; k<kmax; k++) { 3322 tmp = jj[nzcountb]/bs; 3323 block = mask[tmp] - 1; 3324 point = jj[nzcountb] - bs*tmp; 3325 idx = ishift + bs2*block + j + bs*point; 3326 a->a[idx] = (MatScalar)aa[nzcountb++]; 3327 } 3328 } 3329 /* zero out the mask elements we set */ 3330 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3331 } 3332 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3333 3334 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3335 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3336 ierr = PetscFree(aa);CHKERRQ(ierr); 3337 ierr = PetscFree(jj);CHKERRQ(ierr); 3338 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3339 3340 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3341 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3342 PetscFunctionReturn(0); 3343 } 3344 3345 #undef __FUNCT__ 3346 #define __FUNCT__ "MatCreateSeqBAIJ" 3347 /*@C 3348 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3349 compressed row) format. For good matrix assembly performance the 3350 user should preallocate the matrix storage by setting the parameter nz 3351 (or the array nnz). By setting these parameters accurately, performance 3352 during matrix assembly can be increased by more than a factor of 50. 3353 3354 Collective on MPI_Comm 3355 3356 Input Parameters: 3357 + comm - MPI communicator, set to PETSC_COMM_SELF 3358 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3359 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3360 . m - number of rows 3361 . n - number of columns 3362 . nz - number of nonzero blocks per block row (same for all rows) 3363 - nnz - array containing the number of nonzero blocks in the various block rows 3364 (possibly different for each block row) or NULL 3365 3366 Output Parameter: 3367 . A - the matrix 3368 3369 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3370 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3371 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3372 3373 Options Database Keys: 3374 . -mat_no_unroll - uses code that does not unroll the loops in the 3375 block calculations (much slower) 3376 . -mat_block_size - size of the blocks to use 3377 3378 Level: intermediate 3379 3380 Notes: 3381 The number of rows and columns must be divisible by blocksize. 3382 3383 If the nnz parameter is given then the nz parameter is ignored 3384 3385 A nonzero block is any block that as 1 or more nonzeros in it 3386 3387 The block AIJ format is fully compatible with standard Fortran 77 3388 storage. That is, the stored row and column indices can begin at 3389 either one (as in Fortran) or zero. See the users' manual for details. 3390 3391 Specify the preallocated storage with either nz or nnz (not both). 3392 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3393 allocation. See Users-Manual: ch_mat for details. 3394 matrices. 3395 3396 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3397 @*/ 3398 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3399 { 3400 PetscErrorCode ierr; 3401 3402 PetscFunctionBegin; 3403 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3404 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3405 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3406 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3407 PetscFunctionReturn(0); 3408 } 3409 3410 #undef __FUNCT__ 3411 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3412 /*@C 3413 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3414 per row in the matrix. For good matrix assembly performance the 3415 user should preallocate the matrix storage by setting the parameter nz 3416 (or the array nnz). By setting these parameters accurately, performance 3417 during matrix assembly can be increased by more than a factor of 50. 3418 3419 Collective on MPI_Comm 3420 3421 Input Parameters: 3422 + B - the matrix 3423 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3424 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3425 . nz - number of block nonzeros per block row (same for all rows) 3426 - nnz - array containing the number of block nonzeros in the various block rows 3427 (possibly different for each block row) or NULL 3428 3429 Options Database Keys: 3430 . -mat_no_unroll - uses code that does not unroll the loops in the 3431 block calculations (much slower) 3432 . -mat_block_size - size of the blocks to use 3433 3434 Level: intermediate 3435 3436 Notes: 3437 If the nnz parameter is given then the nz parameter is ignored 3438 3439 You can call MatGetInfo() to get information on how effective the preallocation was; 3440 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3441 You can also run with the option -info and look for messages with the string 3442 malloc in them to see if additional memory allocation was needed. 3443 3444 The block AIJ format is fully compatible with standard Fortran 77 3445 storage. That is, the stored row and column indices can begin at 3446 either one (as in Fortran) or zero. See the users' manual for details. 3447 3448 Specify the preallocated storage with either nz or nnz (not both). 3449 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3450 allocation. See Users-Manual: ch_mat for details. 3451 3452 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3453 @*/ 3454 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3455 { 3456 PetscErrorCode ierr; 3457 3458 PetscFunctionBegin; 3459 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3460 PetscValidType(B,1); 3461 PetscValidLogicalCollectiveInt(B,bs,2); 3462 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3463 PetscFunctionReturn(0); 3464 } 3465 3466 #undef __FUNCT__ 3467 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3468 /*@C 3469 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3470 (the default sequential PETSc format). 3471 3472 Collective on MPI_Comm 3473 3474 Input Parameters: 3475 + B - the matrix 3476 . i - the indices into j for the start of each local row (starts with zero) 3477 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3478 - v - optional values in the matrix 3479 3480 Level: developer 3481 3482 Notes: 3483 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3484 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3485 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3486 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3487 block column and the second index is over columns within a block. 3488 3489 .keywords: matrix, aij, compressed row, sparse 3490 3491 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3492 @*/ 3493 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3494 { 3495 PetscErrorCode ierr; 3496 3497 PetscFunctionBegin; 3498 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3499 PetscValidType(B,1); 3500 PetscValidLogicalCollectiveInt(B,bs,2); 3501 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3502 PetscFunctionReturn(0); 3503 } 3504 3505 3506 #undef __FUNCT__ 3507 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3508 /*@ 3509 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3510 3511 Collective on MPI_Comm 3512 3513 Input Parameters: 3514 + comm - must be an MPI communicator of size 1 3515 . bs - size of block 3516 . m - number of rows 3517 . n - number of columns 3518 . i - row indices 3519 . j - column indices 3520 - a - matrix values 3521 3522 Output Parameter: 3523 . mat - the matrix 3524 3525 Level: advanced 3526 3527 Notes: 3528 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3529 once the matrix is destroyed 3530 3531 You cannot set new nonzero locations into this matrix, that will generate an error. 3532 3533 The i and j indices are 0 based 3534 3535 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3536 3537 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3538 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3539 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3540 with column-major ordering within blocks. 3541 3542 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3543 3544 @*/ 3545 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3546 { 3547 PetscErrorCode ierr; 3548 PetscInt ii; 3549 Mat_SeqBAIJ *baij; 3550 3551 PetscFunctionBegin; 3552 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3553 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3554 3555 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3556 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3557 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3558 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3559 baij = (Mat_SeqBAIJ*)(*mat)->data; 3560 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3561 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3562 3563 baij->i = i; 3564 baij->j = j; 3565 baij->a = a; 3566 3567 baij->singlemalloc = PETSC_FALSE; 3568 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3569 baij->free_a = PETSC_FALSE; 3570 baij->free_ij = PETSC_FALSE; 3571 3572 for (ii=0; ii<m; ii++) { 3573 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3574 #if defined(PETSC_USE_DEBUG) 3575 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3576 #endif 3577 } 3578 #if defined(PETSC_USE_DEBUG) 3579 for (ii=0; ii<baij->i[m]; ii++) { 3580 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3581 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3582 } 3583 #endif 3584 3585 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3586 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3587 PetscFunctionReturn(0); 3588 } 3589 3590 #undef __FUNCT__ 3591 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3592 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3593 { 3594 PetscErrorCode ierr; 3595 3596 PetscFunctionBegin; 3597 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3598 PetscFunctionReturn(0); 3599 } 3600