1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 allowzeropivot = PetscNot(A->erroriffailure); 24 25 if (a->idiagvalid) { 26 if (values) *values = a->idiag; 27 PetscFunctionReturn(0); 28 } 29 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 30 diag_offset = a->diag; 31 if (!a->idiag) { 32 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 33 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 34 } 35 diag = a->idiag; 36 mdiag = a->idiag+bs2*mbs; 37 if (values) *values = a->idiag; 38 /* factor and invert each block */ 39 switch (bs) { 40 case 1: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 1*diag_offset[i]; 43 diag[0] = odiag[0]; 44 mdiag[0] = odiag[0]; 45 46 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 47 if (allowzeropivot) { 48 A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 49 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 50 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i); 51 } 52 53 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 54 diag += 1; 55 mdiag += 1; 56 } 57 break; 58 case 2: 59 for (i=0; i<mbs; i++) { 60 odiag = v + 4*diag_offset[i]; 61 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 62 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 63 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 64 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 65 diag += 4; 66 mdiag += 4; 67 } 68 break; 69 case 3: 70 for (i=0; i<mbs; i++) { 71 odiag = v + 9*diag_offset[i]; 72 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 73 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 74 diag[8] = odiag[8]; 75 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 76 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 77 mdiag[8] = odiag[8]; 78 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 79 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 80 diag += 9; 81 mdiag += 9; 82 } 83 break; 84 case 4: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 16*diag_offset[i]; 87 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 88 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 mdiag += 16; 93 } 94 break; 95 case 5: 96 for (i=0; i<mbs; i++) { 97 odiag = v + 25*diag_offset[i]; 98 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 99 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 100 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 101 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 102 diag += 25; 103 mdiag += 25; 104 } 105 break; 106 case 6: 107 for (i=0; i<mbs; i++) { 108 odiag = v + 36*diag_offset[i]; 109 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 110 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 111 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 112 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 113 diag += 36; 114 mdiag += 36; 115 } 116 break; 117 case 7: 118 for (i=0; i<mbs; i++) { 119 odiag = v + 49*diag_offset[i]; 120 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 121 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 123 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 124 diag += 49; 125 mdiag += 49; 126 } 127 break; 128 default: 129 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 130 for (i=0; i<mbs; i++) { 131 odiag = v + bs2*diag_offset[i]; 132 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 133 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 134 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 135 diag += bs2; 136 mdiag += bs2; 137 } 138 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 139 } 140 a->idiagvalid = PETSC_TRUE; 141 PetscFunctionReturn(0); 142 } 143 144 #undef __FUNCT__ 145 #define __FUNCT__ "MatSOR_SeqBAIJ" 146 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 147 { 148 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 149 PetscScalar *x,*work,*w,*workt,*t; 150 const MatScalar *v,*aa = a->a, *idiag; 151 const PetscScalar *b,*xb; 152 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 153 PetscErrorCode ierr; 154 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 155 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 156 157 PetscFunctionBegin; 158 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 159 its = its*lits; 160 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 161 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 162 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 163 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 164 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 165 166 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 167 168 if (!m) PetscFunctionReturn(0); 169 diag = a->diag; 170 idiag = a->idiag; 171 k = PetscMax(A->rmap->n,A->cmap->n); 172 if (!a->mult_work) { 173 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 174 } 175 if (!a->sor_workt) { 176 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 177 } 178 if (!a->sor_work) { 179 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 180 } 181 work = a->mult_work; 182 t = a->sor_workt; 183 w = a->sor_work; 184 185 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 186 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 187 188 if (flag & SOR_ZERO_INITIAL_GUESS) { 189 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 190 switch (bs) { 191 case 1: 192 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 193 t[0] = b[0]; 194 i2 = 1; 195 idiag += 1; 196 for (i=1; i<m; i++) { 197 v = aa + ai[i]; 198 vi = aj + ai[i]; 199 nz = diag[i] - ai[i]; 200 s[0] = b[i2]; 201 for (j=0; j<nz; j++) { 202 xw[0] = x[vi[j]]; 203 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 204 } 205 t[i2] = s[0]; 206 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 207 x[i2] = xw[0]; 208 idiag += 1; 209 i2 += 1; 210 } 211 break; 212 case 2: 213 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 214 t[0] = b[0]; t[1] = b[1]; 215 i2 = 2; 216 idiag += 4; 217 for (i=1; i<m; i++) { 218 v = aa + 4*ai[i]; 219 vi = aj + ai[i]; 220 nz = diag[i] - ai[i]; 221 s[0] = b[i2]; s[1] = b[i2+1]; 222 for (j=0; j<nz; j++) { 223 idx = 2*vi[j]; 224 it = 4*j; 225 xw[0] = x[idx]; xw[1] = x[1+idx]; 226 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 227 } 228 t[i2] = s[0]; t[i2+1] = s[1]; 229 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 230 x[i2] = xw[0]; x[i2+1] = xw[1]; 231 idiag += 4; 232 i2 += 2; 233 } 234 break; 235 case 3: 236 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 237 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 238 i2 = 3; 239 idiag += 9; 240 for (i=1; i<m; i++) { 241 v = aa + 9*ai[i]; 242 vi = aj + ai[i]; 243 nz = diag[i] - ai[i]; 244 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 245 while (nz--) { 246 idx = 3*(*vi++); 247 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 248 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 249 v += 9; 250 } 251 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 252 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 253 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 254 idiag += 9; 255 i2 += 3; 256 } 257 break; 258 case 4: 259 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 260 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 261 i2 = 4; 262 idiag += 16; 263 for (i=1; i<m; i++) { 264 v = aa + 16*ai[i]; 265 vi = aj + ai[i]; 266 nz = diag[i] - ai[i]; 267 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 268 while (nz--) { 269 idx = 4*(*vi++); 270 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 271 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 272 v += 16; 273 } 274 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 275 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 276 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 277 idiag += 16; 278 i2 += 4; 279 } 280 break; 281 case 5: 282 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 283 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 284 i2 = 5; 285 idiag += 25; 286 for (i=1; i<m; i++) { 287 v = aa + 25*ai[i]; 288 vi = aj + ai[i]; 289 nz = diag[i] - ai[i]; 290 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 291 while (nz--) { 292 idx = 5*(*vi++); 293 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 294 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 295 v += 25; 296 } 297 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 298 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 299 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 300 idiag += 25; 301 i2 += 5; 302 } 303 break; 304 case 6: 305 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 306 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 307 i2 = 6; 308 idiag += 36; 309 for (i=1; i<m; i++) { 310 v = aa + 36*ai[i]; 311 vi = aj + ai[i]; 312 nz = diag[i] - ai[i]; 313 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 314 while (nz--) { 315 idx = 6*(*vi++); 316 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 317 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 318 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 319 v += 36; 320 } 321 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 322 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 323 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 324 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 325 idiag += 36; 326 i2 += 6; 327 } 328 break; 329 case 7: 330 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 331 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 332 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 333 i2 = 7; 334 idiag += 49; 335 for (i=1; i<m; i++) { 336 v = aa + 49*ai[i]; 337 vi = aj + ai[i]; 338 nz = diag[i] - ai[i]; 339 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 340 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 341 while (nz--) { 342 idx = 7*(*vi++); 343 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 344 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 345 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 346 v += 49; 347 } 348 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 349 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 350 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 351 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 352 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 353 idiag += 49; 354 i2 += 7; 355 } 356 break; 357 default: 358 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 359 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 360 i2 = bs; 361 idiag += bs2; 362 for (i=1; i<m; i++) { 363 v = aa + bs2*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 367 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 368 /* copy all rows of x that are needed into contiguous space */ 369 workt = work; 370 for (j=0; j<nz; j++) { 371 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 372 workt += bs; 373 } 374 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 375 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 376 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 377 378 idiag += bs2; 379 i2 += bs; 380 } 381 break; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 385 xb = t; 386 } 387 else xb = b; 388 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 idiag = a->idiag+bs2*(a->mbs-1); 390 i2 = bs * (m-1); 391 switch (bs) { 392 case 1: 393 s[0] = xb[i2]; 394 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 395 x[i2] = xw[0]; 396 i2 -= 1; 397 for (i=m-2; i>=0; i--) { 398 v = aa + (diag[i]+1); 399 vi = aj + diag[i] + 1; 400 nz = ai[i+1] - diag[i] - 1; 401 s[0] = xb[i2]; 402 for (j=0; j<nz; j++) { 403 xw[0] = x[vi[j]]; 404 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 405 } 406 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 407 x[i2] = xw[0]; 408 idiag -= 1; 409 i2 -= 1; 410 } 411 break; 412 case 2: 413 s[0] = xb[i2]; s[1] = xb[i2+1]; 414 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 415 x[i2] = xw[0]; x[i2+1] = xw[1]; 416 i2 -= 2; 417 idiag -= 4; 418 for (i=m-2; i>=0; i--) { 419 v = aa + 4*(diag[i] + 1); 420 vi = aj + diag[i] + 1; 421 nz = ai[i+1] - diag[i] - 1; 422 s[0] = xb[i2]; s[1] = xb[i2+1]; 423 for (j=0; j<nz; j++) { 424 idx = 2*vi[j]; 425 it = 4*j; 426 xw[0] = x[idx]; xw[1] = x[1+idx]; 427 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 428 } 429 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 430 x[i2] = xw[0]; x[i2+1] = xw[1]; 431 idiag -= 4; 432 i2 -= 2; 433 } 434 break; 435 case 3: 436 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 437 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 438 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 439 i2 -= 3; 440 idiag -= 9; 441 for (i=m-2; i>=0; i--) { 442 v = aa + 9*(diag[i]+1); 443 vi = aj + diag[i] + 1; 444 nz = ai[i+1] - diag[i] - 1; 445 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 446 while (nz--) { 447 idx = 3*(*vi++); 448 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 449 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 450 v += 9; 451 } 452 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 453 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 454 idiag -= 9; 455 i2 -= 3; 456 } 457 break; 458 case 4: 459 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 460 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 461 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 462 i2 -= 4; 463 idiag -= 16; 464 for (i=m-2; i>=0; i--) { 465 v = aa + 16*(diag[i]+1); 466 vi = aj + diag[i] + 1; 467 nz = ai[i+1] - diag[i] - 1; 468 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 469 while (nz--) { 470 idx = 4*(*vi++); 471 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 472 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 473 v += 16; 474 } 475 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 476 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 477 idiag -= 16; 478 i2 -= 4; 479 } 480 break; 481 case 5: 482 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 483 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 484 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 485 i2 -= 5; 486 idiag -= 25; 487 for (i=m-2; i>=0; i--) { 488 v = aa + 25*(diag[i]+1); 489 vi = aj + diag[i] + 1; 490 nz = ai[i+1] - diag[i] - 1; 491 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 492 while (nz--) { 493 idx = 5*(*vi++); 494 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 495 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 496 v += 25; 497 } 498 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 499 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 500 idiag -= 25; 501 i2 -= 5; 502 } 503 break; 504 case 6: 505 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 506 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 507 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 508 i2 -= 6; 509 idiag -= 36; 510 for (i=m-2; i>=0; i--) { 511 v = aa + 36*(diag[i]+1); 512 vi = aj + diag[i] + 1; 513 nz = ai[i+1] - diag[i] - 1; 514 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 515 while (nz--) { 516 idx = 6*(*vi++); 517 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 518 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 519 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 520 v += 36; 521 } 522 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 523 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 524 idiag -= 36; 525 i2 -= 6; 526 } 527 break; 528 case 7: 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 532 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 533 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 534 i2 -= 7; 535 idiag -= 49; 536 for (i=m-2; i>=0; i--) { 537 v = aa + 49*(diag[i]+1); 538 vi = aj + diag[i] + 1; 539 nz = ai[i+1] - diag[i] - 1; 540 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 541 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 542 while (nz--) { 543 idx = 7*(*vi++); 544 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 545 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 546 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 547 v += 49; 548 } 549 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 550 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 551 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 552 idiag -= 49; 553 i2 -= 7; 554 } 555 break; 556 default: 557 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 558 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 559 i2 -= bs; 560 idiag -= bs2; 561 for (i=m-2; i>=0; i--) { 562 v = aa + bs2*(diag[i]+1); 563 vi = aj + diag[i] + 1; 564 nz = ai[i+1] - diag[i] - 1; 565 566 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 567 /* copy all rows of x that are needed into contiguous space */ 568 workt = work; 569 for (j=0; j<nz; j++) { 570 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 571 workt += bs; 572 } 573 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 574 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 575 576 idiag -= bs2; 577 i2 -= bs; 578 } 579 break; 580 } 581 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 582 } 583 its--; 584 } 585 while (its--) { 586 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 587 idiag = a->idiag; 588 i2 = 0; 589 switch (bs) { 590 case 1: 591 for (i=0; i<m; i++) { 592 v = aa + ai[i]; 593 vi = aj + ai[i]; 594 nz = ai[i+1] - ai[i]; 595 s[0] = b[i2]; 596 for (j=0; j<nz; j++) { 597 xw[0] = x[vi[j]]; 598 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 599 } 600 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 601 x[i2] += xw[0]; 602 idiag += 1; 603 i2 += 1; 604 } 605 break; 606 case 2: 607 for (i=0; i<m; i++) { 608 v = aa + 4*ai[i]; 609 vi = aj + ai[i]; 610 nz = ai[i+1] - ai[i]; 611 s[0] = b[i2]; s[1] = b[i2+1]; 612 for (j=0; j<nz; j++) { 613 idx = 2*vi[j]; 614 it = 4*j; 615 xw[0] = x[idx]; xw[1] = x[1+idx]; 616 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 617 } 618 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 619 x[i2] += xw[0]; x[i2+1] += xw[1]; 620 idiag += 4; 621 i2 += 2; 622 } 623 break; 624 case 3: 625 for (i=0; i<m; i++) { 626 v = aa + 9*ai[i]; 627 vi = aj + ai[i]; 628 nz = ai[i+1] - ai[i]; 629 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 630 while (nz--) { 631 idx = 3*(*vi++); 632 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 633 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 634 v += 9; 635 } 636 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 637 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 638 idiag += 9; 639 i2 += 3; 640 } 641 break; 642 case 4: 643 for (i=0; i<m; i++) { 644 v = aa + 16*ai[i]; 645 vi = aj + ai[i]; 646 nz = ai[i+1] - ai[i]; 647 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 648 while (nz--) { 649 idx = 4*(*vi++); 650 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 651 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 652 v += 16; 653 } 654 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 655 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 656 idiag += 16; 657 i2 += 4; 658 } 659 break; 660 case 5: 661 for (i=0; i<m; i++) { 662 v = aa + 25*ai[i]; 663 vi = aj + ai[i]; 664 nz = ai[i+1] - ai[i]; 665 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 666 while (nz--) { 667 idx = 5*(*vi++); 668 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 669 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 670 v += 25; 671 } 672 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 673 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 674 idiag += 25; 675 i2 += 5; 676 } 677 break; 678 case 6: 679 for (i=0; i<m; i++) { 680 v = aa + 36*ai[i]; 681 vi = aj + ai[i]; 682 nz = ai[i+1] - ai[i]; 683 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 684 while (nz--) { 685 idx = 6*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 688 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 689 v += 36; 690 } 691 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 692 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 693 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 694 idiag += 36; 695 i2 += 6; 696 } 697 break; 698 case 7: 699 for (i=0; i<m; i++) { 700 v = aa + 49*ai[i]; 701 vi = aj + ai[i]; 702 nz = ai[i+1] - ai[i]; 703 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 704 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 705 while (nz--) { 706 idx = 7*(*vi++); 707 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 708 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 709 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 710 v += 49; 711 } 712 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 713 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 714 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 715 idiag += 49; 716 i2 += 7; 717 } 718 break; 719 default: 720 for (i=0; i<m; i++) { 721 v = aa + bs2*ai[i]; 722 vi = aj + ai[i]; 723 nz = ai[i+1] - ai[i]; 724 725 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 726 /* copy all rows of x that are needed into contiguous space */ 727 workt = work; 728 for (j=0; j<nz; j++) { 729 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 730 workt += bs; 731 } 732 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 733 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 734 735 idiag += bs2; 736 i2 += bs; 737 } 738 break; 739 } 740 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 741 } 742 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 743 idiag = a->idiag+bs2*(a->mbs-1); 744 i2 = bs * (m-1); 745 switch (bs) { 746 case 1: 747 for (i=m-1; i>=0; i--) { 748 v = aa + ai[i]; 749 vi = aj + ai[i]; 750 nz = ai[i+1] - ai[i]; 751 s[0] = b[i2]; 752 for (j=0; j<nz; j++) { 753 xw[0] = x[vi[j]]; 754 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 755 } 756 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 757 x[i2] += xw[0]; 758 idiag -= 1; 759 i2 -= 1; 760 } 761 break; 762 case 2: 763 for (i=m-1; i>=0; i--) { 764 v = aa + 4*ai[i]; 765 vi = aj + ai[i]; 766 nz = ai[i+1] - ai[i]; 767 s[0] = b[i2]; s[1] = b[i2+1]; 768 for (j=0; j<nz; j++) { 769 idx = 2*vi[j]; 770 it = 4*j; 771 xw[0] = x[idx]; xw[1] = x[1+idx]; 772 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 773 } 774 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 775 x[i2] += xw[0]; x[i2+1] += xw[1]; 776 idiag -= 4; 777 i2 -= 2; 778 } 779 break; 780 case 3: 781 for (i=m-1; i>=0; i--) { 782 v = aa + 9*ai[i]; 783 vi = aj + ai[i]; 784 nz = ai[i+1] - ai[i]; 785 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 786 while (nz--) { 787 idx = 3*(*vi++); 788 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 789 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 790 v += 9; 791 } 792 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 793 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 794 idiag -= 9; 795 i2 -= 3; 796 } 797 break; 798 case 4: 799 for (i=m-1; i>=0; i--) { 800 v = aa + 16*ai[i]; 801 vi = aj + ai[i]; 802 nz = ai[i+1] - ai[i]; 803 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 804 while (nz--) { 805 idx = 4*(*vi++); 806 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 807 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 808 v += 16; 809 } 810 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 811 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 812 idiag -= 16; 813 i2 -= 4; 814 } 815 break; 816 case 5: 817 for (i=m-1; i>=0; i--) { 818 v = aa + 25*ai[i]; 819 vi = aj + ai[i]; 820 nz = ai[i+1] - ai[i]; 821 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 822 while (nz--) { 823 idx = 5*(*vi++); 824 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 825 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 826 v += 25; 827 } 828 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 829 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 830 idiag -= 25; 831 i2 -= 5; 832 } 833 break; 834 case 6: 835 for (i=m-1; i>=0; i--) { 836 v = aa + 36*ai[i]; 837 vi = aj + ai[i]; 838 nz = ai[i+1] - ai[i]; 839 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 840 while (nz--) { 841 idx = 6*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 844 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 845 v += 36; 846 } 847 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 848 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 849 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 850 idiag -= 36; 851 i2 -= 6; 852 } 853 break; 854 case 7: 855 for (i=m-1; i>=0; i--) { 856 v = aa + 49*ai[i]; 857 vi = aj + ai[i]; 858 nz = ai[i+1] - ai[i]; 859 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 860 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 861 while (nz--) { 862 idx = 7*(*vi++); 863 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 864 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 865 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 866 v += 49; 867 } 868 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 869 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 870 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 871 idiag -= 49; 872 i2 -= 7; 873 } 874 break; 875 default: 876 for (i=m-1; i>=0; i--) { 877 v = aa + bs2*ai[i]; 878 vi = aj + ai[i]; 879 nz = ai[i+1] - ai[i]; 880 881 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 882 /* copy all rows of x that are needed into contiguous space */ 883 workt = work; 884 for (j=0; j<nz; j++) { 885 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 886 workt += bs; 887 } 888 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 889 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 890 891 idiag -= bs2; 892 i2 -= bs; 893 } 894 break; 895 } 896 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 897 } 898 } 899 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 900 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 905 /* 906 Special version for direct calls from Fortran (Used in PETSc-fun3d) 907 */ 908 #if defined(PETSC_HAVE_FORTRAN_CAPS) 909 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 910 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 911 #define matsetvaluesblocked4_ matsetvaluesblocked4 912 #endif 913 914 #undef __FUNCT__ 915 #define __FUNCT__ "matsetvaluesblocked4_" 916 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 917 { 918 Mat A = *AA; 919 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 920 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 921 PetscInt *ai =a->i,*ailen=a->ilen; 922 PetscInt *aj =a->j,stepval,lastcol = -1; 923 const PetscScalar *value = v; 924 MatScalar *ap,*aa = a->a,*bap; 925 926 PetscFunctionBegin; 927 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 928 stepval = (n-1)*4; 929 for (k=0; k<m; k++) { /* loop over added rows */ 930 row = im[k]; 931 rp = aj + ai[row]; 932 ap = aa + 16*ai[row]; 933 nrow = ailen[row]; 934 low = 0; 935 high = nrow; 936 for (l=0; l<n; l++) { /* loop over added columns */ 937 col = in[l]; 938 if (col <= lastcol) low = 0; 939 else high = nrow; 940 lastcol = col; 941 value = v + k*(stepval+4 + l)*4; 942 while (high-low > 7) { 943 t = (low+high)/2; 944 if (rp[t] > col) high = t; 945 else low = t; 946 } 947 for (i=low; i<high; i++) { 948 if (rp[i] > col) break; 949 if (rp[i] == col) { 950 bap = ap + 16*i; 951 for (ii=0; ii<4; ii++,value+=stepval) { 952 for (jj=ii; jj<16; jj+=4) { 953 bap[jj] += *value++; 954 } 955 } 956 goto noinsert2; 957 } 958 } 959 N = nrow++ - 1; 960 high++; /* added new column index thus must search to one higher than before */ 961 /* shift up all the later entries in this row */ 962 for (ii=N; ii>=i; ii--) { 963 rp[ii+1] = rp[ii]; 964 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 965 } 966 if (N >= i) { 967 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 968 } 969 rp[i] = col; 970 bap = ap + 16*i; 971 for (ii=0; ii<4; ii++,value+=stepval) { 972 for (jj=ii; jj<16; jj+=4) { 973 bap[jj] = *value++; 974 } 975 } 976 noinsert2:; 977 low = i; 978 } 979 ailen[row] = nrow; 980 } 981 PetscFunctionReturnVoid(); 982 } 983 984 #if defined(PETSC_HAVE_FORTRAN_CAPS) 985 #define matsetvalues4_ MATSETVALUES4 986 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 987 #define matsetvalues4_ matsetvalues4 988 #endif 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatSetValues4_" 992 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 993 { 994 Mat A = *AA; 995 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 996 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 997 PetscInt *ai=a->i,*ailen=a->ilen; 998 PetscInt *aj=a->j,brow,bcol; 999 PetscInt ridx,cidx,lastcol = -1; 1000 MatScalar *ap,value,*aa=a->a,*bap; 1001 1002 PetscFunctionBegin; 1003 for (k=0; k<m; k++) { /* loop over added rows */ 1004 row = im[k]; brow = row/4; 1005 rp = aj + ai[brow]; 1006 ap = aa + 16*ai[brow]; 1007 nrow = ailen[brow]; 1008 low = 0; 1009 high = nrow; 1010 for (l=0; l<n; l++) { /* loop over added columns */ 1011 col = in[l]; bcol = col/4; 1012 ridx = row % 4; cidx = col % 4; 1013 value = v[l + k*n]; 1014 if (col <= lastcol) low = 0; 1015 else high = nrow; 1016 lastcol = col; 1017 while (high-low > 7) { 1018 t = (low+high)/2; 1019 if (rp[t] > bcol) high = t; 1020 else low = t; 1021 } 1022 for (i=low; i<high; i++) { 1023 if (rp[i] > bcol) break; 1024 if (rp[i] == bcol) { 1025 bap = ap + 16*i + 4*cidx + ridx; 1026 *bap += value; 1027 goto noinsert1; 1028 } 1029 } 1030 N = nrow++ - 1; 1031 high++; /* added new column thus must search to one higher than before */ 1032 /* shift up all the later entries in this row */ 1033 for (ii=N; ii>=i; ii--) { 1034 rp[ii+1] = rp[ii]; 1035 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1036 } 1037 if (N>=i) { 1038 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1039 } 1040 rp[i] = bcol; 1041 ap[16*i + 4*cidx + ridx] = value; 1042 noinsert1:; 1043 low = i; 1044 } 1045 ailen[brow] = nrow; 1046 } 1047 PetscFunctionReturnVoid(); 1048 } 1049 1050 /* 1051 Checks for missing diagonals 1052 */ 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1055 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1056 { 1057 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1058 PetscErrorCode ierr; 1059 PetscInt *diag,*ii = a->i,i; 1060 1061 PetscFunctionBegin; 1062 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1063 *missing = PETSC_FALSE; 1064 if (A->rmap->n > 0 && !ii) { 1065 *missing = PETSC_TRUE; 1066 if (d) *d = 0; 1067 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1068 } else { 1069 diag = a->diag; 1070 for (i=0; i<a->mbs; i++) { 1071 if (diag[i] >= ii[i+1]) { 1072 *missing = PETSC_TRUE; 1073 if (d) *d = i; 1074 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1075 break; 1076 } 1077 } 1078 } 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1084 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1085 { 1086 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1087 PetscErrorCode ierr; 1088 PetscInt i,j,m = a->mbs; 1089 1090 PetscFunctionBegin; 1091 if (!a->diag) { 1092 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1093 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1094 a->free_diag = PETSC_TRUE; 1095 } 1096 for (i=0; i<m; i++) { 1097 a->diag[i] = a->i[i+1]; 1098 for (j=a->i[i]; j<a->i[i+1]; j++) { 1099 if (a->j[j] == i) { 1100 a->diag[i] = j; 1101 break; 1102 } 1103 } 1104 } 1105 PetscFunctionReturn(0); 1106 } 1107 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1111 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1112 { 1113 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1114 PetscErrorCode ierr; 1115 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1116 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1117 1118 PetscFunctionBegin; 1119 *nn = n; 1120 if (!ia) PetscFunctionReturn(0); 1121 if (symmetric) { 1122 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1123 nz = tia[n]; 1124 } else { 1125 tia = a->i; tja = a->j; 1126 } 1127 1128 if (!blockcompressed && bs > 1) { 1129 (*nn) *= bs; 1130 /* malloc & create the natural set of indices */ 1131 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1132 if (n) { 1133 (*ia)[0] = 0; 1134 for (j=1; j<bs; j++) { 1135 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1136 } 1137 } 1138 1139 for (i=1; i<n; i++) { 1140 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1141 for (j=1; j<bs; j++) { 1142 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1143 } 1144 } 1145 if (n) { 1146 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1147 } 1148 1149 if (inja) { 1150 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1151 cnt = 0; 1152 for (i=0; i<n; i++) { 1153 for (j=0; j<bs; j++) { 1154 for (k=tia[i]; k<tia[i+1]; k++) { 1155 for (l=0; l<bs; l++) { 1156 (*ja)[cnt++] = bs*tja[k] + l; 1157 } 1158 } 1159 } 1160 } 1161 } 1162 1163 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1164 ierr = PetscFree(tia);CHKERRQ(ierr); 1165 ierr = PetscFree(tja);CHKERRQ(ierr); 1166 } 1167 } else if (oshift == 1) { 1168 if (symmetric) { 1169 nz = tia[A->rmap->n/bs]; 1170 /* add 1 to i and j indices */ 1171 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1172 *ia = tia; 1173 if (ja) { 1174 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1175 *ja = tja; 1176 } 1177 } else { 1178 nz = a->i[A->rmap->n/bs]; 1179 /* malloc space and add 1 to i and j indices */ 1180 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1181 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1182 if (ja) { 1183 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1184 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1185 } 1186 } 1187 } else { 1188 *ia = tia; 1189 if (ja) *ja = tja; 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1196 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1197 { 1198 PetscErrorCode ierr; 1199 1200 PetscFunctionBegin; 1201 if (!ia) PetscFunctionReturn(0); 1202 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1203 ierr = PetscFree(*ia);CHKERRQ(ierr); 1204 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1211 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1212 { 1213 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1214 PetscErrorCode ierr; 1215 1216 PetscFunctionBegin; 1217 #if defined(PETSC_USE_LOG) 1218 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1219 #endif 1220 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1221 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1222 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1223 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1224 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1225 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1226 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1227 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1228 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1229 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1230 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1231 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1232 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1233 1234 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1235 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1236 ierr = PetscFree(A->data);CHKERRQ(ierr); 1237 1238 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1254 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1255 { 1256 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 switch (op) { 1261 case MAT_ROW_ORIENTED: 1262 a->roworiented = flg; 1263 break; 1264 case MAT_KEEP_NONZERO_PATTERN: 1265 a->keepnonzeropattern = flg; 1266 break; 1267 case MAT_NEW_NONZERO_LOCATIONS: 1268 a->nonew = (flg ? 0 : 1); 1269 break; 1270 case MAT_NEW_NONZERO_LOCATION_ERR: 1271 a->nonew = (flg ? -1 : 0); 1272 break; 1273 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1274 a->nonew = (flg ? -2 : 0); 1275 break; 1276 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1277 a->nounused = (flg ? -1 : 0); 1278 break; 1279 case MAT_NEW_DIAGONALS: 1280 case MAT_IGNORE_OFF_PROC_ENTRIES: 1281 case MAT_USE_HASH_TABLE: 1282 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1283 break; 1284 case MAT_SPD: 1285 case MAT_SYMMETRIC: 1286 case MAT_STRUCTURALLY_SYMMETRIC: 1287 case MAT_HERMITIAN: 1288 case MAT_SYMMETRY_ETERNAL: 1289 /* These options are handled directly by MatSetOption() */ 1290 break; 1291 default: 1292 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1293 } 1294 PetscFunctionReturn(0); 1295 } 1296 1297 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1300 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1301 { 1302 PetscErrorCode ierr; 1303 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1304 MatScalar *aa_i; 1305 PetscScalar *v_i; 1306 1307 PetscFunctionBegin; 1308 bs = A->rmap->bs; 1309 bs2 = bs*bs; 1310 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1311 1312 bn = row/bs; /* Block number */ 1313 bp = row % bs; /* Block Position */ 1314 M = ai[bn+1] - ai[bn]; 1315 *nz = bs*M; 1316 1317 if (v) { 1318 *v = 0; 1319 if (*nz) { 1320 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1321 for (i=0; i<M; i++) { /* for each block in the block row */ 1322 v_i = *v + i*bs; 1323 aa_i = aa + bs2*(ai[bn] + i); 1324 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1325 } 1326 } 1327 } 1328 1329 if (idx) { 1330 *idx = 0; 1331 if (*nz) { 1332 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1333 for (i=0; i<M; i++) { /* for each block in the block row */ 1334 idx_i = *idx + i*bs; 1335 itmp = bs*aj[ai[bn] + i]; 1336 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1337 } 1338 } 1339 } 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1345 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1346 { 1347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1357 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1358 { 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1363 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1364 PetscFunctionReturn(0); 1365 } 1366 1367 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1371 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1372 { 1373 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1374 Mat C; 1375 PetscErrorCode ierr; 1376 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1377 PetscInt *rows,*cols,bs2=a->bs2; 1378 MatScalar *array; 1379 1380 PetscFunctionBegin; 1381 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1382 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1383 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1384 1385 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1386 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1387 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1388 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1389 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1390 ierr = PetscFree(col);CHKERRQ(ierr); 1391 } else { 1392 C = *B; 1393 } 1394 1395 array = a->a; 1396 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1397 for (i=0; i<mbs; i++) { 1398 cols[0] = i*bs; 1399 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1400 len = ai[i+1] - ai[i]; 1401 for (j=0; j<len; j++) { 1402 rows[0] = (*aj++)*bs; 1403 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1404 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1405 array += bs2; 1406 } 1407 } 1408 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1409 1410 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 1413 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1414 *B = C; 1415 } else { 1416 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1423 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1424 { 1425 PetscErrorCode ierr; 1426 Mat Btrans; 1427 1428 PetscFunctionBegin; 1429 *f = PETSC_FALSE; 1430 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1431 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1432 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1438 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1439 { 1440 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1441 PetscErrorCode ierr; 1442 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1443 int fd; 1444 PetscScalar *aa; 1445 FILE *file; 1446 1447 PetscFunctionBegin; 1448 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1449 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1450 col_lens[0] = MAT_FILE_CLASSID; 1451 1452 col_lens[1] = A->rmap->N; 1453 col_lens[2] = A->cmap->n; 1454 col_lens[3] = a->nz*bs2; 1455 1456 /* store lengths of each row and write (including header) to file */ 1457 count = 0; 1458 for (i=0; i<a->mbs; i++) { 1459 for (j=0; j<bs; j++) { 1460 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1461 } 1462 } 1463 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1464 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1465 1466 /* store column indices (zero start index) */ 1467 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1468 count = 0; 1469 for (i=0; i<a->mbs; i++) { 1470 for (j=0; j<bs; j++) { 1471 for (k=a->i[i]; k<a->i[i+1]; k++) { 1472 for (l=0; l<bs; l++) { 1473 jj[count++] = bs*a->j[k] + l; 1474 } 1475 } 1476 } 1477 } 1478 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1479 ierr = PetscFree(jj);CHKERRQ(ierr); 1480 1481 /* store nonzero values */ 1482 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1483 count = 0; 1484 for (i=0; i<a->mbs; i++) { 1485 for (j=0; j<bs; j++) { 1486 for (k=a->i[i]; k<a->i[i+1]; k++) { 1487 for (l=0; l<bs; l++) { 1488 aa[count++] = a->a[bs2*k + l*bs + j]; 1489 } 1490 } 1491 } 1492 } 1493 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1494 ierr = PetscFree(aa);CHKERRQ(ierr); 1495 1496 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1497 if (file) { 1498 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1505 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1506 { 1507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1508 PetscErrorCode ierr; 1509 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1510 PetscViewerFormat format; 1511 1512 PetscFunctionBegin; 1513 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1514 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1515 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1516 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1517 const char *matname; 1518 Mat aij; 1519 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1520 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1521 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1522 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1523 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1524 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1525 PetscFunctionReturn(0); 1526 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1527 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1528 for (i=0; i<a->mbs; i++) { 1529 for (j=0; j<bs; j++) { 1530 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1531 for (k=a->i[i]; k<a->i[i+1]; k++) { 1532 for (l=0; l<bs; l++) { 1533 #if defined(PETSC_USE_COMPLEX) 1534 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1535 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1536 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1537 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1539 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1542 } 1543 #else 1544 if (a->a[bs2*k + l*bs + j] != 0.0) { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1546 } 1547 #endif 1548 } 1549 } 1550 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1551 } 1552 } 1553 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1554 } else { 1555 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1556 for (i=0; i<a->mbs; i++) { 1557 for (j=0; j<bs; j++) { 1558 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1559 for (k=a->i[i]; k<a->i[i+1]; k++) { 1560 for (l=0; l<bs; l++) { 1561 #if defined(PETSC_USE_COMPLEX) 1562 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1563 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1564 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1565 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1567 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1568 } else { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1570 } 1571 #else 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1573 #endif 1574 } 1575 } 1576 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1577 } 1578 } 1579 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1580 } 1581 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #include <petscdraw.h> 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1588 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1589 { 1590 Mat A = (Mat) Aa; 1591 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1592 PetscErrorCode ierr; 1593 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1594 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1595 MatScalar *aa; 1596 PetscViewer viewer; 1597 PetscViewerFormat format; 1598 1599 PetscFunctionBegin; 1600 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1601 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1602 1603 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1604 1605 /* loop over matrix elements drawing boxes */ 1606 1607 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1608 color = PETSC_DRAW_BLUE; 1609 for (i=0,row=0; i<mbs; i++,row+=bs) { 1610 for (j=a->i[i]; j<a->i[i+1]; j++) { 1611 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1612 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1613 aa = a->a + j*bs2; 1614 for (k=0; k<bs; k++) { 1615 for (l=0; l<bs; l++) { 1616 if (PetscRealPart(*aa++) >= 0.) continue; 1617 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1618 } 1619 } 1620 } 1621 } 1622 color = PETSC_DRAW_CYAN; 1623 for (i=0,row=0; i<mbs; i++,row+=bs) { 1624 for (j=a->i[i]; j<a->i[i+1]; j++) { 1625 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1626 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1627 aa = a->a + j*bs2; 1628 for (k=0; k<bs; k++) { 1629 for (l=0; l<bs; l++) { 1630 if (PetscRealPart(*aa++) != 0.) continue; 1631 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1632 } 1633 } 1634 } 1635 } 1636 color = PETSC_DRAW_RED; 1637 for (i=0,row=0; i<mbs; i++,row+=bs) { 1638 for (j=a->i[i]; j<a->i[i+1]; j++) { 1639 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1640 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1641 aa = a->a + j*bs2; 1642 for (k=0; k<bs; k++) { 1643 for (l=0; l<bs; l++) { 1644 if (PetscRealPart(*aa++) <= 0.) continue; 1645 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1646 } 1647 } 1648 } 1649 } 1650 } else { 1651 /* use contour shading to indicate magnitude of values */ 1652 /* first determine max of all nonzero values */ 1653 PetscDraw popup; 1654 PetscReal scale,maxv = 0.0; 1655 1656 for (i=0; i<a->nz*a->bs2; i++) { 1657 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1658 } 1659 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1660 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1661 if (popup) { 1662 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1663 } 1664 for (i=0,row=0; i<mbs; i++,row+=bs) { 1665 for (j=a->i[i]; j<a->i[i+1]; j++) { 1666 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1667 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1668 aa = a->a + j*bs2; 1669 for (k=0; k<bs; k++) { 1670 for (l=0; l<bs; l++) { 1671 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1672 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1673 } 1674 } 1675 } 1676 } 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1683 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1684 { 1685 PetscErrorCode ierr; 1686 PetscReal xl,yl,xr,yr,w,h; 1687 PetscDraw draw; 1688 PetscBool isnull; 1689 1690 PetscFunctionBegin; 1691 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1692 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1693 1694 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1695 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1696 xr += w; yr += h; xl = -w; yl = -h; 1697 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1698 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1699 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatView_SeqBAIJ" 1705 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1706 { 1707 PetscErrorCode ierr; 1708 PetscBool iascii,isbinary,isdraw; 1709 1710 PetscFunctionBegin; 1711 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1712 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1713 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1714 if (iascii) { 1715 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1716 } else if (isbinary) { 1717 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1718 } else if (isdraw) { 1719 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1720 } else { 1721 Mat B; 1722 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1723 ierr = MatView(B,viewer);CHKERRQ(ierr); 1724 ierr = MatDestroy(&B);CHKERRQ(ierr); 1725 } 1726 PetscFunctionReturn(0); 1727 } 1728 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1732 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1733 { 1734 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1735 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1736 PetscInt *ai = a->i,*ailen = a->ilen; 1737 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1738 MatScalar *ap,*aa = a->a; 1739 1740 PetscFunctionBegin; 1741 for (k=0; k<m; k++) { /* loop over rows */ 1742 row = im[k]; brow = row/bs; 1743 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1744 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1745 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1746 nrow = ailen[brow]; 1747 for (l=0; l<n; l++) { /* loop over columns */ 1748 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1749 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1750 col = in[l]; 1751 bcol = col/bs; 1752 cidx = col%bs; 1753 ridx = row%bs; 1754 high = nrow; 1755 low = 0; /* assume unsorted */ 1756 while (high-low > 5) { 1757 t = (low+high)/2; 1758 if (rp[t] > bcol) high = t; 1759 else low = t; 1760 } 1761 for (i=low; i<high; i++) { 1762 if (rp[i] > bcol) break; 1763 if (rp[i] == bcol) { 1764 *v++ = ap[bs2*i+bs*cidx+ridx]; 1765 goto finished; 1766 } 1767 } 1768 *v++ = 0.0; 1769 finished:; 1770 } 1771 } 1772 PetscFunctionReturn(0); 1773 } 1774 1775 #undef __FUNCT__ 1776 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1777 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1778 { 1779 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1780 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1781 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1782 PetscErrorCode ierr; 1783 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1784 PetscBool roworiented=a->roworiented; 1785 const PetscScalar *value = v; 1786 MatScalar *ap,*aa = a->a,*bap; 1787 1788 PetscFunctionBegin; 1789 if (roworiented) { 1790 stepval = (n-1)*bs; 1791 } else { 1792 stepval = (m-1)*bs; 1793 } 1794 for (k=0; k<m; k++) { /* loop over added rows */ 1795 row = im[k]; 1796 if (row < 0) continue; 1797 #if defined(PETSC_USE_DEBUG) 1798 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1799 #endif 1800 rp = aj + ai[row]; 1801 ap = aa + bs2*ai[row]; 1802 rmax = imax[row]; 1803 nrow = ailen[row]; 1804 low = 0; 1805 high = nrow; 1806 for (l=0; l<n; l++) { /* loop over added columns */ 1807 if (in[l] < 0) continue; 1808 #if defined(PETSC_USE_DEBUG) 1809 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1810 #endif 1811 col = in[l]; 1812 if (roworiented) { 1813 value = v + (k*(stepval+bs) + l)*bs; 1814 } else { 1815 value = v + (l*(stepval+bs) + k)*bs; 1816 } 1817 if (col <= lastcol) low = 0; 1818 else high = nrow; 1819 lastcol = col; 1820 while (high-low > 7) { 1821 t = (low+high)/2; 1822 if (rp[t] > col) high = t; 1823 else low = t; 1824 } 1825 for (i=low; i<high; i++) { 1826 if (rp[i] > col) break; 1827 if (rp[i] == col) { 1828 bap = ap + bs2*i; 1829 if (roworiented) { 1830 if (is == ADD_VALUES) { 1831 for (ii=0; ii<bs; ii++,value+=stepval) { 1832 for (jj=ii; jj<bs2; jj+=bs) { 1833 bap[jj] += *value++; 1834 } 1835 } 1836 } else { 1837 for (ii=0; ii<bs; ii++,value+=stepval) { 1838 for (jj=ii; jj<bs2; jj+=bs) { 1839 bap[jj] = *value++; 1840 } 1841 } 1842 } 1843 } else { 1844 if (is == ADD_VALUES) { 1845 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1846 for (jj=0; jj<bs; jj++) { 1847 bap[jj] += value[jj]; 1848 } 1849 bap += bs; 1850 } 1851 } else { 1852 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1853 for (jj=0; jj<bs; jj++) { 1854 bap[jj] = value[jj]; 1855 } 1856 bap += bs; 1857 } 1858 } 1859 } 1860 goto noinsert2; 1861 } 1862 } 1863 if (nonew == 1) goto noinsert2; 1864 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1865 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1866 N = nrow++ - 1; high++; 1867 /* shift up all the later entries in this row */ 1868 for (ii=N; ii>=i; ii--) { 1869 rp[ii+1] = rp[ii]; 1870 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1871 } 1872 if (N >= i) { 1873 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1874 } 1875 rp[i] = col; 1876 bap = ap + bs2*i; 1877 if (roworiented) { 1878 for (ii=0; ii<bs; ii++,value+=stepval) { 1879 for (jj=ii; jj<bs2; jj+=bs) { 1880 bap[jj] = *value++; 1881 } 1882 } 1883 } else { 1884 for (ii=0; ii<bs; ii++,value+=stepval) { 1885 for (jj=0; jj<bs; jj++) { 1886 *bap++ = *value++; 1887 } 1888 } 1889 } 1890 noinsert2:; 1891 low = i; 1892 } 1893 ailen[row] = nrow; 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1900 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1901 { 1902 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1903 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1904 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1905 PetscErrorCode ierr; 1906 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1907 MatScalar *aa = a->a,*ap; 1908 PetscReal ratio=0.6; 1909 1910 PetscFunctionBegin; 1911 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1912 1913 if (m) rmax = ailen[0]; 1914 for (i=1; i<mbs; i++) { 1915 /* move each row back by the amount of empty slots (fshift) before it*/ 1916 fshift += imax[i-1] - ailen[i-1]; 1917 rmax = PetscMax(rmax,ailen[i]); 1918 if (fshift) { 1919 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1920 N = ailen[i]; 1921 for (j=0; j<N; j++) { 1922 ip[j-fshift] = ip[j]; 1923 1924 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1925 } 1926 } 1927 ai[i] = ai[i-1] + ailen[i-1]; 1928 } 1929 if (mbs) { 1930 fshift += imax[mbs-1] - ailen[mbs-1]; 1931 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1932 } 1933 1934 /* reset ilen and imax for each row */ 1935 a->nonzerorowcnt = 0; 1936 for (i=0; i<mbs; i++) { 1937 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1938 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1939 } 1940 a->nz = ai[mbs]; 1941 1942 /* diagonals may have moved, so kill the diagonal pointers */ 1943 a->idiagvalid = PETSC_FALSE; 1944 if (fshift && a->diag) { 1945 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1946 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1947 a->diag = 0; 1948 } 1949 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1950 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1951 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1953 1954 A->info.mallocs += a->reallocs; 1955 a->reallocs = 0; 1956 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1957 a->rmax = rmax; 1958 1959 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 /* 1964 This function returns an array of flags which indicate the locations of contiguous 1965 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1966 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1967 Assume: sizes should be long enough to hold all the values. 1968 */ 1969 #undef __FUNCT__ 1970 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1971 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1972 { 1973 PetscInt i,j,k,row; 1974 PetscBool flg; 1975 1976 PetscFunctionBegin; 1977 for (i=0,j=0; i<n; j++) { 1978 row = idx[i]; 1979 if (row%bs!=0) { /* Not the begining of a block */ 1980 sizes[j] = 1; 1981 i++; 1982 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1983 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1984 i++; 1985 } else { /* Begining of the block, so check if the complete block exists */ 1986 flg = PETSC_TRUE; 1987 for (k=1; k<bs; k++) { 1988 if (row+k != idx[i+k]) { /* break in the block */ 1989 flg = PETSC_FALSE; 1990 break; 1991 } 1992 } 1993 if (flg) { /* No break in the bs */ 1994 sizes[j] = bs; 1995 i += bs; 1996 } else { 1997 sizes[j] = 1; 1998 i++; 1999 } 2000 } 2001 } 2002 *bs_max = j; 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2008 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2009 { 2010 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2011 PetscErrorCode ierr; 2012 PetscInt i,j,k,count,*rows; 2013 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2014 PetscScalar zero = 0.0; 2015 MatScalar *aa; 2016 const PetscScalar *xx; 2017 PetscScalar *bb; 2018 2019 PetscFunctionBegin; 2020 /* fix right hand side if needed */ 2021 if (x && b) { 2022 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2023 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2024 for (i=0; i<is_n; i++) { 2025 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2026 } 2027 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2028 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2029 } 2030 2031 /* Make a copy of the IS and sort it */ 2032 /* allocate memory for rows,sizes */ 2033 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2034 2035 /* copy IS values to rows, and sort them */ 2036 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2037 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2038 2039 if (baij->keepnonzeropattern) { 2040 for (i=0; i<is_n; i++) sizes[i] = 1; 2041 bs_max = is_n; 2042 } else { 2043 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2044 A->nonzerostate++; 2045 } 2046 2047 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2048 row = rows[j]; 2049 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2050 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2051 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2052 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2053 if (diag != (PetscScalar)0.0) { 2054 if (baij->ilen[row/bs] > 0) { 2055 baij->ilen[row/bs] = 1; 2056 baij->j[baij->i[row/bs]] = row/bs; 2057 2058 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2059 } 2060 /* Now insert all the diagonal values for this bs */ 2061 for (k=0; k<bs; k++) { 2062 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2063 } 2064 } else { /* (diag == 0.0) */ 2065 baij->ilen[row/bs] = 0; 2066 } /* end (diag == 0.0) */ 2067 } else { /* (sizes[i] != bs) */ 2068 #if defined(PETSC_USE_DEBUG) 2069 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2070 #endif 2071 for (k=0; k<count; k++) { 2072 aa[0] = zero; 2073 aa += bs; 2074 } 2075 if (diag != (PetscScalar)0.0) { 2076 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2077 } 2078 } 2079 } 2080 2081 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2082 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 PetscFunctionReturn(0); 2084 } 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2088 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2089 { 2090 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2091 PetscErrorCode ierr; 2092 PetscInt i,j,k,count; 2093 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2094 PetscScalar zero = 0.0; 2095 MatScalar *aa; 2096 const PetscScalar *xx; 2097 PetscScalar *bb; 2098 PetscBool *zeroed,vecs = PETSC_FALSE; 2099 2100 PetscFunctionBegin; 2101 /* fix right hand side if needed */ 2102 if (x && b) { 2103 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2104 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2105 vecs = PETSC_TRUE; 2106 } 2107 2108 /* zero the columns */ 2109 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2110 for (i=0; i<is_n; i++) { 2111 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2112 zeroed[is_idx[i]] = PETSC_TRUE; 2113 } 2114 for (i=0; i<A->rmap->N; i++) { 2115 if (!zeroed[i]) { 2116 row = i/bs; 2117 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2118 for (k=0; k<bs; k++) { 2119 col = bs*baij->j[j] + k; 2120 if (zeroed[col]) { 2121 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2122 if (vecs) bb[i] -= aa[0]*xx[col]; 2123 aa[0] = 0.0; 2124 } 2125 } 2126 } 2127 } else if (vecs) bb[i] = diag*xx[i]; 2128 } 2129 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2130 if (vecs) { 2131 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2132 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2133 } 2134 2135 /* zero the rows */ 2136 for (i=0; i<is_n; i++) { 2137 row = is_idx[i]; 2138 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2139 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2140 for (k=0; k<count; k++) { 2141 aa[0] = zero; 2142 aa += bs; 2143 } 2144 if (diag != (PetscScalar)0.0) { 2145 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2146 } 2147 } 2148 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2154 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2155 { 2156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2157 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2158 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2159 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2160 PetscErrorCode ierr; 2161 PetscInt ridx,cidx,bs2=a->bs2; 2162 PetscBool roworiented=a->roworiented; 2163 MatScalar *ap,value,*aa=a->a,*bap; 2164 2165 PetscFunctionBegin; 2166 for (k=0; k<m; k++) { /* loop over added rows */ 2167 row = im[k]; 2168 brow = row/bs; 2169 if (row < 0) continue; 2170 #if defined(PETSC_USE_DEBUG) 2171 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2172 #endif 2173 rp = aj + ai[brow]; 2174 ap = aa + bs2*ai[brow]; 2175 rmax = imax[brow]; 2176 nrow = ailen[brow]; 2177 low = 0; 2178 high = nrow; 2179 for (l=0; l<n; l++) { /* loop over added columns */ 2180 if (in[l] < 0) continue; 2181 #if defined(PETSC_USE_DEBUG) 2182 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2183 #endif 2184 col = in[l]; bcol = col/bs; 2185 ridx = row % bs; cidx = col % bs; 2186 if (roworiented) { 2187 value = v[l + k*n]; 2188 } else { 2189 value = v[k + l*m]; 2190 } 2191 if (col <= lastcol) low = 0; else high = nrow; 2192 lastcol = col; 2193 while (high-low > 7) { 2194 t = (low+high)/2; 2195 if (rp[t] > bcol) high = t; 2196 else low = t; 2197 } 2198 for (i=low; i<high; i++) { 2199 if (rp[i] > bcol) break; 2200 if (rp[i] == bcol) { 2201 bap = ap + bs2*i + bs*cidx + ridx; 2202 if (is == ADD_VALUES) *bap += value; 2203 else *bap = value; 2204 goto noinsert1; 2205 } 2206 } 2207 if (nonew == 1) goto noinsert1; 2208 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2209 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2210 N = nrow++ - 1; high++; 2211 /* shift up all the later entries in this row */ 2212 for (ii=N; ii>=i; ii--) { 2213 rp[ii+1] = rp[ii]; 2214 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2215 } 2216 if (N>=i) { 2217 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2218 } 2219 rp[i] = bcol; 2220 ap[bs2*i + bs*cidx + ridx] = value; 2221 a->nz++; 2222 A->nonzerostate++; 2223 noinsert1:; 2224 low = i; 2225 } 2226 ailen[brow] = nrow; 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2233 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2234 { 2235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2236 Mat outA; 2237 PetscErrorCode ierr; 2238 PetscBool row_identity,col_identity; 2239 2240 PetscFunctionBegin; 2241 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2242 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2243 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2244 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2245 2246 outA = inA; 2247 inA->factortype = MAT_FACTOR_LU; 2248 2249 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2250 2251 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2252 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2253 a->row = row; 2254 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2255 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2256 a->col = col; 2257 2258 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2259 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2260 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2261 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2262 2263 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2264 if (!a->solve_work) { 2265 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2266 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2267 } 2268 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2274 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2275 { 2276 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2277 PetscInt i,nz,mbs; 2278 2279 PetscFunctionBegin; 2280 nz = baij->maxnz; 2281 mbs = baij->mbs; 2282 for (i=0; i<nz; i++) { 2283 baij->j[i] = indices[i]; 2284 } 2285 baij->nz = nz; 2286 for (i=0; i<mbs; i++) { 2287 baij->ilen[i] = baij->imax[i]; 2288 } 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2294 /*@ 2295 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2296 in the matrix. 2297 2298 Input Parameters: 2299 + mat - the SeqBAIJ matrix 2300 - indices - the column indices 2301 2302 Level: advanced 2303 2304 Notes: 2305 This can be called if you have precomputed the nonzero structure of the 2306 matrix and want to provide it to the matrix object to improve the performance 2307 of the MatSetValues() operation. 2308 2309 You MUST have set the correct numbers of nonzeros per row in the call to 2310 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2311 2312 MUST be called before any calls to MatSetValues(); 2313 2314 @*/ 2315 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2316 { 2317 PetscErrorCode ierr; 2318 2319 PetscFunctionBegin; 2320 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2321 PetscValidPointer(indices,2); 2322 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #undef __FUNCT__ 2327 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2328 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2329 { 2330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2331 PetscErrorCode ierr; 2332 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2333 PetscReal atmp; 2334 PetscScalar *x,zero = 0.0; 2335 MatScalar *aa; 2336 PetscInt ncols,brow,krow,kcol; 2337 2338 PetscFunctionBegin; 2339 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2340 bs = A->rmap->bs; 2341 aa = a->a; 2342 ai = a->i; 2343 aj = a->j; 2344 mbs = a->mbs; 2345 2346 ierr = VecSet(v,zero);CHKERRQ(ierr); 2347 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2348 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2349 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2350 for (i=0; i<mbs; i++) { 2351 ncols = ai[1] - ai[0]; ai++; 2352 brow = bs*i; 2353 for (j=0; j<ncols; j++) { 2354 for (kcol=0; kcol<bs; kcol++) { 2355 for (krow=0; krow<bs; krow++) { 2356 atmp = PetscAbsScalar(*aa);aa++; 2357 row = brow + krow; /* row index */ 2358 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2359 } 2360 } 2361 aj++; 2362 } 2363 } 2364 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatCopy_SeqBAIJ" 2370 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2371 { 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 /* If the two matrices have the same copy implementation, use fast copy. */ 2376 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2377 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2378 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2379 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2380 2381 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2382 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2383 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2384 } else { 2385 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2386 } 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2392 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2393 { 2394 PetscErrorCode ierr; 2395 2396 PetscFunctionBegin; 2397 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2403 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2404 { 2405 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2406 2407 PetscFunctionBegin; 2408 *array = a->a; 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2414 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2415 { 2416 PetscFunctionBegin; 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2422 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2423 { 2424 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2425 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2426 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 /* Set the number of nonzeros in the new matrix */ 2431 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2432 PetscFunctionReturn(0); 2433 } 2434 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2437 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2438 { 2439 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2440 PetscErrorCode ierr; 2441 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2442 PetscBLASInt one=1; 2443 2444 PetscFunctionBegin; 2445 if (str == SAME_NONZERO_PATTERN) { 2446 PetscScalar alpha = a; 2447 PetscBLASInt bnz; 2448 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2449 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2450 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2451 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2452 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2453 } else { 2454 Mat B; 2455 PetscInt *nnz; 2456 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2457 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2458 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2459 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2460 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2461 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2462 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2463 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2464 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2465 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2466 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2467 ierr = PetscFree(nnz);CHKERRQ(ierr); 2468 } 2469 PetscFunctionReturn(0); 2470 } 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2474 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2475 { 2476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2477 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2478 MatScalar *aa = a->a; 2479 2480 PetscFunctionBegin; 2481 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2487 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2488 { 2489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2490 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2491 MatScalar *aa = a->a; 2492 2493 PetscFunctionBegin; 2494 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2495 PetscFunctionReturn(0); 2496 } 2497 2498 #undef __FUNCT__ 2499 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2500 /* 2501 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2502 */ 2503 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2504 { 2505 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2506 PetscErrorCode ierr; 2507 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2508 PetscInt nz = a->i[m],row,*jj,mr,col; 2509 2510 PetscFunctionBegin; 2511 *nn = n; 2512 if (!ia) PetscFunctionReturn(0); 2513 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2514 else { 2515 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2516 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2517 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2518 jj = a->j; 2519 for (i=0; i<nz; i++) { 2520 collengths[jj[i]]++; 2521 } 2522 cia[0] = oshift; 2523 for (i=0; i<n; i++) { 2524 cia[i+1] = cia[i] + collengths[i]; 2525 } 2526 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2527 jj = a->j; 2528 for (row=0; row<m; row++) { 2529 mr = a->i[row+1] - a->i[row]; 2530 for (i=0; i<mr; i++) { 2531 col = *jj++; 2532 2533 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2534 } 2535 } 2536 ierr = PetscFree(collengths);CHKERRQ(ierr); 2537 *ia = cia; *ja = cja; 2538 } 2539 PetscFunctionReturn(0); 2540 } 2541 2542 #undef __FUNCT__ 2543 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2544 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2545 { 2546 PetscErrorCode ierr; 2547 2548 PetscFunctionBegin; 2549 if (!ia) PetscFunctionReturn(0); 2550 ierr = PetscFree(*ia);CHKERRQ(ierr); 2551 ierr = PetscFree(*ja);CHKERRQ(ierr); 2552 PetscFunctionReturn(0); 2553 } 2554 2555 /* 2556 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2557 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2558 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2559 */ 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2562 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2563 { 2564 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2565 PetscErrorCode ierr; 2566 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2567 PetscInt nz = a->i[m],row,*jj,mr,col; 2568 PetscInt *cspidx; 2569 2570 PetscFunctionBegin; 2571 *nn = n; 2572 if (!ia) PetscFunctionReturn(0); 2573 2574 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2575 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2576 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2577 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2578 jj = a->j; 2579 for (i=0; i<nz; i++) { 2580 collengths[jj[i]]++; 2581 } 2582 cia[0] = oshift; 2583 for (i=0; i<n; i++) { 2584 cia[i+1] = cia[i] + collengths[i]; 2585 } 2586 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2587 jj = a->j; 2588 for (row=0; row<m; row++) { 2589 mr = a->i[row+1] - a->i[row]; 2590 for (i=0; i<mr; i++) { 2591 col = *jj++; 2592 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2593 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2594 } 2595 } 2596 ierr = PetscFree(collengths);CHKERRQ(ierr); 2597 *ia = cia; *ja = cja; 2598 *spidx = cspidx; 2599 PetscFunctionReturn(0); 2600 } 2601 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2604 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2605 { 2606 PetscErrorCode ierr; 2607 2608 PetscFunctionBegin; 2609 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2610 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2611 PetscFunctionReturn(0); 2612 } 2613 2614 #undef __FUNCT__ 2615 #define __FUNCT__ "MatShift_SeqBAIJ" 2616 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2617 { 2618 PetscErrorCode ierr; 2619 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2620 2621 PetscFunctionBegin; 2622 if (!Y->preallocated || !aij->nz) { 2623 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2624 } 2625 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2626 PetscFunctionReturn(0); 2627 } 2628 2629 /* -------------------------------------------------------------------*/ 2630 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2631 MatGetRow_SeqBAIJ, 2632 MatRestoreRow_SeqBAIJ, 2633 MatMult_SeqBAIJ_N, 2634 /* 4*/ MatMultAdd_SeqBAIJ_N, 2635 MatMultTranspose_SeqBAIJ, 2636 MatMultTransposeAdd_SeqBAIJ, 2637 0, 2638 0, 2639 0, 2640 /* 10*/ 0, 2641 MatLUFactor_SeqBAIJ, 2642 0, 2643 0, 2644 MatTranspose_SeqBAIJ, 2645 /* 15*/ MatGetInfo_SeqBAIJ, 2646 MatEqual_SeqBAIJ, 2647 MatGetDiagonal_SeqBAIJ, 2648 MatDiagonalScale_SeqBAIJ, 2649 MatNorm_SeqBAIJ, 2650 /* 20*/ 0, 2651 MatAssemblyEnd_SeqBAIJ, 2652 MatSetOption_SeqBAIJ, 2653 MatZeroEntries_SeqBAIJ, 2654 /* 24*/ MatZeroRows_SeqBAIJ, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /* 29*/ MatSetUp_SeqBAIJ, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /* 34*/ MatDuplicate_SeqBAIJ, 2665 0, 2666 0, 2667 MatILUFactor_SeqBAIJ, 2668 0, 2669 /* 39*/ MatAXPY_SeqBAIJ, 2670 MatGetSubMatrices_SeqBAIJ, 2671 MatIncreaseOverlap_SeqBAIJ, 2672 MatGetValues_SeqBAIJ, 2673 MatCopy_SeqBAIJ, 2674 /* 44*/ 0, 2675 MatScale_SeqBAIJ, 2676 MatShift_SeqBAIJ, 2677 0, 2678 MatZeroRowsColumns_SeqBAIJ, 2679 /* 49*/ 0, 2680 MatGetRowIJ_SeqBAIJ, 2681 MatRestoreRowIJ_SeqBAIJ, 2682 MatGetColumnIJ_SeqBAIJ, 2683 MatRestoreColumnIJ_SeqBAIJ, 2684 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2685 0, 2686 0, 2687 0, 2688 MatSetValuesBlocked_SeqBAIJ, 2689 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2690 MatDestroy_SeqBAIJ, 2691 MatView_SeqBAIJ, 2692 0, 2693 0, 2694 /* 64*/ 0, 2695 0, 2696 0, 2697 0, 2698 0, 2699 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2700 0, 2701 MatConvert_Basic, 2702 0, 2703 0, 2704 /* 74*/ 0, 2705 MatFDColoringApply_BAIJ, 2706 0, 2707 0, 2708 0, 2709 /* 79*/ 0, 2710 0, 2711 0, 2712 0, 2713 MatLoad_SeqBAIJ, 2714 /* 84*/ 0, 2715 0, 2716 0, 2717 0, 2718 0, 2719 /* 89*/ 0, 2720 0, 2721 0, 2722 0, 2723 0, 2724 /* 94*/ 0, 2725 0, 2726 0, 2727 0, 2728 0, 2729 /* 99*/ 0, 2730 0, 2731 0, 2732 0, 2733 0, 2734 /*104*/ 0, 2735 MatRealPart_SeqBAIJ, 2736 MatImaginaryPart_SeqBAIJ, 2737 0, 2738 0, 2739 /*109*/ 0, 2740 0, 2741 0, 2742 0, 2743 MatMissingDiagonal_SeqBAIJ, 2744 /*114*/ 0, 2745 0, 2746 0, 2747 0, 2748 0, 2749 /*119*/ 0, 2750 0, 2751 MatMultHermitianTranspose_SeqBAIJ, 2752 MatMultHermitianTransposeAdd_SeqBAIJ, 2753 0, 2754 /*124*/ 0, 2755 0, 2756 MatInvertBlockDiagonal_SeqBAIJ, 2757 0, 2758 0, 2759 /*129*/ 0, 2760 0, 2761 0, 2762 0, 2763 0, 2764 /*134*/ 0, 2765 0, 2766 0, 2767 0, 2768 0, 2769 /*139*/ 0, 2770 0, 2771 0, 2772 MatFDColoringSetUp_SeqXAIJ, 2773 0, 2774 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2775 }; 2776 2777 #undef __FUNCT__ 2778 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2779 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2780 { 2781 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2782 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2783 PetscErrorCode ierr; 2784 2785 PetscFunctionBegin; 2786 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2787 2788 /* allocate space for values if not already there */ 2789 if (!aij->saved_values) { 2790 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2791 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2792 } 2793 2794 /* copy values over */ 2795 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2796 PetscFunctionReturn(0); 2797 } 2798 2799 #undef __FUNCT__ 2800 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2801 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2802 { 2803 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2804 PetscErrorCode ierr; 2805 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2806 2807 PetscFunctionBegin; 2808 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2809 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2810 2811 /* copy values over */ 2812 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2813 PetscFunctionReturn(0); 2814 } 2815 2816 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2817 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2818 2819 #undef __FUNCT__ 2820 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2821 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2822 { 2823 Mat_SeqBAIJ *b; 2824 PetscErrorCode ierr; 2825 PetscInt i,mbs,nbs,bs2; 2826 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2827 2828 PetscFunctionBegin; 2829 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2830 if (nz == MAT_SKIP_ALLOCATION) { 2831 skipallocation = PETSC_TRUE; 2832 nz = 0; 2833 } 2834 2835 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2836 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2837 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2838 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2839 2840 B->preallocated = PETSC_TRUE; 2841 2842 mbs = B->rmap->n/bs; 2843 nbs = B->cmap->n/bs; 2844 bs2 = bs*bs; 2845 2846 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2847 2848 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2849 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2850 if (nnz) { 2851 for (i=0; i<mbs; i++) { 2852 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2853 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2854 } 2855 } 2856 2857 b = (Mat_SeqBAIJ*)B->data; 2858 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2859 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2860 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2861 2862 if (!flg) { 2863 switch (bs) { 2864 case 1: 2865 B->ops->mult = MatMult_SeqBAIJ_1; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2867 break; 2868 case 2: 2869 B->ops->mult = MatMult_SeqBAIJ_2; 2870 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2871 break; 2872 case 3: 2873 B->ops->mult = MatMult_SeqBAIJ_3; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2875 break; 2876 case 4: 2877 B->ops->mult = MatMult_SeqBAIJ_4; 2878 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2879 break; 2880 case 5: 2881 B->ops->mult = MatMult_SeqBAIJ_5; 2882 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2883 break; 2884 case 6: 2885 B->ops->mult = MatMult_SeqBAIJ_6; 2886 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2887 break; 2888 case 7: 2889 B->ops->mult = MatMult_SeqBAIJ_7; 2890 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2891 break; 2892 case 15: 2893 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2894 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2895 break; 2896 default: 2897 B->ops->mult = MatMult_SeqBAIJ_N; 2898 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2899 break; 2900 } 2901 } 2902 B->ops->sor = MatSOR_SeqBAIJ; 2903 b->mbs = mbs; 2904 b->nbs = nbs; 2905 if (!skipallocation) { 2906 if (!b->imax) { 2907 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2908 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2909 2910 b->free_imax_ilen = PETSC_TRUE; 2911 } 2912 /* b->ilen will count nonzeros in each block row so far. */ 2913 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2914 if (!nnz) { 2915 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2916 else if (nz < 0) nz = 1; 2917 for (i=0; i<mbs; i++) b->imax[i] = nz; 2918 nz = nz*mbs; 2919 } else { 2920 nz = 0; 2921 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2922 } 2923 2924 /* allocate the matrix space */ 2925 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2926 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2927 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2928 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2929 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2930 2931 b->singlemalloc = PETSC_TRUE; 2932 b->i[0] = 0; 2933 for (i=1; i<mbs+1; i++) { 2934 b->i[i] = b->i[i-1] + b->imax[i-1]; 2935 } 2936 b->free_a = PETSC_TRUE; 2937 b->free_ij = PETSC_TRUE; 2938 } else { 2939 b->free_a = PETSC_FALSE; 2940 b->free_ij = PETSC_FALSE; 2941 } 2942 2943 b->bs2 = bs2; 2944 b->mbs = mbs; 2945 b->nz = 0; 2946 b->maxnz = nz; 2947 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2948 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2954 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2955 { 2956 PetscInt i,m,nz,nz_max=0,*nnz; 2957 PetscScalar *values=0; 2958 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2959 PetscErrorCode ierr; 2960 2961 PetscFunctionBegin; 2962 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2963 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2964 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2965 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2966 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2967 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2968 m = B->rmap->n/bs; 2969 2970 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2971 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2972 for (i=0; i<m; i++) { 2973 nz = ii[i+1]- ii[i]; 2974 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2975 nz_max = PetscMax(nz_max, nz); 2976 nnz[i] = nz; 2977 } 2978 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2979 ierr = PetscFree(nnz);CHKERRQ(ierr); 2980 2981 values = (PetscScalar*)V; 2982 if (!values) { 2983 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2984 } 2985 for (i=0; i<m; i++) { 2986 PetscInt ncols = ii[i+1] - ii[i]; 2987 const PetscInt *icols = jj + ii[i]; 2988 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2989 if (!roworiented) { 2990 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2991 } else { 2992 PetscInt j; 2993 for (j=0; j<ncols; j++) { 2994 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2995 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2996 } 2997 } 2998 } 2999 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3000 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3001 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3002 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3003 PetscFunctionReturn(0); 3004 } 3005 3006 /*MC 3007 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3008 block sparse compressed row format. 3009 3010 Options Database Keys: 3011 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3012 3013 Level: beginner 3014 3015 .seealso: MatCreateSeqBAIJ() 3016 M*/ 3017 3018 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3019 3020 #undef __FUNCT__ 3021 #define __FUNCT__ "MatCreate_SeqBAIJ" 3022 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3023 { 3024 PetscErrorCode ierr; 3025 PetscMPIInt size; 3026 Mat_SeqBAIJ *b; 3027 3028 PetscFunctionBegin; 3029 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3030 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3031 3032 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3033 B->data = (void*)b; 3034 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3035 3036 b->row = 0; 3037 b->col = 0; 3038 b->icol = 0; 3039 b->reallocs = 0; 3040 b->saved_values = 0; 3041 3042 b->roworiented = PETSC_TRUE; 3043 b->nonew = 0; 3044 b->diag = 0; 3045 B->spptr = 0; 3046 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3047 b->keepnonzeropattern = PETSC_FALSE; 3048 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3059 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3060 PetscFunctionReturn(0); 3061 } 3062 3063 #undef __FUNCT__ 3064 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3065 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3066 { 3067 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3068 PetscErrorCode ierr; 3069 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3070 3071 PetscFunctionBegin; 3072 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3073 3074 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3075 c->imax = a->imax; 3076 c->ilen = a->ilen; 3077 c->free_imax_ilen = PETSC_FALSE; 3078 } else { 3079 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3080 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3081 for (i=0; i<mbs; i++) { 3082 c->imax[i] = a->imax[i]; 3083 c->ilen[i] = a->ilen[i]; 3084 } 3085 c->free_imax_ilen = PETSC_TRUE; 3086 } 3087 3088 /* allocate the matrix space */ 3089 if (mallocmatspace) { 3090 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3091 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3092 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3093 3094 c->i = a->i; 3095 c->j = a->j; 3096 c->singlemalloc = PETSC_FALSE; 3097 c->free_a = PETSC_TRUE; 3098 c->free_ij = PETSC_FALSE; 3099 c->parent = A; 3100 C->preallocated = PETSC_TRUE; 3101 C->assembled = PETSC_TRUE; 3102 3103 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3104 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3105 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3106 } else { 3107 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3108 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3109 3110 c->singlemalloc = PETSC_TRUE; 3111 c->free_a = PETSC_TRUE; 3112 c->free_ij = PETSC_TRUE; 3113 3114 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3115 if (mbs > 0) { 3116 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3117 if (cpvalues == MAT_COPY_VALUES) { 3118 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3119 } else { 3120 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3121 } 3122 } 3123 C->preallocated = PETSC_TRUE; 3124 C->assembled = PETSC_TRUE; 3125 } 3126 } 3127 3128 c->roworiented = a->roworiented; 3129 c->nonew = a->nonew; 3130 3131 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3132 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3133 3134 c->bs2 = a->bs2; 3135 c->mbs = a->mbs; 3136 c->nbs = a->nbs; 3137 3138 if (a->diag) { 3139 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3140 c->diag = a->diag; 3141 c->free_diag = PETSC_FALSE; 3142 } else { 3143 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3144 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3145 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3146 c->free_diag = PETSC_TRUE; 3147 } 3148 } else c->diag = 0; 3149 3150 c->nz = a->nz; 3151 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3152 c->solve_work = NULL; 3153 c->mult_work = NULL; 3154 c->sor_workt = NULL; 3155 c->sor_work = NULL; 3156 3157 c->compressedrow.use = a->compressedrow.use; 3158 c->compressedrow.nrows = a->compressedrow.nrows; 3159 if (a->compressedrow.use) { 3160 i = a->compressedrow.nrows; 3161 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3162 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3163 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3164 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3165 } else { 3166 c->compressedrow.use = PETSC_FALSE; 3167 c->compressedrow.i = NULL; 3168 c->compressedrow.rindex = NULL; 3169 } 3170 C->nonzerostate = A->nonzerostate; 3171 3172 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3173 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3174 PetscFunctionReturn(0); 3175 } 3176 3177 #undef __FUNCT__ 3178 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3179 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3180 { 3181 PetscErrorCode ierr; 3182 3183 PetscFunctionBegin; 3184 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3185 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3186 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3187 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3188 PetscFunctionReturn(0); 3189 } 3190 3191 #undef __FUNCT__ 3192 #define __FUNCT__ "MatLoad_SeqBAIJ" 3193 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3194 { 3195 Mat_SeqBAIJ *a; 3196 PetscErrorCode ierr; 3197 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3198 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3199 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3200 PetscInt *masked,nmask,tmp,bs2,ishift; 3201 PetscMPIInt size; 3202 int fd; 3203 PetscScalar *aa; 3204 MPI_Comm comm; 3205 3206 PetscFunctionBegin; 3207 /* force binary viewer to load .info file if it has not yet done so */ 3208 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3209 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3210 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3211 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3212 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3213 if (bs < 0) bs = 1; 3214 bs2 = bs*bs; 3215 3216 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3217 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3218 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3219 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3220 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3221 M = header[1]; N = header[2]; nz = header[3]; 3222 3223 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3224 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3225 3226 /* 3227 This code adds extra rows to make sure the number of rows is 3228 divisible by the blocksize 3229 */ 3230 mbs = M/bs; 3231 extra_rows = bs - M + bs*(mbs); 3232 if (extra_rows == bs) extra_rows = 0; 3233 else mbs++; 3234 if (extra_rows) { 3235 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3236 } 3237 3238 /* Set global sizes if not already set */ 3239 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3240 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3241 } else { /* Check if the matrix global sizes are correct */ 3242 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3243 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3244 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3245 } 3246 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3247 } 3248 3249 /* read in row lengths */ 3250 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3251 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3252 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3253 3254 /* read in column indices */ 3255 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3256 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3257 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3258 3259 /* loop over row lengths determining block row lengths */ 3260 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3261 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3262 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3263 rowcount = 0; 3264 nzcount = 0; 3265 for (i=0; i<mbs; i++) { 3266 nmask = 0; 3267 for (j=0; j<bs; j++) { 3268 kmax = rowlengths[rowcount]; 3269 for (k=0; k<kmax; k++) { 3270 tmp = jj[nzcount++]/bs; 3271 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3272 } 3273 rowcount++; 3274 } 3275 browlengths[i] += nmask; 3276 /* zero out the mask elements we set */ 3277 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3278 } 3279 3280 /* Do preallocation */ 3281 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3282 a = (Mat_SeqBAIJ*)newmat->data; 3283 3284 /* set matrix "i" values */ 3285 a->i[0] = 0; 3286 for (i=1; i<= mbs; i++) { 3287 a->i[i] = a->i[i-1] + browlengths[i-1]; 3288 a->ilen[i-1] = browlengths[i-1]; 3289 } 3290 a->nz = 0; 3291 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3292 3293 /* read in nonzero values */ 3294 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3295 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3296 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3297 3298 /* set "a" and "j" values into matrix */ 3299 nzcount = 0; jcount = 0; 3300 for (i=0; i<mbs; i++) { 3301 nzcountb = nzcount; 3302 nmask = 0; 3303 for (j=0; j<bs; j++) { 3304 kmax = rowlengths[i*bs+j]; 3305 for (k=0; k<kmax; k++) { 3306 tmp = jj[nzcount++]/bs; 3307 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3308 } 3309 } 3310 /* sort the masked values */ 3311 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3312 3313 /* set "j" values into matrix */ 3314 maskcount = 1; 3315 for (j=0; j<nmask; j++) { 3316 a->j[jcount++] = masked[j]; 3317 mask[masked[j]] = maskcount++; 3318 } 3319 /* set "a" values into matrix */ 3320 ishift = bs2*a->i[i]; 3321 for (j=0; j<bs; j++) { 3322 kmax = rowlengths[i*bs+j]; 3323 for (k=0; k<kmax; k++) { 3324 tmp = jj[nzcountb]/bs; 3325 block = mask[tmp] - 1; 3326 point = jj[nzcountb] - bs*tmp; 3327 idx = ishift + bs2*block + j + bs*point; 3328 a->a[idx] = (MatScalar)aa[nzcountb++]; 3329 } 3330 } 3331 /* zero out the mask elements we set */ 3332 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3333 } 3334 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3335 3336 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3337 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3338 ierr = PetscFree(aa);CHKERRQ(ierr); 3339 ierr = PetscFree(jj);CHKERRQ(ierr); 3340 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3341 3342 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3343 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3344 PetscFunctionReturn(0); 3345 } 3346 3347 #undef __FUNCT__ 3348 #define __FUNCT__ "MatCreateSeqBAIJ" 3349 /*@C 3350 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3351 compressed row) format. For good matrix assembly performance the 3352 user should preallocate the matrix storage by setting the parameter nz 3353 (or the array nnz). By setting these parameters accurately, performance 3354 during matrix assembly can be increased by more than a factor of 50. 3355 3356 Collective on MPI_Comm 3357 3358 Input Parameters: 3359 + comm - MPI communicator, set to PETSC_COMM_SELF 3360 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3361 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3362 . m - number of rows 3363 . n - number of columns 3364 . nz - number of nonzero blocks per block row (same for all rows) 3365 - nnz - array containing the number of nonzero blocks in the various block rows 3366 (possibly different for each block row) or NULL 3367 3368 Output Parameter: 3369 . A - the matrix 3370 3371 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3372 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3373 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3374 3375 Options Database Keys: 3376 . -mat_no_unroll - uses code that does not unroll the loops in the 3377 block calculations (much slower) 3378 . -mat_block_size - size of the blocks to use 3379 3380 Level: intermediate 3381 3382 Notes: 3383 The number of rows and columns must be divisible by blocksize. 3384 3385 If the nnz parameter is given then the nz parameter is ignored 3386 3387 A nonzero block is any block that as 1 or more nonzeros in it 3388 3389 The block AIJ format is fully compatible with standard Fortran 77 3390 storage. That is, the stored row and column indices can begin at 3391 either one (as in Fortran) or zero. See the users' manual for details. 3392 3393 Specify the preallocated storage with either nz or nnz (not both). 3394 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3395 allocation. See Users-Manual: ch_mat for details. 3396 matrices. 3397 3398 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3399 @*/ 3400 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3401 { 3402 PetscErrorCode ierr; 3403 3404 PetscFunctionBegin; 3405 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3406 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3407 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3408 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3409 PetscFunctionReturn(0); 3410 } 3411 3412 #undef __FUNCT__ 3413 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3414 /*@C 3415 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3416 per row in the matrix. For good matrix assembly performance the 3417 user should preallocate the matrix storage by setting the parameter nz 3418 (or the array nnz). By setting these parameters accurately, performance 3419 during matrix assembly can be increased by more than a factor of 50. 3420 3421 Collective on MPI_Comm 3422 3423 Input Parameters: 3424 + B - the matrix 3425 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3426 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3427 . nz - number of block nonzeros per block row (same for all rows) 3428 - nnz - array containing the number of block nonzeros in the various block rows 3429 (possibly different for each block row) or NULL 3430 3431 Options Database Keys: 3432 . -mat_no_unroll - uses code that does not unroll the loops in the 3433 block calculations (much slower) 3434 . -mat_block_size - size of the blocks to use 3435 3436 Level: intermediate 3437 3438 Notes: 3439 If the nnz parameter is given then the nz parameter is ignored 3440 3441 You can call MatGetInfo() to get information on how effective the preallocation was; 3442 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3443 You can also run with the option -info and look for messages with the string 3444 malloc in them to see if additional memory allocation was needed. 3445 3446 The block AIJ format is fully compatible with standard Fortran 77 3447 storage. That is, the stored row and column indices can begin at 3448 either one (as in Fortran) or zero. See the users' manual for details. 3449 3450 Specify the preallocated storage with either nz or nnz (not both). 3451 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3452 allocation. See Users-Manual: ch_mat for details. 3453 3454 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3455 @*/ 3456 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3457 { 3458 PetscErrorCode ierr; 3459 3460 PetscFunctionBegin; 3461 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3462 PetscValidType(B,1); 3463 PetscValidLogicalCollectiveInt(B,bs,2); 3464 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3465 PetscFunctionReturn(0); 3466 } 3467 3468 #undef __FUNCT__ 3469 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3470 /*@C 3471 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3472 (the default sequential PETSc format). 3473 3474 Collective on MPI_Comm 3475 3476 Input Parameters: 3477 + B - the matrix 3478 . i - the indices into j for the start of each local row (starts with zero) 3479 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3480 - v - optional values in the matrix 3481 3482 Level: developer 3483 3484 Notes: 3485 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3486 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3487 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3488 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3489 block column and the second index is over columns within a block. 3490 3491 .keywords: matrix, aij, compressed row, sparse 3492 3493 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3494 @*/ 3495 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3496 { 3497 PetscErrorCode ierr; 3498 3499 PetscFunctionBegin; 3500 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3501 PetscValidType(B,1); 3502 PetscValidLogicalCollectiveInt(B,bs,2); 3503 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3504 PetscFunctionReturn(0); 3505 } 3506 3507 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3510 /*@ 3511 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3512 3513 Collective on MPI_Comm 3514 3515 Input Parameters: 3516 + comm - must be an MPI communicator of size 1 3517 . bs - size of block 3518 . m - number of rows 3519 . n - number of columns 3520 . i - row indices 3521 . j - column indices 3522 - a - matrix values 3523 3524 Output Parameter: 3525 . mat - the matrix 3526 3527 Level: advanced 3528 3529 Notes: 3530 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3531 once the matrix is destroyed 3532 3533 You cannot set new nonzero locations into this matrix, that will generate an error. 3534 3535 The i and j indices are 0 based 3536 3537 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3538 3539 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3540 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3541 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3542 with column-major ordering within blocks. 3543 3544 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3545 3546 @*/ 3547 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3548 { 3549 PetscErrorCode ierr; 3550 PetscInt ii; 3551 Mat_SeqBAIJ *baij; 3552 3553 PetscFunctionBegin; 3554 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3555 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3556 3557 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3558 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3559 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3560 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3561 baij = (Mat_SeqBAIJ*)(*mat)->data; 3562 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3563 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3564 3565 baij->i = i; 3566 baij->j = j; 3567 baij->a = a; 3568 3569 baij->singlemalloc = PETSC_FALSE; 3570 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3571 baij->free_a = PETSC_FALSE; 3572 baij->free_ij = PETSC_FALSE; 3573 3574 for (ii=0; ii<m; ii++) { 3575 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3576 #if defined(PETSC_USE_DEBUG) 3577 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3578 #endif 3579 } 3580 #if defined(PETSC_USE_DEBUG) 3581 for (ii=0; ii<baij->i[m]; ii++) { 3582 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3583 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3584 } 3585 #endif 3586 3587 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3588 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3589 PetscFunctionReturn(0); 3590 } 3591 3592 #undef __FUNCT__ 3593 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3594 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3595 { 3596 PetscErrorCode ierr; 3597 3598 PetscFunctionBegin; 3599 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3600 PetscFunctionReturn(0); 3601 } 3602