1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 17 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 22 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 23 PetscReal shift = 0.0; 24 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 25 26 PetscFunctionBegin; 27 allowzeropivot = PetscNot(A->erroriffailure); 28 29 if (a->idiagvalid) { 30 if (values) *values = a->idiag; 31 PetscFunctionReturn(0); 32 } 33 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 34 diag_offset = a->diag; 35 if (!a->idiag) { 36 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 37 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 38 } 39 diag = a->idiag; 40 mdiag = a->idiag+bs2*mbs; 41 if (values) *values = a->idiag; 42 /* factor and invert each block */ 43 switch (bs) { 44 case 1: 45 for (i=0; i<mbs; i++) { 46 odiag = v + 1*diag_offset[i]; 47 diag[0] = odiag[0]; 48 mdiag[0] = odiag[0]; 49 50 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 51 if (allowzeropivot) { 52 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 53 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 54 A->factorerror_zeropivot_row = i; 55 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 56 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 57 } 58 59 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 60 diag += 1; 61 mdiag += 1; 62 } 63 break; 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 85 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 86 diag += 9; 87 mdiag += 9; 88 } 89 break; 90 case 4: 91 for (i=0; i<mbs; i++) { 92 odiag = v + 16*diag_offset[i]; 93 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 96 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 97 diag += 16; 98 mdiag += 16; 99 } 100 break; 101 case 5: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 25*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 107 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 108 diag += 25; 109 mdiag += 25; 110 } 111 break; 112 case 6: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 36*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 118 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 119 diag += 36; 120 mdiag += 36; 121 } 122 break; 123 case 7: 124 for (i=0; i<mbs; i++) { 125 odiag = v + 49*diag_offset[i]; 126 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 127 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 128 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 129 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 130 diag += 49; 131 mdiag += 49; 132 } 133 break; 134 default: 135 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 136 for (i=0; i<mbs; i++) { 137 odiag = v + bs2*diag_offset[i]; 138 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 139 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 140 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 141 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 142 diag += bs2; 143 mdiag += bs2; 144 } 145 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 146 } 147 a->idiagvalid = PETSC_TRUE; 148 PetscFunctionReturn(0); 149 } 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "MatSOR_SeqBAIJ" 153 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 154 { 155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 156 PetscScalar *x,*work,*w,*workt,*t; 157 const MatScalar *v,*aa = a->a, *idiag; 158 const PetscScalar *b,*xb; 159 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 160 PetscErrorCode ierr; 161 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 162 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 163 164 PetscFunctionBegin; 165 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 166 its = its*lits; 167 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 168 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 169 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 170 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 171 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 172 173 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 174 175 if (!m) PetscFunctionReturn(0); 176 diag = a->diag; 177 idiag = a->idiag; 178 k = PetscMax(A->rmap->n,A->cmap->n); 179 if (!a->mult_work) { 180 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 181 } 182 if (!a->sor_workt) { 183 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 184 } 185 if (!a->sor_work) { 186 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 187 } 188 work = a->mult_work; 189 t = a->sor_workt; 190 w = a->sor_work; 191 192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 193 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 194 195 if (flag & SOR_ZERO_INITIAL_GUESS) { 196 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 197 switch (bs) { 198 case 1: 199 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 200 t[0] = b[0]; 201 i2 = 1; 202 idiag += 1; 203 for (i=1; i<m; i++) { 204 v = aa + ai[i]; 205 vi = aj + ai[i]; 206 nz = diag[i] - ai[i]; 207 s[0] = b[i2]; 208 for (j=0; j<nz; j++) { 209 xw[0] = x[vi[j]]; 210 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 211 } 212 t[i2] = s[0]; 213 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 214 x[i2] = xw[0]; 215 idiag += 1; 216 i2 += 1; 217 } 218 break; 219 case 2: 220 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 221 t[0] = b[0]; t[1] = b[1]; 222 i2 = 2; 223 idiag += 4; 224 for (i=1; i<m; i++) { 225 v = aa + 4*ai[i]; 226 vi = aj + ai[i]; 227 nz = diag[i] - ai[i]; 228 s[0] = b[i2]; s[1] = b[i2+1]; 229 for (j=0; j<nz; j++) { 230 idx = 2*vi[j]; 231 it = 4*j; 232 xw[0] = x[idx]; xw[1] = x[1+idx]; 233 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 234 } 235 t[i2] = s[0]; t[i2+1] = s[1]; 236 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 237 x[i2] = xw[0]; x[i2+1] = xw[1]; 238 idiag += 4; 239 i2 += 2; 240 } 241 break; 242 case 3: 243 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 244 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 245 i2 = 3; 246 idiag += 9; 247 for (i=1; i<m; i++) { 248 v = aa + 9*ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 252 while (nz--) { 253 idx = 3*(*vi++); 254 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 255 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 256 v += 9; 257 } 258 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 259 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 260 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 261 idiag += 9; 262 i2 += 3; 263 } 264 break; 265 case 4: 266 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 267 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 268 i2 = 4; 269 idiag += 16; 270 for (i=1; i<m; i++) { 271 v = aa + 16*ai[i]; 272 vi = aj + ai[i]; 273 nz = diag[i] - ai[i]; 274 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 275 while (nz--) { 276 idx = 4*(*vi++); 277 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 278 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 279 v += 16; 280 } 281 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 282 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 283 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 284 idiag += 16; 285 i2 += 4; 286 } 287 break; 288 case 5: 289 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 290 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 291 i2 = 5; 292 idiag += 25; 293 for (i=1; i<m; i++) { 294 v = aa + 25*ai[i]; 295 vi = aj + ai[i]; 296 nz = diag[i] - ai[i]; 297 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 298 while (nz--) { 299 idx = 5*(*vi++); 300 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 301 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 302 v += 25; 303 } 304 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 305 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 306 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 307 idiag += 25; 308 i2 += 5; 309 } 310 break; 311 case 6: 312 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 313 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 314 i2 = 6; 315 idiag += 36; 316 for (i=1; i<m; i++) { 317 v = aa + 36*ai[i]; 318 vi = aj + ai[i]; 319 nz = diag[i] - ai[i]; 320 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 321 while (nz--) { 322 idx = 6*(*vi++); 323 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 324 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 325 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 326 v += 36; 327 } 328 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 329 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 330 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 331 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 332 idiag += 36; 333 i2 += 6; 334 } 335 break; 336 case 7: 337 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 338 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 339 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 340 i2 = 7; 341 idiag += 49; 342 for (i=1; i<m; i++) { 343 v = aa + 49*ai[i]; 344 vi = aj + ai[i]; 345 nz = diag[i] - ai[i]; 346 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 347 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 348 while (nz--) { 349 idx = 7*(*vi++); 350 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 351 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 352 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 353 v += 49; 354 } 355 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 356 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 357 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 358 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 359 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 360 idiag += 49; 361 i2 += 7; 362 } 363 break; 364 default: 365 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 366 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 367 i2 = bs; 368 idiag += bs2; 369 for (i=1; i<m; i++) { 370 v = aa + bs2*ai[i]; 371 vi = aj + ai[i]; 372 nz = diag[i] - ai[i]; 373 374 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 /* copy all rows of x that are needed into contiguous space */ 376 workt = work; 377 for (j=0; j<nz; j++) { 378 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 workt += bs; 380 } 381 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 382 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 383 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 384 385 idiag += bs2; 386 i2 += bs; 387 } 388 break; 389 } 390 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 391 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 392 xb = t; 393 } 394 else xb = b; 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 396 idiag = a->idiag+bs2*(a->mbs-1); 397 i2 = bs * (m-1); 398 switch (bs) { 399 case 1: 400 s[0] = xb[i2]; 401 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 402 x[i2] = xw[0]; 403 i2 -= 1; 404 for (i=m-2; i>=0; i--) { 405 v = aa + (diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s[0] = xb[i2]; 409 for (j=0; j<nz; j++) { 410 xw[0] = x[vi[j]]; 411 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 412 } 413 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 414 x[i2] = xw[0]; 415 idiag -= 1; 416 i2 -= 1; 417 } 418 break; 419 case 2: 420 s[0] = xb[i2]; s[1] = xb[i2+1]; 421 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 422 x[i2] = xw[0]; x[i2+1] = xw[1]; 423 i2 -= 2; 424 idiag -= 4; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 4*(diag[i] + 1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s[0] = xb[i2]; s[1] = xb[i2+1]; 430 for (j=0; j<nz; j++) { 431 idx = 2*vi[j]; 432 it = 4*j; 433 xw[0] = x[idx]; xw[1] = x[1+idx]; 434 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 435 } 436 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 437 x[i2] = xw[0]; x[i2+1] = xw[1]; 438 idiag -= 4; 439 i2 -= 2; 440 } 441 break; 442 case 3: 443 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 444 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 445 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 446 i2 -= 3; 447 idiag -= 9; 448 for (i=m-2; i>=0; i--) { 449 v = aa + 9*(diag[i]+1); 450 vi = aj + diag[i] + 1; 451 nz = ai[i+1] - diag[i] - 1; 452 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 453 while (nz--) { 454 idx = 3*(*vi++); 455 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 456 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 457 v += 9; 458 } 459 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 460 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 461 idiag -= 9; 462 i2 -= 3; 463 } 464 break; 465 case 4: 466 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 467 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 468 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 469 i2 -= 4; 470 idiag -= 16; 471 for (i=m-2; i>=0; i--) { 472 v = aa + 16*(diag[i]+1); 473 vi = aj + diag[i] + 1; 474 nz = ai[i+1] - diag[i] - 1; 475 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 476 while (nz--) { 477 idx = 4*(*vi++); 478 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 479 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 480 v += 16; 481 } 482 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 483 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 484 idiag -= 16; 485 i2 -= 4; 486 } 487 break; 488 case 5: 489 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 490 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 491 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 492 i2 -= 5; 493 idiag -= 25; 494 for (i=m-2; i>=0; i--) { 495 v = aa + 25*(diag[i]+1); 496 vi = aj + diag[i] + 1; 497 nz = ai[i+1] - diag[i] - 1; 498 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 499 while (nz--) { 500 idx = 5*(*vi++); 501 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 502 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 503 v += 25; 504 } 505 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 506 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 507 idiag -= 25; 508 i2 -= 5; 509 } 510 break; 511 case 6: 512 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 513 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 514 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 515 i2 -= 6; 516 idiag -= 36; 517 for (i=m-2; i>=0; i--) { 518 v = aa + 36*(diag[i]+1); 519 vi = aj + diag[i] + 1; 520 nz = ai[i+1] - diag[i] - 1; 521 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 522 while (nz--) { 523 idx = 6*(*vi++); 524 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 525 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 526 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 527 v += 36; 528 } 529 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 530 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 531 idiag -= 36; 532 i2 -= 6; 533 } 534 break; 535 case 7: 536 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 537 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 538 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 i2 -= 7; 542 idiag -= 49; 543 for (i=m-2; i>=0; i--) { 544 v = aa + 49*(diag[i]+1); 545 vi = aj + diag[i] + 1; 546 nz = ai[i+1] - diag[i] - 1; 547 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 548 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 549 while (nz--) { 550 idx = 7*(*vi++); 551 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 552 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 553 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 554 v += 49; 555 } 556 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 557 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 558 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 559 idiag -= 49; 560 i2 -= 7; 561 } 562 break; 563 default: 564 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 565 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 566 i2 -= bs; 567 idiag -= bs2; 568 for (i=m-2; i>=0; i--) { 569 v = aa + bs2*(diag[i]+1); 570 vi = aj + diag[i] + 1; 571 nz = ai[i+1] - diag[i] - 1; 572 573 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 /* copy all rows of x that are needed into contiguous space */ 575 workt = work; 576 for (j=0; j<nz; j++) { 577 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 578 workt += bs; 579 } 580 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 581 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 582 583 idiag -= bs2; 584 i2 -= bs; 585 } 586 break; 587 } 588 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 589 } 590 its--; 591 } 592 while (its--) { 593 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 594 idiag = a->idiag; 595 i2 = 0; 596 switch (bs) { 597 case 1: 598 for (i=0; i<m; i++) { 599 v = aa + ai[i]; 600 vi = aj + ai[i]; 601 nz = ai[i+1] - ai[i]; 602 s[0] = b[i2]; 603 for (j=0; j<nz; j++) { 604 xw[0] = x[vi[j]]; 605 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 606 } 607 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 608 x[i2] += xw[0]; 609 idiag += 1; 610 i2 += 1; 611 } 612 break; 613 case 2: 614 for (i=0; i<m; i++) { 615 v = aa + 4*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; 619 for (j=0; j<nz; j++) { 620 idx = 2*vi[j]; 621 it = 4*j; 622 xw[0] = x[idx]; xw[1] = x[1+idx]; 623 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 624 } 625 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; 627 idiag += 4; 628 i2 += 2; 629 } 630 break; 631 case 3: 632 for (i=0; i<m; i++) { 633 v = aa + 9*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 637 while (nz--) { 638 idx = 3*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 641 v += 9; 642 } 643 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 645 idiag += 9; 646 i2 += 3; 647 } 648 break; 649 case 4: 650 for (i=0; i<m; i++) { 651 v = aa + 16*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 655 while (nz--) { 656 idx = 4*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 659 v += 16; 660 } 661 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 663 idiag += 16; 664 i2 += 4; 665 } 666 break; 667 case 5: 668 for (i=0; i<m; i++) { 669 v = aa + 25*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 673 while (nz--) { 674 idx = 5*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 677 v += 25; 678 } 679 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 681 idiag += 25; 682 i2 += 5; 683 } 684 break; 685 case 6: 686 for (i=0; i<m; i++) { 687 v = aa + 36*ai[i]; 688 vi = aj + ai[i]; 689 nz = ai[i+1] - ai[i]; 690 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 691 while (nz--) { 692 idx = 6*(*vi++); 693 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 694 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 695 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 696 v += 36; 697 } 698 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 699 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 700 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 701 idiag += 36; 702 i2 += 6; 703 } 704 break; 705 case 7: 706 for (i=0; i<m; i++) { 707 v = aa + 49*ai[i]; 708 vi = aj + ai[i]; 709 nz = ai[i+1] - ai[i]; 710 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 711 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 712 while (nz--) { 713 idx = 7*(*vi++); 714 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 715 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 716 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 717 v += 49; 718 } 719 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 720 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 721 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 722 idiag += 49; 723 i2 += 7; 724 } 725 break; 726 default: 727 for (i=0; i<m; i++) { 728 v = aa + bs2*ai[i]; 729 vi = aj + ai[i]; 730 nz = ai[i+1] - ai[i]; 731 732 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 /* copy all rows of x that are needed into contiguous space */ 734 workt = work; 735 for (j=0; j<nz; j++) { 736 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 737 workt += bs; 738 } 739 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 740 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 741 742 idiag += bs2; 743 i2 += bs; 744 } 745 break; 746 } 747 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 748 } 749 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 750 idiag = a->idiag+bs2*(a->mbs-1); 751 i2 = bs * (m-1); 752 switch (bs) { 753 case 1: 754 for (i=m-1; i>=0; i--) { 755 v = aa + ai[i]; 756 vi = aj + ai[i]; 757 nz = ai[i+1] - ai[i]; 758 s[0] = b[i2]; 759 for (j=0; j<nz; j++) { 760 xw[0] = x[vi[j]]; 761 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 762 } 763 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 764 x[i2] += xw[0]; 765 idiag -= 1; 766 i2 -= 1; 767 } 768 break; 769 case 2: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 4*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; 775 for (j=0; j<nz; j++) { 776 idx = 2*vi[j]; 777 it = 4*j; 778 xw[0] = x[idx]; xw[1] = x[1+idx]; 779 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 780 } 781 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; 783 idiag -= 4; 784 i2 -= 2; 785 } 786 break; 787 case 3: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 9*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 793 while (nz--) { 794 idx = 3*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 797 v += 9; 798 } 799 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 801 idiag -= 9; 802 i2 -= 3; 803 } 804 break; 805 case 4: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 16*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 811 while (nz--) { 812 idx = 4*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 815 v += 16; 816 } 817 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 819 idiag -= 16; 820 i2 -= 4; 821 } 822 break; 823 case 5: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 25*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 829 while (nz--) { 830 idx = 5*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 833 v += 25; 834 } 835 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 837 idiag -= 25; 838 i2 -= 5; 839 } 840 break; 841 case 6: 842 for (i=m-1; i>=0; i--) { 843 v = aa + 36*ai[i]; 844 vi = aj + ai[i]; 845 nz = ai[i+1] - ai[i]; 846 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 847 while (nz--) { 848 idx = 6*(*vi++); 849 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 850 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 851 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 852 v += 36; 853 } 854 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 855 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 856 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 857 idiag -= 36; 858 i2 -= 6; 859 } 860 break; 861 case 7: 862 for (i=m-1; i>=0; i--) { 863 v = aa + 49*ai[i]; 864 vi = aj + ai[i]; 865 nz = ai[i+1] - ai[i]; 866 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 867 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 868 while (nz--) { 869 idx = 7*(*vi++); 870 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 871 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 872 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 873 v += 49; 874 } 875 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 876 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 877 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 878 idiag -= 49; 879 i2 -= 7; 880 } 881 break; 882 default: 883 for (i=m-1; i>=0; i--) { 884 v = aa + bs2*ai[i]; 885 vi = aj + ai[i]; 886 nz = ai[i+1] - ai[i]; 887 888 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 /* copy all rows of x that are needed into contiguous space */ 890 workt = work; 891 for (j=0; j<nz; j++) { 892 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 893 workt += bs; 894 } 895 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 896 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 897 898 idiag -= bs2; 899 i2 -= bs; 900 } 901 break; 902 } 903 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 904 } 905 } 906 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 907 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 912 /* 913 Special version for direct calls from Fortran (Used in PETSc-fun3d) 914 */ 915 #if defined(PETSC_HAVE_FORTRAN_CAPS) 916 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 917 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 918 #define matsetvaluesblocked4_ matsetvaluesblocked4 919 #endif 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "matsetvaluesblocked4_" 923 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 924 { 925 Mat A = *AA; 926 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 927 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 928 PetscInt *ai =a->i,*ailen=a->ilen; 929 PetscInt *aj =a->j,stepval,lastcol = -1; 930 const PetscScalar *value = v; 931 MatScalar *ap,*aa = a->a,*bap; 932 933 PetscFunctionBegin; 934 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 935 stepval = (n-1)*4; 936 for (k=0; k<m; k++) { /* loop over added rows */ 937 row = im[k]; 938 rp = aj + ai[row]; 939 ap = aa + 16*ai[row]; 940 nrow = ailen[row]; 941 low = 0; 942 high = nrow; 943 for (l=0; l<n; l++) { /* loop over added columns */ 944 col = in[l]; 945 if (col <= lastcol) low = 0; 946 else high = nrow; 947 lastcol = col; 948 value = v + k*(stepval+4 + l)*4; 949 while (high-low > 7) { 950 t = (low+high)/2; 951 if (rp[t] > col) high = t; 952 else low = t; 953 } 954 for (i=low; i<high; i++) { 955 if (rp[i] > col) break; 956 if (rp[i] == col) { 957 bap = ap + 16*i; 958 for (ii=0; ii<4; ii++,value+=stepval) { 959 for (jj=ii; jj<16; jj+=4) { 960 bap[jj] += *value++; 961 } 962 } 963 goto noinsert2; 964 } 965 } 966 N = nrow++ - 1; 967 high++; /* added new column index thus must search to one higher than before */ 968 /* shift up all the later entries in this row */ 969 for (ii=N; ii>=i; ii--) { 970 rp[ii+1] = rp[ii]; 971 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 972 } 973 if (N >= i) { 974 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 975 } 976 rp[i] = col; 977 bap = ap + 16*i; 978 for (ii=0; ii<4; ii++,value+=stepval) { 979 for (jj=ii; jj<16; jj+=4) { 980 bap[jj] = *value++; 981 } 982 } 983 noinsert2:; 984 low = i; 985 } 986 ailen[row] = nrow; 987 } 988 PetscFunctionReturnVoid(); 989 } 990 991 #if defined(PETSC_HAVE_FORTRAN_CAPS) 992 #define matsetvalues4_ MATSETVALUES4 993 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 994 #define matsetvalues4_ matsetvalues4 995 #endif 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "MatSetValues4_" 999 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1000 { 1001 Mat A = *AA; 1002 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1003 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1004 PetscInt *ai=a->i,*ailen=a->ilen; 1005 PetscInt *aj=a->j,brow,bcol; 1006 PetscInt ridx,cidx,lastcol = -1; 1007 MatScalar *ap,value,*aa=a->a,*bap; 1008 1009 PetscFunctionBegin; 1010 for (k=0; k<m; k++) { /* loop over added rows */ 1011 row = im[k]; brow = row/4; 1012 rp = aj + ai[brow]; 1013 ap = aa + 16*ai[brow]; 1014 nrow = ailen[brow]; 1015 low = 0; 1016 high = nrow; 1017 for (l=0; l<n; l++) { /* loop over added columns */ 1018 col = in[l]; bcol = col/4; 1019 ridx = row % 4; cidx = col % 4; 1020 value = v[l + k*n]; 1021 if (col <= lastcol) low = 0; 1022 else high = nrow; 1023 lastcol = col; 1024 while (high-low > 7) { 1025 t = (low+high)/2; 1026 if (rp[t] > bcol) high = t; 1027 else low = t; 1028 } 1029 for (i=low; i<high; i++) { 1030 if (rp[i] > bcol) break; 1031 if (rp[i] == bcol) { 1032 bap = ap + 16*i + 4*cidx + ridx; 1033 *bap += value; 1034 goto noinsert1; 1035 } 1036 } 1037 N = nrow++ - 1; 1038 high++; /* added new column thus must search to one higher than before */ 1039 /* shift up all the later entries in this row */ 1040 for (ii=N; ii>=i; ii--) { 1041 rp[ii+1] = rp[ii]; 1042 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1043 } 1044 if (N>=i) { 1045 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1046 } 1047 rp[i] = bcol; 1048 ap[16*i + 4*cidx + ridx] = value; 1049 noinsert1:; 1050 low = i; 1051 } 1052 ailen[brow] = nrow; 1053 } 1054 PetscFunctionReturnVoid(); 1055 } 1056 1057 /* 1058 Checks for missing diagonals 1059 */ 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1062 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1063 { 1064 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1065 PetscErrorCode ierr; 1066 PetscInt *diag,*ii = a->i,i; 1067 1068 PetscFunctionBegin; 1069 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1070 *missing = PETSC_FALSE; 1071 if (A->rmap->n > 0 && !ii) { 1072 *missing = PETSC_TRUE; 1073 if (d) *d = 0; 1074 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1075 } else { 1076 diag = a->diag; 1077 for (i=0; i<a->mbs; i++) { 1078 if (diag[i] >= ii[i+1]) { 1079 *missing = PETSC_TRUE; 1080 if (d) *d = i; 1081 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1082 break; 1083 } 1084 } 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1091 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1092 { 1093 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 PetscInt i,j,m = a->mbs; 1096 1097 PetscFunctionBegin; 1098 if (!a->diag) { 1099 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1100 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1101 a->free_diag = PETSC_TRUE; 1102 } 1103 for (i=0; i<m; i++) { 1104 a->diag[i] = a->i[i+1]; 1105 for (j=a->i[i]; j<a->i[i+1]; j++) { 1106 if (a->j[j] == i) { 1107 a->diag[i] = j; 1108 break; 1109 } 1110 } 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1118 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1119 { 1120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1121 PetscErrorCode ierr; 1122 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1123 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1124 1125 PetscFunctionBegin; 1126 *nn = n; 1127 if (!ia) PetscFunctionReturn(0); 1128 if (symmetric) { 1129 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1130 nz = tia[n]; 1131 } else { 1132 tia = a->i; tja = a->j; 1133 } 1134 1135 if (!blockcompressed && bs > 1) { 1136 (*nn) *= bs; 1137 /* malloc & create the natural set of indices */ 1138 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1139 if (n) { 1140 (*ia)[0] = oshift; 1141 for (j=1; j<bs; j++) { 1142 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1143 } 1144 } 1145 1146 for (i=1; i<n; i++) { 1147 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1148 for (j=1; j<bs; j++) { 1149 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1150 } 1151 } 1152 if (n) { 1153 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1154 } 1155 1156 if (inja) { 1157 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1158 cnt = 0; 1159 for (i=0; i<n; i++) { 1160 for (j=0; j<bs; j++) { 1161 for (k=tia[i]; k<tia[i+1]; k++) { 1162 for (l=0; l<bs; l++) { 1163 (*ja)[cnt++] = bs*tja[k] + l; 1164 } 1165 } 1166 } 1167 } 1168 } 1169 1170 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1171 ierr = PetscFree(tia);CHKERRQ(ierr); 1172 ierr = PetscFree(tja);CHKERRQ(ierr); 1173 } 1174 } else if (oshift == 1) { 1175 if (symmetric) { 1176 nz = tia[A->rmap->n/bs]; 1177 /* add 1 to i and j indices */ 1178 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1179 *ia = tia; 1180 if (ja) { 1181 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1182 *ja = tja; 1183 } 1184 } else { 1185 nz = a->i[A->rmap->n/bs]; 1186 /* malloc space and add 1 to i and j indices */ 1187 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1188 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1189 if (ja) { 1190 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1191 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1192 } 1193 } 1194 } else { 1195 *ia = tia; 1196 if (ja) *ja = tja; 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1203 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1204 { 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 if (!ia) PetscFunctionReturn(0); 1209 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1210 ierr = PetscFree(*ia);CHKERRQ(ierr); 1211 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1218 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1219 { 1220 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 #if defined(PETSC_USE_LOG) 1225 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1226 #endif 1227 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1228 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1229 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1230 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1231 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1232 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1233 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1234 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1235 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1236 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1237 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1238 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1239 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1240 1241 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1242 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1243 ierr = PetscFree(A->data);CHKERRQ(ierr); 1244 1245 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1250 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1252 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1253 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1254 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1255 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1256 #if defined(PETSC_HAVE_HYPRE) 1257 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1258 #endif 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1264 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1265 { 1266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 switch (op) { 1271 case MAT_ROW_ORIENTED: 1272 a->roworiented = flg; 1273 break; 1274 case MAT_KEEP_NONZERO_PATTERN: 1275 a->keepnonzeropattern = flg; 1276 break; 1277 case MAT_NEW_NONZERO_LOCATIONS: 1278 a->nonew = (flg ? 0 : 1); 1279 break; 1280 case MAT_NEW_NONZERO_LOCATION_ERR: 1281 a->nonew = (flg ? -1 : 0); 1282 break; 1283 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1284 a->nonew = (flg ? -2 : 0); 1285 break; 1286 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1287 a->nounused = (flg ? -1 : 0); 1288 break; 1289 case MAT_NEW_DIAGONALS: 1290 case MAT_IGNORE_OFF_PROC_ENTRIES: 1291 case MAT_USE_HASH_TABLE: 1292 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1293 break; 1294 case MAT_SPD: 1295 case MAT_SYMMETRIC: 1296 case MAT_STRUCTURALLY_SYMMETRIC: 1297 case MAT_HERMITIAN: 1298 case MAT_SYMMETRY_ETERNAL: 1299 /* These options are handled directly by MatSetOption() */ 1300 break; 1301 default: 1302 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1310 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1311 { 1312 PetscErrorCode ierr; 1313 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1314 MatScalar *aa_i; 1315 PetscScalar *v_i; 1316 1317 PetscFunctionBegin; 1318 bs = A->rmap->bs; 1319 bs2 = bs*bs; 1320 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1321 1322 bn = row/bs; /* Block number */ 1323 bp = row % bs; /* Block Position */ 1324 M = ai[bn+1] - ai[bn]; 1325 *nz = bs*M; 1326 1327 if (v) { 1328 *v = 0; 1329 if (*nz) { 1330 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1331 for (i=0; i<M; i++) { /* for each block in the block row */ 1332 v_i = *v + i*bs; 1333 aa_i = aa + bs2*(ai[bn] + i); 1334 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1335 } 1336 } 1337 } 1338 1339 if (idx) { 1340 *idx = 0; 1341 if (*nz) { 1342 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1343 for (i=0; i<M; i++) { /* for each block in the block row */ 1344 idx_i = *idx + i*bs; 1345 itmp = bs*aj[ai[bn] + i]; 1346 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1347 } 1348 } 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1355 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1356 { 1357 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1367 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1368 { 1369 PetscErrorCode ierr; 1370 1371 PetscFunctionBegin; 1372 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1373 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1374 PetscFunctionReturn(0); 1375 } 1376 1377 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1381 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1382 { 1383 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1384 Mat C; 1385 PetscErrorCode ierr; 1386 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1387 PetscInt *rows,*cols,bs2=a->bs2; 1388 MatScalar *array; 1389 1390 PetscFunctionBegin; 1391 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1392 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1393 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1394 1395 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1396 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1397 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1398 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1399 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1400 ierr = PetscFree(col);CHKERRQ(ierr); 1401 } else { 1402 C = *B; 1403 } 1404 1405 array = a->a; 1406 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1407 for (i=0; i<mbs; i++) { 1408 cols[0] = i*bs; 1409 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1410 len = ai[i+1] - ai[i]; 1411 for (j=0; j<len; j++) { 1412 rows[0] = (*aj++)*bs; 1413 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1414 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1415 array += bs2; 1416 } 1417 } 1418 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1419 1420 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1421 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1422 1423 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1424 *B = C; 1425 } else { 1426 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1427 } 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1433 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1434 { 1435 PetscErrorCode ierr; 1436 Mat Btrans; 1437 1438 PetscFunctionBegin; 1439 *f = PETSC_FALSE; 1440 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1441 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1442 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1448 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1449 { 1450 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1451 PetscErrorCode ierr; 1452 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1453 int fd; 1454 PetscScalar *aa; 1455 FILE *file; 1456 1457 PetscFunctionBegin; 1458 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1459 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1460 col_lens[0] = MAT_FILE_CLASSID; 1461 1462 col_lens[1] = A->rmap->N; 1463 col_lens[2] = A->cmap->n; 1464 col_lens[3] = a->nz*bs2; 1465 1466 /* store lengths of each row and write (including header) to file */ 1467 count = 0; 1468 for (i=0; i<a->mbs; i++) { 1469 for (j=0; j<bs; j++) { 1470 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1471 } 1472 } 1473 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1474 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1475 1476 /* store column indices (zero start index) */ 1477 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1478 count = 0; 1479 for (i=0; i<a->mbs; i++) { 1480 for (j=0; j<bs; j++) { 1481 for (k=a->i[i]; k<a->i[i+1]; k++) { 1482 for (l=0; l<bs; l++) { 1483 jj[count++] = bs*a->j[k] + l; 1484 } 1485 } 1486 } 1487 } 1488 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1489 ierr = PetscFree(jj);CHKERRQ(ierr); 1490 1491 /* store nonzero values */ 1492 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1493 count = 0; 1494 for (i=0; i<a->mbs; i++) { 1495 for (j=0; j<bs; j++) { 1496 for (k=a->i[i]; k<a->i[i+1]; k++) { 1497 for (l=0; l<bs; l++) { 1498 aa[count++] = a->a[bs2*k + l*bs + j]; 1499 } 1500 } 1501 } 1502 } 1503 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1504 ierr = PetscFree(aa);CHKERRQ(ierr); 1505 1506 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1507 if (file) { 1508 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1509 } 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1515 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1516 { 1517 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1518 PetscErrorCode ierr; 1519 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1520 PetscViewerFormat format; 1521 1522 PetscFunctionBegin; 1523 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1524 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1525 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1526 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1527 const char *matname; 1528 Mat aij; 1529 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1530 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1531 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1532 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1533 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1534 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1535 PetscFunctionReturn(0); 1536 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1537 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1538 for (i=0; i<a->mbs; i++) { 1539 for (j=0; j<bs; j++) { 1540 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1541 for (k=a->i[i]; k<a->i[i+1]; k++) { 1542 for (l=0; l<bs; l++) { 1543 #if defined(PETSC_USE_COMPLEX) 1544 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1546 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1547 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1549 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1550 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1551 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1552 } 1553 #else 1554 if (a->a[bs2*k + l*bs + j] != 0.0) { 1555 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1556 } 1557 #endif 1558 } 1559 } 1560 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1561 } 1562 } 1563 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1564 } else { 1565 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1566 for (i=0; i<a->mbs; i++) { 1567 for (j=0; j<bs; j++) { 1568 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1569 for (k=a->i[i]; k<a->i[i+1]; k++) { 1570 for (l=0; l<bs; l++) { 1571 #if defined(PETSC_USE_COMPLEX) 1572 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1574 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1575 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1576 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1577 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1578 } else { 1579 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1580 } 1581 #else 1582 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1583 #endif 1584 } 1585 } 1586 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1587 } 1588 } 1589 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1590 } 1591 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1592 PetscFunctionReturn(0); 1593 } 1594 1595 #include <petscdraw.h> 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1598 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1599 { 1600 Mat A = (Mat) Aa; 1601 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1602 PetscErrorCode ierr; 1603 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1604 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1605 MatScalar *aa; 1606 PetscViewer viewer; 1607 PetscViewerFormat format; 1608 1609 PetscFunctionBegin; 1610 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1611 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1612 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1613 1614 /* loop over matrix elements drawing boxes */ 1615 1616 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1617 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1618 /* Blue for negative, Cyan for zero and Red for positive */ 1619 color = PETSC_DRAW_BLUE; 1620 for (i=0,row=0; i<mbs; i++,row+=bs) { 1621 for (j=a->i[i]; j<a->i[i+1]; j++) { 1622 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1623 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1624 aa = a->a + j*bs2; 1625 for (k=0; k<bs; k++) { 1626 for (l=0; l<bs; l++) { 1627 if (PetscRealPart(*aa++) >= 0.) continue; 1628 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1629 } 1630 } 1631 } 1632 } 1633 color = PETSC_DRAW_CYAN; 1634 for (i=0,row=0; i<mbs; i++,row+=bs) { 1635 for (j=a->i[i]; j<a->i[i+1]; j++) { 1636 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1637 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1638 aa = a->a + j*bs2; 1639 for (k=0; k<bs; k++) { 1640 for (l=0; l<bs; l++) { 1641 if (PetscRealPart(*aa++) != 0.) continue; 1642 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1643 } 1644 } 1645 } 1646 } 1647 color = PETSC_DRAW_RED; 1648 for (i=0,row=0; i<mbs; i++,row+=bs) { 1649 for (j=a->i[i]; j<a->i[i+1]; j++) { 1650 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1651 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1652 aa = a->a + j*bs2; 1653 for (k=0; k<bs; k++) { 1654 for (l=0; l<bs; l++) { 1655 if (PetscRealPart(*aa++) <= 0.) continue; 1656 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1657 } 1658 } 1659 } 1660 } 1661 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1662 } else { 1663 /* use contour shading to indicate magnitude of values */ 1664 /* first determine max of all nonzero values */ 1665 PetscReal minv = 0.0, maxv = 0.0; 1666 PetscDraw popup; 1667 1668 for (i=0; i<a->nz*a->bs2; i++) { 1669 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1670 } 1671 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1672 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1673 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1674 1675 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1676 for (i=0,row=0; i<mbs; i++,row+=bs) { 1677 for (j=a->i[i]; j<a->i[i+1]; j++) { 1678 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1679 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1680 aa = a->a + j*bs2; 1681 for (k=0; k<bs; k++) { 1682 for (l=0; l<bs; l++) { 1683 MatScalar v = *aa++; 1684 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1685 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1686 } 1687 } 1688 } 1689 } 1690 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1691 } 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1697 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1698 { 1699 PetscErrorCode ierr; 1700 PetscReal xl,yl,xr,yr,w,h; 1701 PetscDraw draw; 1702 PetscBool isnull; 1703 1704 PetscFunctionBegin; 1705 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1706 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1707 if (isnull) PetscFunctionReturn(0); 1708 1709 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1710 xr += w; yr += h; xl = -w; yl = -h; 1711 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1712 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1713 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1714 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1715 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatView_SeqBAIJ" 1721 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1722 { 1723 PetscErrorCode ierr; 1724 PetscBool iascii,isbinary,isdraw; 1725 1726 PetscFunctionBegin; 1727 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1728 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1729 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1730 if (iascii) { 1731 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1732 } else if (isbinary) { 1733 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1734 } else if (isdraw) { 1735 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1736 } else { 1737 Mat B; 1738 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1739 ierr = MatView(B,viewer);CHKERRQ(ierr); 1740 ierr = MatDestroy(&B);CHKERRQ(ierr); 1741 } 1742 PetscFunctionReturn(0); 1743 } 1744 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1748 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1749 { 1750 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1751 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1752 PetscInt *ai = a->i,*ailen = a->ilen; 1753 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1754 MatScalar *ap,*aa = a->a; 1755 1756 PetscFunctionBegin; 1757 for (k=0; k<m; k++) { /* loop over rows */ 1758 row = im[k]; brow = row/bs; 1759 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1760 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1761 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1762 nrow = ailen[brow]; 1763 for (l=0; l<n; l++) { /* loop over columns */ 1764 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1765 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1766 col = in[l]; 1767 bcol = col/bs; 1768 cidx = col%bs; 1769 ridx = row%bs; 1770 high = nrow; 1771 low = 0; /* assume unsorted */ 1772 while (high-low > 5) { 1773 t = (low+high)/2; 1774 if (rp[t] > bcol) high = t; 1775 else low = t; 1776 } 1777 for (i=low; i<high; i++) { 1778 if (rp[i] > bcol) break; 1779 if (rp[i] == bcol) { 1780 *v++ = ap[bs2*i+bs*cidx+ridx]; 1781 goto finished; 1782 } 1783 } 1784 *v++ = 0.0; 1785 finished:; 1786 } 1787 } 1788 PetscFunctionReturn(0); 1789 } 1790 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1793 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1794 { 1795 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1796 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1797 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1798 PetscErrorCode ierr; 1799 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1800 PetscBool roworiented=a->roworiented; 1801 const PetscScalar *value = v; 1802 MatScalar *ap,*aa = a->a,*bap; 1803 1804 PetscFunctionBegin; 1805 if (roworiented) { 1806 stepval = (n-1)*bs; 1807 } else { 1808 stepval = (m-1)*bs; 1809 } 1810 for (k=0; k<m; k++) { /* loop over added rows */ 1811 row = im[k]; 1812 if (row < 0) continue; 1813 #if defined(PETSC_USE_DEBUG) 1814 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1815 #endif 1816 rp = aj + ai[row]; 1817 ap = aa + bs2*ai[row]; 1818 rmax = imax[row]; 1819 nrow = ailen[row]; 1820 low = 0; 1821 high = nrow; 1822 for (l=0; l<n; l++) { /* loop over added columns */ 1823 if (in[l] < 0) continue; 1824 #if defined(PETSC_USE_DEBUG) 1825 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1826 #endif 1827 col = in[l]; 1828 if (roworiented) { 1829 value = v + (k*(stepval+bs) + l)*bs; 1830 } else { 1831 value = v + (l*(stepval+bs) + k)*bs; 1832 } 1833 if (col <= lastcol) low = 0; 1834 else high = nrow; 1835 lastcol = col; 1836 while (high-low > 7) { 1837 t = (low+high)/2; 1838 if (rp[t] > col) high = t; 1839 else low = t; 1840 } 1841 for (i=low; i<high; i++) { 1842 if (rp[i] > col) break; 1843 if (rp[i] == col) { 1844 bap = ap + bs2*i; 1845 if (roworiented) { 1846 if (is == ADD_VALUES) { 1847 for (ii=0; ii<bs; ii++,value+=stepval) { 1848 for (jj=ii; jj<bs2; jj+=bs) { 1849 bap[jj] += *value++; 1850 } 1851 } 1852 } else { 1853 for (ii=0; ii<bs; ii++,value+=stepval) { 1854 for (jj=ii; jj<bs2; jj+=bs) { 1855 bap[jj] = *value++; 1856 } 1857 } 1858 } 1859 } else { 1860 if (is == ADD_VALUES) { 1861 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1862 for (jj=0; jj<bs; jj++) { 1863 bap[jj] += value[jj]; 1864 } 1865 bap += bs; 1866 } 1867 } else { 1868 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1869 for (jj=0; jj<bs; jj++) { 1870 bap[jj] = value[jj]; 1871 } 1872 bap += bs; 1873 } 1874 } 1875 } 1876 goto noinsert2; 1877 } 1878 } 1879 if (nonew == 1) goto noinsert2; 1880 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1881 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1882 N = nrow++ - 1; high++; 1883 /* shift up all the later entries in this row */ 1884 for (ii=N; ii>=i; ii--) { 1885 rp[ii+1] = rp[ii]; 1886 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1887 } 1888 if (N >= i) { 1889 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1890 } 1891 rp[i] = col; 1892 bap = ap + bs2*i; 1893 if (roworiented) { 1894 for (ii=0; ii<bs; ii++,value+=stepval) { 1895 for (jj=ii; jj<bs2; jj+=bs) { 1896 bap[jj] = *value++; 1897 } 1898 } 1899 } else { 1900 for (ii=0; ii<bs; ii++,value+=stepval) { 1901 for (jj=0; jj<bs; jj++) { 1902 *bap++ = *value++; 1903 } 1904 } 1905 } 1906 noinsert2:; 1907 low = i; 1908 } 1909 ailen[row] = nrow; 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1916 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1917 { 1918 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1919 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1920 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1921 PetscErrorCode ierr; 1922 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1923 MatScalar *aa = a->a,*ap; 1924 PetscReal ratio=0.6; 1925 1926 PetscFunctionBegin; 1927 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1928 1929 if (m) rmax = ailen[0]; 1930 for (i=1; i<mbs; i++) { 1931 /* move each row back by the amount of empty slots (fshift) before it*/ 1932 fshift += imax[i-1] - ailen[i-1]; 1933 rmax = PetscMax(rmax,ailen[i]); 1934 if (fshift) { 1935 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1936 N = ailen[i]; 1937 for (j=0; j<N; j++) { 1938 ip[j-fshift] = ip[j]; 1939 1940 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1941 } 1942 } 1943 ai[i] = ai[i-1] + ailen[i-1]; 1944 } 1945 if (mbs) { 1946 fshift += imax[mbs-1] - ailen[mbs-1]; 1947 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1948 } 1949 1950 /* reset ilen and imax for each row */ 1951 a->nonzerorowcnt = 0; 1952 for (i=0; i<mbs; i++) { 1953 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1954 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1955 } 1956 a->nz = ai[mbs]; 1957 1958 /* diagonals may have moved, so kill the diagonal pointers */ 1959 a->idiagvalid = PETSC_FALSE; 1960 if (fshift && a->diag) { 1961 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1962 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1963 a->diag = 0; 1964 } 1965 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1966 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1967 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1968 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1969 1970 A->info.mallocs += a->reallocs; 1971 a->reallocs = 0; 1972 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1973 a->rmax = rmax; 1974 1975 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1976 PetscFunctionReturn(0); 1977 } 1978 1979 /* 1980 This function returns an array of flags which indicate the locations of contiguous 1981 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1982 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1983 Assume: sizes should be long enough to hold all the values. 1984 */ 1985 #undef __FUNCT__ 1986 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1987 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1988 { 1989 PetscInt i,j,k,row; 1990 PetscBool flg; 1991 1992 PetscFunctionBegin; 1993 for (i=0,j=0; i<n; j++) { 1994 row = idx[i]; 1995 if (row%bs!=0) { /* Not the begining of a block */ 1996 sizes[j] = 1; 1997 i++; 1998 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1999 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2000 i++; 2001 } else { /* Begining of the block, so check if the complete block exists */ 2002 flg = PETSC_TRUE; 2003 for (k=1; k<bs; k++) { 2004 if (row+k != idx[i+k]) { /* break in the block */ 2005 flg = PETSC_FALSE; 2006 break; 2007 } 2008 } 2009 if (flg) { /* No break in the bs */ 2010 sizes[j] = bs; 2011 i += bs; 2012 } else { 2013 sizes[j] = 1; 2014 i++; 2015 } 2016 } 2017 } 2018 *bs_max = j; 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2024 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2025 { 2026 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2027 PetscErrorCode ierr; 2028 PetscInt i,j,k,count,*rows; 2029 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2030 PetscScalar zero = 0.0; 2031 MatScalar *aa; 2032 const PetscScalar *xx; 2033 PetscScalar *bb; 2034 2035 PetscFunctionBegin; 2036 /* fix right hand side if needed */ 2037 if (x && b) { 2038 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2039 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2040 for (i=0; i<is_n; i++) { 2041 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2042 } 2043 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2044 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2045 } 2046 2047 /* Make a copy of the IS and sort it */ 2048 /* allocate memory for rows,sizes */ 2049 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2050 2051 /* copy IS values to rows, and sort them */ 2052 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2053 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2054 2055 if (baij->keepnonzeropattern) { 2056 for (i=0; i<is_n; i++) sizes[i] = 1; 2057 bs_max = is_n; 2058 } else { 2059 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2060 A->nonzerostate++; 2061 } 2062 2063 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2064 row = rows[j]; 2065 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2066 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2067 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2068 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2069 if (diag != (PetscScalar)0.0) { 2070 if (baij->ilen[row/bs] > 0) { 2071 baij->ilen[row/bs] = 1; 2072 baij->j[baij->i[row/bs]] = row/bs; 2073 2074 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2075 } 2076 /* Now insert all the diagonal values for this bs */ 2077 for (k=0; k<bs; k++) { 2078 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2079 } 2080 } else { /* (diag == 0.0) */ 2081 baij->ilen[row/bs] = 0; 2082 } /* end (diag == 0.0) */ 2083 } else { /* (sizes[i] != bs) */ 2084 #if defined(PETSC_USE_DEBUG) 2085 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2086 #endif 2087 for (k=0; k<count; k++) { 2088 aa[0] = zero; 2089 aa += bs; 2090 } 2091 if (diag != (PetscScalar)0.0) { 2092 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2093 } 2094 } 2095 } 2096 2097 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2098 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2099 PetscFunctionReturn(0); 2100 } 2101 2102 #undef __FUNCT__ 2103 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2104 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2105 { 2106 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2107 PetscErrorCode ierr; 2108 PetscInt i,j,k,count; 2109 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2110 PetscScalar zero = 0.0; 2111 MatScalar *aa; 2112 const PetscScalar *xx; 2113 PetscScalar *bb; 2114 PetscBool *zeroed,vecs = PETSC_FALSE; 2115 2116 PetscFunctionBegin; 2117 /* fix right hand side if needed */ 2118 if (x && b) { 2119 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2120 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2121 vecs = PETSC_TRUE; 2122 } 2123 2124 /* zero the columns */ 2125 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2126 for (i=0; i<is_n; i++) { 2127 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2128 zeroed[is_idx[i]] = PETSC_TRUE; 2129 } 2130 for (i=0; i<A->rmap->N; i++) { 2131 if (!zeroed[i]) { 2132 row = i/bs; 2133 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2134 for (k=0; k<bs; k++) { 2135 col = bs*baij->j[j] + k; 2136 if (zeroed[col]) { 2137 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2138 if (vecs) bb[i] -= aa[0]*xx[col]; 2139 aa[0] = 0.0; 2140 } 2141 } 2142 } 2143 } else if (vecs) bb[i] = diag*xx[i]; 2144 } 2145 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2146 if (vecs) { 2147 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2148 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2149 } 2150 2151 /* zero the rows */ 2152 for (i=0; i<is_n; i++) { 2153 row = is_idx[i]; 2154 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2155 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2156 for (k=0; k<count; k++) { 2157 aa[0] = zero; 2158 aa += bs; 2159 } 2160 if (diag != (PetscScalar)0.0) { 2161 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2162 } 2163 } 2164 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2170 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2171 { 2172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2173 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2174 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2175 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2176 PetscErrorCode ierr; 2177 PetscInt ridx,cidx,bs2=a->bs2; 2178 PetscBool roworiented=a->roworiented; 2179 MatScalar *ap,value,*aa=a->a,*bap; 2180 2181 PetscFunctionBegin; 2182 for (k=0; k<m; k++) { /* loop over added rows */ 2183 row = im[k]; 2184 brow = row/bs; 2185 if (row < 0) continue; 2186 #if defined(PETSC_USE_DEBUG) 2187 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2188 #endif 2189 rp = aj + ai[brow]; 2190 ap = aa + bs2*ai[brow]; 2191 rmax = imax[brow]; 2192 nrow = ailen[brow]; 2193 low = 0; 2194 high = nrow; 2195 for (l=0; l<n; l++) { /* loop over added columns */ 2196 if (in[l] < 0) continue; 2197 #if defined(PETSC_USE_DEBUG) 2198 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2199 #endif 2200 col = in[l]; bcol = col/bs; 2201 ridx = row % bs; cidx = col % bs; 2202 if (roworiented) { 2203 value = v[l + k*n]; 2204 } else { 2205 value = v[k + l*m]; 2206 } 2207 if (col <= lastcol) low = 0; else high = nrow; 2208 lastcol = col; 2209 while (high-low > 7) { 2210 t = (low+high)/2; 2211 if (rp[t] > bcol) high = t; 2212 else low = t; 2213 } 2214 for (i=low; i<high; i++) { 2215 if (rp[i] > bcol) break; 2216 if (rp[i] == bcol) { 2217 bap = ap + bs2*i + bs*cidx + ridx; 2218 if (is == ADD_VALUES) *bap += value; 2219 else *bap = value; 2220 goto noinsert1; 2221 } 2222 } 2223 if (nonew == 1) goto noinsert1; 2224 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2225 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2226 N = nrow++ - 1; high++; 2227 /* shift up all the later entries in this row */ 2228 for (ii=N; ii>=i; ii--) { 2229 rp[ii+1] = rp[ii]; 2230 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2231 } 2232 if (N>=i) { 2233 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2234 } 2235 rp[i] = bcol; 2236 ap[bs2*i + bs*cidx + ridx] = value; 2237 a->nz++; 2238 A->nonzerostate++; 2239 noinsert1:; 2240 low = i; 2241 } 2242 ailen[brow] = nrow; 2243 } 2244 PetscFunctionReturn(0); 2245 } 2246 2247 #undef __FUNCT__ 2248 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2249 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2250 { 2251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2252 Mat outA; 2253 PetscErrorCode ierr; 2254 PetscBool row_identity,col_identity; 2255 2256 PetscFunctionBegin; 2257 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2258 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2259 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2260 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2261 2262 outA = inA; 2263 inA->factortype = MAT_FACTOR_LU; 2264 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2265 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2266 2267 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2268 2269 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2270 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2271 a->row = row; 2272 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2273 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2274 a->col = col; 2275 2276 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2277 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2278 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2279 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2280 2281 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2282 if (!a->solve_work) { 2283 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2284 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2285 } 2286 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2287 PetscFunctionReturn(0); 2288 } 2289 2290 #undef __FUNCT__ 2291 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2292 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2293 { 2294 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2295 PetscInt i,nz,mbs; 2296 2297 PetscFunctionBegin; 2298 nz = baij->maxnz; 2299 mbs = baij->mbs; 2300 for (i=0; i<nz; i++) { 2301 baij->j[i] = indices[i]; 2302 } 2303 baij->nz = nz; 2304 for (i=0; i<mbs; i++) { 2305 baij->ilen[i] = baij->imax[i]; 2306 } 2307 PetscFunctionReturn(0); 2308 } 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2312 /*@ 2313 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2314 in the matrix. 2315 2316 Input Parameters: 2317 + mat - the SeqBAIJ matrix 2318 - indices - the column indices 2319 2320 Level: advanced 2321 2322 Notes: 2323 This can be called if you have precomputed the nonzero structure of the 2324 matrix and want to provide it to the matrix object to improve the performance 2325 of the MatSetValues() operation. 2326 2327 You MUST have set the correct numbers of nonzeros per row in the call to 2328 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2329 2330 MUST be called before any calls to MatSetValues(); 2331 2332 @*/ 2333 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2334 { 2335 PetscErrorCode ierr; 2336 2337 PetscFunctionBegin; 2338 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2339 PetscValidPointer(indices,2); 2340 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2346 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2347 { 2348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2349 PetscErrorCode ierr; 2350 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2351 PetscReal atmp; 2352 PetscScalar *x,zero = 0.0; 2353 MatScalar *aa; 2354 PetscInt ncols,brow,krow,kcol; 2355 2356 PetscFunctionBegin; 2357 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2358 bs = A->rmap->bs; 2359 aa = a->a; 2360 ai = a->i; 2361 aj = a->j; 2362 mbs = a->mbs; 2363 2364 ierr = VecSet(v,zero);CHKERRQ(ierr); 2365 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2366 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2367 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2368 for (i=0; i<mbs; i++) { 2369 ncols = ai[1] - ai[0]; ai++; 2370 brow = bs*i; 2371 for (j=0; j<ncols; j++) { 2372 for (kcol=0; kcol<bs; kcol++) { 2373 for (krow=0; krow<bs; krow++) { 2374 atmp = PetscAbsScalar(*aa);aa++; 2375 row = brow + krow; /* row index */ 2376 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2377 } 2378 } 2379 aj++; 2380 } 2381 } 2382 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2383 PetscFunctionReturn(0); 2384 } 2385 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "MatCopy_SeqBAIJ" 2388 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2389 { 2390 PetscErrorCode ierr; 2391 2392 PetscFunctionBegin; 2393 /* If the two matrices have the same copy implementation, use fast copy. */ 2394 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2395 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2396 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2397 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2398 2399 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2400 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2401 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2402 } else { 2403 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2404 } 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2410 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2411 { 2412 PetscErrorCode ierr; 2413 2414 PetscFunctionBegin; 2415 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 #undef __FUNCT__ 2420 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2421 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2422 { 2423 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2424 2425 PetscFunctionBegin; 2426 *array = a->a; 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2432 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2433 { 2434 PetscFunctionBegin; 2435 PetscFunctionReturn(0); 2436 } 2437 2438 #undef __FUNCT__ 2439 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2440 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2441 { 2442 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2443 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2444 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2445 PetscErrorCode ierr; 2446 2447 PetscFunctionBegin; 2448 /* Set the number of nonzeros in the new matrix */ 2449 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2455 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2456 { 2457 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2458 PetscErrorCode ierr; 2459 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2460 PetscBLASInt one=1; 2461 2462 PetscFunctionBegin; 2463 if (str == SAME_NONZERO_PATTERN) { 2464 PetscScalar alpha = a; 2465 PetscBLASInt bnz; 2466 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2467 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2468 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2469 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2470 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2471 } else { 2472 Mat B; 2473 PetscInt *nnz; 2474 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2475 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2476 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2477 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2478 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2479 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2480 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2481 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2482 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2483 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2484 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2485 ierr = PetscFree(nnz);CHKERRQ(ierr); 2486 } 2487 PetscFunctionReturn(0); 2488 } 2489 2490 #undef __FUNCT__ 2491 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2492 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2493 { 2494 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2495 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2496 MatScalar *aa = a->a; 2497 2498 PetscFunctionBegin; 2499 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2505 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2506 { 2507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2508 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2509 MatScalar *aa = a->a; 2510 2511 PetscFunctionBegin; 2512 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2518 /* 2519 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2520 */ 2521 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2522 { 2523 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2524 PetscErrorCode ierr; 2525 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2526 PetscInt nz = a->i[m],row,*jj,mr,col; 2527 2528 PetscFunctionBegin; 2529 *nn = n; 2530 if (!ia) PetscFunctionReturn(0); 2531 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2532 else { 2533 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2534 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2535 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2536 jj = a->j; 2537 for (i=0; i<nz; i++) { 2538 collengths[jj[i]]++; 2539 } 2540 cia[0] = oshift; 2541 for (i=0; i<n; i++) { 2542 cia[i+1] = cia[i] + collengths[i]; 2543 } 2544 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2545 jj = a->j; 2546 for (row=0; row<m; row++) { 2547 mr = a->i[row+1] - a->i[row]; 2548 for (i=0; i<mr; i++) { 2549 col = *jj++; 2550 2551 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2552 } 2553 } 2554 ierr = PetscFree(collengths);CHKERRQ(ierr); 2555 *ia = cia; *ja = cja; 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2562 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2563 { 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 if (!ia) PetscFunctionReturn(0); 2568 ierr = PetscFree(*ia);CHKERRQ(ierr); 2569 ierr = PetscFree(*ja);CHKERRQ(ierr); 2570 PetscFunctionReturn(0); 2571 } 2572 2573 /* 2574 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2575 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2576 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2577 */ 2578 #undef __FUNCT__ 2579 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2580 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2581 { 2582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2583 PetscErrorCode ierr; 2584 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2585 PetscInt nz = a->i[m],row,*jj,mr,col; 2586 PetscInt *cspidx; 2587 2588 PetscFunctionBegin; 2589 *nn = n; 2590 if (!ia) PetscFunctionReturn(0); 2591 2592 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2593 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2594 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2595 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2596 jj = a->j; 2597 for (i=0; i<nz; i++) { 2598 collengths[jj[i]]++; 2599 } 2600 cia[0] = oshift; 2601 for (i=0; i<n; i++) { 2602 cia[i+1] = cia[i] + collengths[i]; 2603 } 2604 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2605 jj = a->j; 2606 for (row=0; row<m; row++) { 2607 mr = a->i[row+1] - a->i[row]; 2608 for (i=0; i<mr; i++) { 2609 col = *jj++; 2610 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2611 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2612 } 2613 } 2614 ierr = PetscFree(collengths);CHKERRQ(ierr); 2615 *ia = cia; *ja = cja; 2616 *spidx = cspidx; 2617 PetscFunctionReturn(0); 2618 } 2619 2620 #undef __FUNCT__ 2621 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2622 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2623 { 2624 PetscErrorCode ierr; 2625 2626 PetscFunctionBegin; 2627 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2628 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2629 PetscFunctionReturn(0); 2630 } 2631 2632 #undef __FUNCT__ 2633 #define __FUNCT__ "MatShift_SeqBAIJ" 2634 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2635 { 2636 PetscErrorCode ierr; 2637 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2638 2639 PetscFunctionBegin; 2640 if (!Y->preallocated || !aij->nz) { 2641 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2642 } 2643 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2644 PetscFunctionReturn(0); 2645 } 2646 2647 /* -------------------------------------------------------------------*/ 2648 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2649 MatGetRow_SeqBAIJ, 2650 MatRestoreRow_SeqBAIJ, 2651 MatMult_SeqBAIJ_N, 2652 /* 4*/ MatMultAdd_SeqBAIJ_N, 2653 MatMultTranspose_SeqBAIJ, 2654 MatMultTransposeAdd_SeqBAIJ, 2655 0, 2656 0, 2657 0, 2658 /* 10*/ 0, 2659 MatLUFactor_SeqBAIJ, 2660 0, 2661 0, 2662 MatTranspose_SeqBAIJ, 2663 /* 15*/ MatGetInfo_SeqBAIJ, 2664 MatEqual_SeqBAIJ, 2665 MatGetDiagonal_SeqBAIJ, 2666 MatDiagonalScale_SeqBAIJ, 2667 MatNorm_SeqBAIJ, 2668 /* 20*/ 0, 2669 MatAssemblyEnd_SeqBAIJ, 2670 MatSetOption_SeqBAIJ, 2671 MatZeroEntries_SeqBAIJ, 2672 /* 24*/ MatZeroRows_SeqBAIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /* 29*/ MatSetUp_SeqBAIJ, 2678 0, 2679 0, 2680 0, 2681 0, 2682 /* 34*/ MatDuplicate_SeqBAIJ, 2683 0, 2684 0, 2685 MatILUFactor_SeqBAIJ, 2686 0, 2687 /* 39*/ MatAXPY_SeqBAIJ, 2688 MatGetSubMatrices_SeqBAIJ, 2689 MatIncreaseOverlap_SeqBAIJ, 2690 MatGetValues_SeqBAIJ, 2691 MatCopy_SeqBAIJ, 2692 /* 44*/ 0, 2693 MatScale_SeqBAIJ, 2694 MatShift_SeqBAIJ, 2695 0, 2696 MatZeroRowsColumns_SeqBAIJ, 2697 /* 49*/ 0, 2698 MatGetRowIJ_SeqBAIJ, 2699 MatRestoreRowIJ_SeqBAIJ, 2700 MatGetColumnIJ_SeqBAIJ, 2701 MatRestoreColumnIJ_SeqBAIJ, 2702 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2703 0, 2704 0, 2705 0, 2706 MatSetValuesBlocked_SeqBAIJ, 2707 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2708 MatDestroy_SeqBAIJ, 2709 MatView_SeqBAIJ, 2710 0, 2711 0, 2712 /* 64*/ 0, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2718 0, 2719 MatConvert_Basic, 2720 0, 2721 0, 2722 /* 74*/ 0, 2723 MatFDColoringApply_BAIJ, 2724 0, 2725 0, 2726 0, 2727 /* 79*/ 0, 2728 0, 2729 0, 2730 0, 2731 MatLoad_SeqBAIJ, 2732 /* 84*/ 0, 2733 0, 2734 0, 2735 0, 2736 0, 2737 /* 89*/ 0, 2738 0, 2739 0, 2740 0, 2741 0, 2742 /* 94*/ 0, 2743 0, 2744 0, 2745 0, 2746 0, 2747 /* 99*/ 0, 2748 0, 2749 0, 2750 0, 2751 0, 2752 /*104*/ 0, 2753 MatRealPart_SeqBAIJ, 2754 MatImaginaryPart_SeqBAIJ, 2755 0, 2756 0, 2757 /*109*/ 0, 2758 0, 2759 0, 2760 0, 2761 MatMissingDiagonal_SeqBAIJ, 2762 /*114*/ 0, 2763 0, 2764 0, 2765 0, 2766 0, 2767 /*119*/ 0, 2768 0, 2769 MatMultHermitianTranspose_SeqBAIJ, 2770 MatMultHermitianTransposeAdd_SeqBAIJ, 2771 0, 2772 /*124*/ 0, 2773 0, 2774 MatInvertBlockDiagonal_SeqBAIJ, 2775 0, 2776 0, 2777 /*129*/ 0, 2778 0, 2779 0, 2780 0, 2781 0, 2782 /*134*/ 0, 2783 0, 2784 0, 2785 0, 2786 0, 2787 /*139*/ 0, 2788 0, 2789 0, 2790 MatFDColoringSetUp_SeqXAIJ, 2791 0, 2792 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2793 }; 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2797 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2798 { 2799 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2800 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2801 PetscErrorCode ierr; 2802 2803 PetscFunctionBegin; 2804 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2805 2806 /* allocate space for values if not already there */ 2807 if (!aij->saved_values) { 2808 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2809 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2810 } 2811 2812 /* copy values over */ 2813 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2814 PetscFunctionReturn(0); 2815 } 2816 2817 #undef __FUNCT__ 2818 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2819 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2820 { 2821 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2822 PetscErrorCode ierr; 2823 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2824 2825 PetscFunctionBegin; 2826 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2827 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2828 2829 /* copy values over */ 2830 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2835 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2836 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2839 static PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2840 { 2841 Mat_SeqBAIJ *b; 2842 PetscErrorCode ierr; 2843 PetscInt i,mbs,nbs,bs2; 2844 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2845 2846 PetscFunctionBegin; 2847 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2848 if (nz == MAT_SKIP_ALLOCATION) { 2849 skipallocation = PETSC_TRUE; 2850 nz = 0; 2851 } 2852 2853 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2854 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2855 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2856 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2857 2858 B->preallocated = PETSC_TRUE; 2859 2860 mbs = B->rmap->n/bs; 2861 nbs = B->cmap->n/bs; 2862 bs2 = bs*bs; 2863 2864 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2865 2866 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2867 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2868 if (nnz) { 2869 for (i=0; i<mbs; i++) { 2870 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2871 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2872 } 2873 } 2874 2875 b = (Mat_SeqBAIJ*)B->data; 2876 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2877 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2878 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2879 2880 if (!flg) { 2881 switch (bs) { 2882 case 1: 2883 B->ops->mult = MatMult_SeqBAIJ_1; 2884 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2885 break; 2886 case 2: 2887 B->ops->mult = MatMult_SeqBAIJ_2; 2888 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2889 break; 2890 case 3: 2891 B->ops->mult = MatMult_SeqBAIJ_3; 2892 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2893 break; 2894 case 4: 2895 B->ops->mult = MatMult_SeqBAIJ_4; 2896 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2897 break; 2898 case 5: 2899 B->ops->mult = MatMult_SeqBAIJ_5; 2900 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2901 break; 2902 case 6: 2903 B->ops->mult = MatMult_SeqBAIJ_6; 2904 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2905 break; 2906 case 7: 2907 B->ops->mult = MatMult_SeqBAIJ_7; 2908 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2909 break; 2910 case 15: 2911 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2912 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2913 break; 2914 default: 2915 B->ops->mult = MatMult_SeqBAIJ_N; 2916 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2917 break; 2918 } 2919 } 2920 B->ops->sor = MatSOR_SeqBAIJ; 2921 b->mbs = mbs; 2922 b->nbs = nbs; 2923 if (!skipallocation) { 2924 if (!b->imax) { 2925 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2926 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2927 2928 b->free_imax_ilen = PETSC_TRUE; 2929 } 2930 /* b->ilen will count nonzeros in each block row so far. */ 2931 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2932 if (!nnz) { 2933 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2934 else if (nz < 0) nz = 1; 2935 for (i=0; i<mbs; i++) b->imax[i] = nz; 2936 nz = nz*mbs; 2937 } else { 2938 nz = 0; 2939 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2940 } 2941 2942 /* allocate the matrix space */ 2943 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2944 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2945 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2946 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2947 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2948 2949 b->singlemalloc = PETSC_TRUE; 2950 b->i[0] = 0; 2951 for (i=1; i<mbs+1; i++) { 2952 b->i[i] = b->i[i-1] + b->imax[i-1]; 2953 } 2954 b->free_a = PETSC_TRUE; 2955 b->free_ij = PETSC_TRUE; 2956 } else { 2957 b->free_a = PETSC_FALSE; 2958 b->free_ij = PETSC_FALSE; 2959 } 2960 2961 b->bs2 = bs2; 2962 b->mbs = mbs; 2963 b->nz = 0; 2964 b->maxnz = nz; 2965 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2966 B->was_assembled = PETSC_FALSE; 2967 B->assembled = PETSC_FALSE; 2968 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2969 PetscFunctionReturn(0); 2970 } 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2974 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2975 { 2976 PetscInt i,m,nz,nz_max=0,*nnz; 2977 PetscScalar *values=0; 2978 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2979 PetscErrorCode ierr; 2980 2981 PetscFunctionBegin; 2982 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2983 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2984 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2985 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2986 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2987 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2988 m = B->rmap->n/bs; 2989 2990 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2991 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2992 for (i=0; i<m; i++) { 2993 nz = ii[i+1]- ii[i]; 2994 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2995 nz_max = PetscMax(nz_max, nz); 2996 nnz[i] = nz; 2997 } 2998 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2999 ierr = PetscFree(nnz);CHKERRQ(ierr); 3000 3001 values = (PetscScalar*)V; 3002 if (!values) { 3003 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3004 } 3005 for (i=0; i<m; i++) { 3006 PetscInt ncols = ii[i+1] - ii[i]; 3007 const PetscInt *icols = jj + ii[i]; 3008 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3009 if (!roworiented) { 3010 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3011 } else { 3012 PetscInt j; 3013 for (j=0; j<ncols; j++) { 3014 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3015 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3016 } 3017 } 3018 } 3019 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3020 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3021 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3022 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3023 PetscFunctionReturn(0); 3024 } 3025 3026 /*MC 3027 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3028 block sparse compressed row format. 3029 3030 Options Database Keys: 3031 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3032 3033 Level: beginner 3034 3035 .seealso: MatCreateSeqBAIJ() 3036 M*/ 3037 3038 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3039 3040 #undef __FUNCT__ 3041 #define __FUNCT__ "MatCreate_SeqBAIJ" 3042 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3043 { 3044 PetscErrorCode ierr; 3045 PetscMPIInt size; 3046 Mat_SeqBAIJ *b; 3047 3048 PetscFunctionBegin; 3049 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3050 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3051 3052 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3053 B->data = (void*)b; 3054 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3055 3056 b->row = 0; 3057 b->col = 0; 3058 b->icol = 0; 3059 b->reallocs = 0; 3060 b->saved_values = 0; 3061 3062 b->roworiented = PETSC_TRUE; 3063 b->nonew = 0; 3064 b->diag = 0; 3065 B->spptr = 0; 3066 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3067 b->keepnonzeropattern = PETSC_FALSE; 3068 3069 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3070 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3071 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3072 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3073 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3075 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3076 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3078 #if defined(PETSC_HAVE_HYPRE) 3079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3080 #endif 3081 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3082 PetscFunctionReturn(0); 3083 } 3084 3085 #undef __FUNCT__ 3086 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3087 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3088 { 3089 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3090 PetscErrorCode ierr; 3091 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3092 3093 PetscFunctionBegin; 3094 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3095 3096 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3097 c->imax = a->imax; 3098 c->ilen = a->ilen; 3099 c->free_imax_ilen = PETSC_FALSE; 3100 } else { 3101 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3102 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3103 for (i=0; i<mbs; i++) { 3104 c->imax[i] = a->imax[i]; 3105 c->ilen[i] = a->ilen[i]; 3106 } 3107 c->free_imax_ilen = PETSC_TRUE; 3108 } 3109 3110 /* allocate the matrix space */ 3111 if (mallocmatspace) { 3112 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3113 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3114 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3115 3116 c->i = a->i; 3117 c->j = a->j; 3118 c->singlemalloc = PETSC_FALSE; 3119 c->free_a = PETSC_TRUE; 3120 c->free_ij = PETSC_FALSE; 3121 c->parent = A; 3122 C->preallocated = PETSC_TRUE; 3123 C->assembled = PETSC_TRUE; 3124 3125 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3126 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3127 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3128 } else { 3129 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3130 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3131 3132 c->singlemalloc = PETSC_TRUE; 3133 c->free_a = PETSC_TRUE; 3134 c->free_ij = PETSC_TRUE; 3135 3136 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3137 if (mbs > 0) { 3138 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3139 if (cpvalues == MAT_COPY_VALUES) { 3140 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3141 } else { 3142 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3143 } 3144 } 3145 C->preallocated = PETSC_TRUE; 3146 C->assembled = PETSC_TRUE; 3147 } 3148 } 3149 3150 c->roworiented = a->roworiented; 3151 c->nonew = a->nonew; 3152 3153 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3154 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3155 3156 c->bs2 = a->bs2; 3157 c->mbs = a->mbs; 3158 c->nbs = a->nbs; 3159 3160 if (a->diag) { 3161 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3162 c->diag = a->diag; 3163 c->free_diag = PETSC_FALSE; 3164 } else { 3165 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3166 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3167 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3168 c->free_diag = PETSC_TRUE; 3169 } 3170 } else c->diag = 0; 3171 3172 c->nz = a->nz; 3173 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3174 c->solve_work = NULL; 3175 c->mult_work = NULL; 3176 c->sor_workt = NULL; 3177 c->sor_work = NULL; 3178 3179 c->compressedrow.use = a->compressedrow.use; 3180 c->compressedrow.nrows = a->compressedrow.nrows; 3181 if (a->compressedrow.use) { 3182 i = a->compressedrow.nrows; 3183 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3184 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3185 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3186 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3187 } else { 3188 c->compressedrow.use = PETSC_FALSE; 3189 c->compressedrow.i = NULL; 3190 c->compressedrow.rindex = NULL; 3191 } 3192 C->nonzerostate = A->nonzerostate; 3193 3194 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3195 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3196 PetscFunctionReturn(0); 3197 } 3198 3199 #undef __FUNCT__ 3200 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3201 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3202 { 3203 PetscErrorCode ierr; 3204 3205 PetscFunctionBegin; 3206 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3207 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3208 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3209 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3210 PetscFunctionReturn(0); 3211 } 3212 3213 #undef __FUNCT__ 3214 #define __FUNCT__ "MatLoad_SeqBAIJ" 3215 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3216 { 3217 Mat_SeqBAIJ *a; 3218 PetscErrorCode ierr; 3219 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3220 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3221 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3222 PetscInt *masked,nmask,tmp,bs2,ishift; 3223 PetscMPIInt size; 3224 int fd; 3225 PetscScalar *aa; 3226 MPI_Comm comm; 3227 3228 PetscFunctionBegin; 3229 /* force binary viewer to load .info file if it has not yet done so */ 3230 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3231 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3232 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3233 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3234 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3235 if (bs < 0) bs = 1; 3236 bs2 = bs*bs; 3237 3238 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3239 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3240 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3241 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3242 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3243 M = header[1]; N = header[2]; nz = header[3]; 3244 3245 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3246 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3247 3248 /* 3249 This code adds extra rows to make sure the number of rows is 3250 divisible by the blocksize 3251 */ 3252 mbs = M/bs; 3253 extra_rows = bs - M + bs*(mbs); 3254 if (extra_rows == bs) extra_rows = 0; 3255 else mbs++; 3256 if (extra_rows) { 3257 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3258 } 3259 3260 /* Set global sizes if not already set */ 3261 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3262 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3263 } else { /* Check if the matrix global sizes are correct */ 3264 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3265 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3266 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3267 } 3268 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3269 } 3270 3271 /* read in row lengths */ 3272 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3273 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3274 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3275 3276 /* read in column indices */ 3277 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3278 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3279 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3280 3281 /* loop over row lengths determining block row lengths */ 3282 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3283 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3284 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3285 rowcount = 0; 3286 nzcount = 0; 3287 for (i=0; i<mbs; i++) { 3288 nmask = 0; 3289 for (j=0; j<bs; j++) { 3290 kmax = rowlengths[rowcount]; 3291 for (k=0; k<kmax; k++) { 3292 tmp = jj[nzcount++]/bs; 3293 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3294 } 3295 rowcount++; 3296 } 3297 browlengths[i] += nmask; 3298 /* zero out the mask elements we set */ 3299 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3300 } 3301 3302 /* Do preallocation */ 3303 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3304 a = (Mat_SeqBAIJ*)newmat->data; 3305 3306 /* set matrix "i" values */ 3307 a->i[0] = 0; 3308 for (i=1; i<= mbs; i++) { 3309 a->i[i] = a->i[i-1] + browlengths[i-1]; 3310 a->ilen[i-1] = browlengths[i-1]; 3311 } 3312 a->nz = 0; 3313 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3314 3315 /* read in nonzero values */ 3316 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3317 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3318 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3319 3320 /* set "a" and "j" values into matrix */ 3321 nzcount = 0; jcount = 0; 3322 for (i=0; i<mbs; i++) { 3323 nzcountb = nzcount; 3324 nmask = 0; 3325 for (j=0; j<bs; j++) { 3326 kmax = rowlengths[i*bs+j]; 3327 for (k=0; k<kmax; k++) { 3328 tmp = jj[nzcount++]/bs; 3329 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3330 } 3331 } 3332 /* sort the masked values */ 3333 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3334 3335 /* set "j" values into matrix */ 3336 maskcount = 1; 3337 for (j=0; j<nmask; j++) { 3338 a->j[jcount++] = masked[j]; 3339 mask[masked[j]] = maskcount++; 3340 } 3341 /* set "a" values into matrix */ 3342 ishift = bs2*a->i[i]; 3343 for (j=0; j<bs; j++) { 3344 kmax = rowlengths[i*bs+j]; 3345 for (k=0; k<kmax; k++) { 3346 tmp = jj[nzcountb]/bs; 3347 block = mask[tmp] - 1; 3348 point = jj[nzcountb] - bs*tmp; 3349 idx = ishift + bs2*block + j + bs*point; 3350 a->a[idx] = (MatScalar)aa[nzcountb++]; 3351 } 3352 } 3353 /* zero out the mask elements we set */ 3354 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3355 } 3356 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3357 3358 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3359 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3360 ierr = PetscFree(aa);CHKERRQ(ierr); 3361 ierr = PetscFree(jj);CHKERRQ(ierr); 3362 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3363 3364 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3365 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3366 PetscFunctionReturn(0); 3367 } 3368 3369 #undef __FUNCT__ 3370 #define __FUNCT__ "MatCreateSeqBAIJ" 3371 /*@C 3372 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3373 compressed row) format. For good matrix assembly performance the 3374 user should preallocate the matrix storage by setting the parameter nz 3375 (or the array nnz). By setting these parameters accurately, performance 3376 during matrix assembly can be increased by more than a factor of 50. 3377 3378 Collective on MPI_Comm 3379 3380 Input Parameters: 3381 + comm - MPI communicator, set to PETSC_COMM_SELF 3382 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3383 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3384 . m - number of rows 3385 . n - number of columns 3386 . nz - number of nonzero blocks per block row (same for all rows) 3387 - nnz - array containing the number of nonzero blocks in the various block rows 3388 (possibly different for each block row) or NULL 3389 3390 Output Parameter: 3391 . A - the matrix 3392 3393 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3394 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3395 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3396 3397 Options Database Keys: 3398 . -mat_no_unroll - uses code that does not unroll the loops in the 3399 block calculations (much slower) 3400 . -mat_block_size - size of the blocks to use 3401 3402 Level: intermediate 3403 3404 Notes: 3405 The number of rows and columns must be divisible by blocksize. 3406 3407 If the nnz parameter is given then the nz parameter is ignored 3408 3409 A nonzero block is any block that as 1 or more nonzeros in it 3410 3411 The block AIJ format is fully compatible with standard Fortran 77 3412 storage. That is, the stored row and column indices can begin at 3413 either one (as in Fortran) or zero. See the users' manual for details. 3414 3415 Specify the preallocated storage with either nz or nnz (not both). 3416 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3417 allocation. See Users-Manual: ch_mat for details. 3418 matrices. 3419 3420 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3421 @*/ 3422 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3423 { 3424 PetscErrorCode ierr; 3425 3426 PetscFunctionBegin; 3427 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3428 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3429 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3430 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3431 PetscFunctionReturn(0); 3432 } 3433 3434 #undef __FUNCT__ 3435 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3436 /*@C 3437 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3438 per row in the matrix. For good matrix assembly performance the 3439 user should preallocate the matrix storage by setting the parameter nz 3440 (or the array nnz). By setting these parameters accurately, performance 3441 during matrix assembly can be increased by more than a factor of 50. 3442 3443 Collective on MPI_Comm 3444 3445 Input Parameters: 3446 + B - the matrix 3447 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3448 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3449 . nz - number of block nonzeros per block row (same for all rows) 3450 - nnz - array containing the number of block nonzeros in the various block rows 3451 (possibly different for each block row) or NULL 3452 3453 Options Database Keys: 3454 . -mat_no_unroll - uses code that does not unroll the loops in the 3455 block calculations (much slower) 3456 . -mat_block_size - size of the blocks to use 3457 3458 Level: intermediate 3459 3460 Notes: 3461 If the nnz parameter is given then the nz parameter is ignored 3462 3463 You can call MatGetInfo() to get information on how effective the preallocation was; 3464 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3465 You can also run with the option -info and look for messages with the string 3466 malloc in them to see if additional memory allocation was needed. 3467 3468 The block AIJ format is fully compatible with standard Fortran 77 3469 storage. That is, the stored row and column indices can begin at 3470 either one (as in Fortran) or zero. See the users' manual for details. 3471 3472 Specify the preallocated storage with either nz or nnz (not both). 3473 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3474 allocation. See Users-Manual: ch_mat for details. 3475 3476 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3477 @*/ 3478 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3479 { 3480 PetscErrorCode ierr; 3481 3482 PetscFunctionBegin; 3483 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3484 PetscValidType(B,1); 3485 PetscValidLogicalCollectiveInt(B,bs,2); 3486 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3487 PetscFunctionReturn(0); 3488 } 3489 3490 #undef __FUNCT__ 3491 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3492 /*@C 3493 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3494 (the default sequential PETSc format). 3495 3496 Collective on MPI_Comm 3497 3498 Input Parameters: 3499 + B - the matrix 3500 . i - the indices into j for the start of each local row (starts with zero) 3501 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3502 - v - optional values in the matrix 3503 3504 Level: developer 3505 3506 Notes: 3507 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3508 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3509 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3510 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3511 block column and the second index is over columns within a block. 3512 3513 .keywords: matrix, aij, compressed row, sparse 3514 3515 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3516 @*/ 3517 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3518 { 3519 PetscErrorCode ierr; 3520 3521 PetscFunctionBegin; 3522 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3523 PetscValidType(B,1); 3524 PetscValidLogicalCollectiveInt(B,bs,2); 3525 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3526 PetscFunctionReturn(0); 3527 } 3528 3529 3530 #undef __FUNCT__ 3531 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3532 /*@ 3533 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3534 3535 Collective on MPI_Comm 3536 3537 Input Parameters: 3538 + comm - must be an MPI communicator of size 1 3539 . bs - size of block 3540 . m - number of rows 3541 . n - number of columns 3542 . i - row indices 3543 . j - column indices 3544 - a - matrix values 3545 3546 Output Parameter: 3547 . mat - the matrix 3548 3549 Level: advanced 3550 3551 Notes: 3552 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3553 once the matrix is destroyed 3554 3555 You cannot set new nonzero locations into this matrix, that will generate an error. 3556 3557 The i and j indices are 0 based 3558 3559 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3560 3561 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3562 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3563 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3564 with column-major ordering within blocks. 3565 3566 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3567 3568 @*/ 3569 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3570 { 3571 PetscErrorCode ierr; 3572 PetscInt ii; 3573 Mat_SeqBAIJ *baij; 3574 3575 PetscFunctionBegin; 3576 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3577 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3578 3579 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3580 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3581 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3582 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3583 baij = (Mat_SeqBAIJ*)(*mat)->data; 3584 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3585 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3586 3587 baij->i = i; 3588 baij->j = j; 3589 baij->a = a; 3590 3591 baij->singlemalloc = PETSC_FALSE; 3592 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3593 baij->free_a = PETSC_FALSE; 3594 baij->free_ij = PETSC_FALSE; 3595 3596 for (ii=0; ii<m; ii++) { 3597 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3598 #if defined(PETSC_USE_DEBUG) 3599 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3600 #endif 3601 } 3602 #if defined(PETSC_USE_DEBUG) 3603 for (ii=0; ii<baij->i[m]; ii++) { 3604 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3605 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3606 } 3607 #endif 3608 3609 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 PetscFunctionReturn(0); 3612 } 3613 3614 #undef __FUNCT__ 3615 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3616 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3617 { 3618 PetscErrorCode ierr; 3619 3620 PetscFunctionBegin; 3621 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3622 PetscFunctionReturn(0); 3623 } 3624