1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 allowzeropivot = PetscNot(A->erroriffailure); 24 25 if (a->idiagvalid) { 26 if (values) *values = a->idiag; 27 PetscFunctionReturn(0); 28 } 29 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 30 diag_offset = a->diag; 31 if (!a->idiag) { 32 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 33 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 34 } 35 diag = a->idiag; 36 mdiag = a->idiag+bs2*mbs; 37 if (values) *values = a->idiag; 38 /* factor and invert each block */ 39 switch (bs) { 40 case 1: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 1*diag_offset[i]; 43 diag[0] = odiag[0]; 44 mdiag[0] = odiag[0]; 45 46 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 47 if (allowzeropivot) { 48 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 49 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 50 A->factorerror_zeropivot_row = i; 51 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 52 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 53 } 54 55 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 56 diag += 1; 57 mdiag += 1; 58 } 59 break; 60 case 2: 61 for (i=0; i<mbs; i++) { 62 odiag = v + 4*diag_offset[i]; 63 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 64 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 65 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 66 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 67 diag += 4; 68 mdiag += 4; 69 } 70 break; 71 case 3: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 9*diag_offset[i]; 74 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 75 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 76 diag[8] = odiag[8]; 77 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 78 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 79 mdiag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 mdiag += 9; 84 } 85 break; 86 case 4: 87 for (i=0; i<mbs; i++) { 88 odiag = v + 16*diag_offset[i]; 89 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 90 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 91 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 92 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 93 diag += 16; 94 mdiag += 16; 95 } 96 break; 97 case 5: 98 for (i=0; i<mbs; i++) { 99 odiag = v + 25*diag_offset[i]; 100 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 101 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 103 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 case 6: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 36*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 114 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 115 diag += 36; 116 mdiag += 36; 117 } 118 break; 119 case 7: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 49*diag_offset[i]; 122 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 125 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 diag += 49; 127 mdiag += 49; 128 } 129 break; 130 default: 131 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 132 for (i=0; i<mbs; i++) { 133 odiag = v + bs2*diag_offset[i]; 134 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 135 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 136 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 137 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 138 diag += bs2; 139 mdiag += bs2; 140 } 141 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 142 } 143 a->idiagvalid = PETSC_TRUE; 144 PetscFunctionReturn(0); 145 } 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "MatSOR_SeqBAIJ" 149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 150 { 151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 152 PetscScalar *x,*work,*w,*workt,*t; 153 const MatScalar *v,*aa = a->a, *idiag; 154 const PetscScalar *b,*xb; 155 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 162 its = its*lits; 163 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 164 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 169 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 170 171 if (!m) PetscFunctionReturn(0); 172 diag = a->diag; 173 idiag = a->idiag; 174 k = PetscMax(A->rmap->n,A->cmap->n); 175 if (!a->mult_work) { 176 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 177 } 178 if (!a->sor_workt) { 179 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 180 } 181 if (!a->sor_work) { 182 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 183 } 184 work = a->mult_work; 185 t = a->sor_workt; 186 w = a->sor_work; 187 188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 189 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 190 191 if (flag & SOR_ZERO_INITIAL_GUESS) { 192 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 193 switch (bs) { 194 case 1: 195 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 196 t[0] = b[0]; 197 i2 = 1; 198 idiag += 1; 199 for (i=1; i<m; i++) { 200 v = aa + ai[i]; 201 vi = aj + ai[i]; 202 nz = diag[i] - ai[i]; 203 s[0] = b[i2]; 204 for (j=0; j<nz; j++) { 205 xw[0] = x[vi[j]]; 206 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 207 } 208 t[i2] = s[0]; 209 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 210 x[i2] = xw[0]; 211 idiag += 1; 212 i2 += 1; 213 } 214 break; 215 case 2: 216 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 217 t[0] = b[0]; t[1] = b[1]; 218 i2 = 2; 219 idiag += 4; 220 for (i=1; i<m; i++) { 221 v = aa + 4*ai[i]; 222 vi = aj + ai[i]; 223 nz = diag[i] - ai[i]; 224 s[0] = b[i2]; s[1] = b[i2+1]; 225 for (j=0; j<nz; j++) { 226 idx = 2*vi[j]; 227 it = 4*j; 228 xw[0] = x[idx]; xw[1] = x[1+idx]; 229 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 230 } 231 t[i2] = s[0]; t[i2+1] = s[1]; 232 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 233 x[i2] = xw[0]; x[i2+1] = xw[1]; 234 idiag += 4; 235 i2 += 2; 236 } 237 break; 238 case 3: 239 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 240 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 241 i2 = 3; 242 idiag += 9; 243 for (i=1; i<m; i++) { 244 v = aa + 9*ai[i]; 245 vi = aj + ai[i]; 246 nz = diag[i] - ai[i]; 247 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 248 while (nz--) { 249 idx = 3*(*vi++); 250 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 251 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 252 v += 9; 253 } 254 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 255 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 256 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 257 idiag += 9; 258 i2 += 3; 259 } 260 break; 261 case 4: 262 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 263 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 264 i2 = 4; 265 idiag += 16; 266 for (i=1; i<m; i++) { 267 v = aa + 16*ai[i]; 268 vi = aj + ai[i]; 269 nz = diag[i] - ai[i]; 270 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 271 while (nz--) { 272 idx = 4*(*vi++); 273 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 274 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 275 v += 16; 276 } 277 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 278 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 279 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 280 idiag += 16; 281 i2 += 4; 282 } 283 break; 284 case 5: 285 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 286 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 287 i2 = 5; 288 idiag += 25; 289 for (i=1; i<m; i++) { 290 v = aa + 25*ai[i]; 291 vi = aj + ai[i]; 292 nz = diag[i] - ai[i]; 293 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 294 while (nz--) { 295 idx = 5*(*vi++); 296 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 298 v += 25; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 301 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 302 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 303 idiag += 25; 304 i2 += 5; 305 } 306 break; 307 case 6: 308 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 309 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 310 i2 = 6; 311 idiag += 36; 312 for (i=1; i<m; i++) { 313 v = aa + 36*ai[i]; 314 vi = aj + ai[i]; 315 nz = diag[i] - ai[i]; 316 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 317 while (nz--) { 318 idx = 6*(*vi++); 319 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 320 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 321 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 322 v += 36; 323 } 324 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 325 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 326 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 328 idiag += 36; 329 i2 += 6; 330 } 331 break; 332 case 7: 333 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 335 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 336 i2 = 7; 337 idiag += 49; 338 for (i=1; i<m; i++) { 339 v = aa + 49*ai[i]; 340 vi = aj + ai[i]; 341 nz = diag[i] - ai[i]; 342 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 343 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 344 while (nz--) { 345 idx = 7*(*vi++); 346 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 347 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 348 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 349 v += 49; 350 } 351 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 352 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 353 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 354 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 355 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 356 idiag += 49; 357 i2 += 7; 358 } 359 break; 360 default: 361 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 362 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 363 i2 = bs; 364 idiag += bs2; 365 for (i=1; i<m; i++) { 366 v = aa + bs2*ai[i]; 367 vi = aj + ai[i]; 368 nz = diag[i] - ai[i]; 369 370 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 371 /* copy all rows of x that are needed into contiguous space */ 372 workt = work; 373 for (j=0; j<nz; j++) { 374 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 workt += bs; 376 } 377 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 378 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 380 381 idiag += bs2; 382 i2 += bs; 383 } 384 break; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 388 xb = t; 389 } 390 else xb = b; 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 392 idiag = a->idiag+bs2*(a->mbs-1); 393 i2 = bs * (m-1); 394 switch (bs) { 395 case 1: 396 s[0] = xb[i2]; 397 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 398 x[i2] = xw[0]; 399 i2 -= 1; 400 for (i=m-2; i>=0; i--) { 401 v = aa + (diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s[0] = xb[i2]; 405 for (j=0; j<nz; j++) { 406 xw[0] = x[vi[j]]; 407 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 408 } 409 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 410 x[i2] = xw[0]; 411 idiag -= 1; 412 i2 -= 1; 413 } 414 break; 415 case 2: 416 s[0] = xb[i2]; s[1] = xb[i2+1]; 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 i2 -= 2; 420 idiag -= 4; 421 for (i=m-2; i>=0; i--) { 422 v = aa + 4*(diag[i] + 1); 423 vi = aj + diag[i] + 1; 424 nz = ai[i+1] - diag[i] - 1; 425 s[0] = xb[i2]; s[1] = xb[i2+1]; 426 for (j=0; j<nz; j++) { 427 idx = 2*vi[j]; 428 it = 4*j; 429 xw[0] = x[idx]; xw[1] = x[1+idx]; 430 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 431 } 432 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 433 x[i2] = xw[0]; x[i2+1] = xw[1]; 434 idiag -= 4; 435 i2 -= 2; 436 } 437 break; 438 case 3: 439 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 i2 -= 3; 443 idiag -= 9; 444 for (i=m-2; i>=0; i--) { 445 v = aa + 9*(diag[i]+1); 446 vi = aj + diag[i] + 1; 447 nz = ai[i+1] - diag[i] - 1; 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 449 while (nz--) { 450 idx = 3*(*vi++); 451 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 452 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 453 v += 9; 454 } 455 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 456 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 457 idiag -= 9; 458 i2 -= 3; 459 } 460 break; 461 case 4: 462 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 i2 -= 4; 466 idiag -= 16; 467 for (i=m-2; i>=0; i--) { 468 v = aa + 16*(diag[i]+1); 469 vi = aj + diag[i] + 1; 470 nz = ai[i+1] - diag[i] - 1; 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 472 while (nz--) { 473 idx = 4*(*vi++); 474 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 475 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 476 v += 16; 477 } 478 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 479 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 480 idiag -= 16; 481 i2 -= 4; 482 } 483 break; 484 case 5: 485 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 i2 -= 5; 489 idiag -= 25; 490 for (i=m-2; i>=0; i--) { 491 v = aa + 25*(diag[i]+1); 492 vi = aj + diag[i] + 1; 493 nz = ai[i+1] - diag[i] - 1; 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 495 while (nz--) { 496 idx = 5*(*vi++); 497 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 499 v += 25; 500 } 501 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 503 idiag -= 25; 504 i2 -= 5; 505 } 506 break; 507 case 6: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 509 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 511 i2 -= 6; 512 idiag -= 36; 513 for (i=m-2; i>=0; i--) { 514 v = aa + 36*(diag[i]+1); 515 vi = aj + diag[i] + 1; 516 nz = ai[i+1] - diag[i] - 1; 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 518 while (nz--) { 519 idx = 6*(*vi++); 520 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 521 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 522 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 523 v += 36; 524 } 525 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 526 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 527 idiag -= 36; 528 i2 -= 6; 529 } 530 break; 531 case 7: 532 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 533 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 534 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 536 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 537 i2 -= 7; 538 idiag -= 49; 539 for (i=m-2; i>=0; i--) { 540 v = aa + 49*(diag[i]+1); 541 vi = aj + diag[i] + 1; 542 nz = ai[i+1] - diag[i] - 1; 543 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 544 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 545 while (nz--) { 546 idx = 7*(*vi++); 547 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 548 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 549 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 550 v += 49; 551 } 552 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 553 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 554 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 555 idiag -= 49; 556 i2 -= 7; 557 } 558 break; 559 default: 560 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 561 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 562 i2 -= bs; 563 idiag -= bs2; 564 for (i=m-2; i>=0; i--) { 565 v = aa + bs2*(diag[i]+1); 566 vi = aj + diag[i] + 1; 567 nz = ai[i+1] - diag[i] - 1; 568 569 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 570 /* copy all rows of x that are needed into contiguous space */ 571 workt = work; 572 for (j=0; j<nz; j++) { 573 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 workt += bs; 575 } 576 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 577 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 578 579 idiag -= bs2; 580 i2 -= bs; 581 } 582 break; 583 } 584 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 585 } 586 its--; 587 } 588 while (its--) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 590 idiag = a->idiag; 591 i2 = 0; 592 switch (bs) { 593 case 1: 594 for (i=0; i<m; i++) { 595 v = aa + ai[i]; 596 vi = aj + ai[i]; 597 nz = ai[i+1] - ai[i]; 598 s[0] = b[i2]; 599 for (j=0; j<nz; j++) { 600 xw[0] = x[vi[j]]; 601 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 602 } 603 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 604 x[i2] += xw[0]; 605 idiag += 1; 606 i2 += 1; 607 } 608 break; 609 case 2: 610 for (i=0; i<m; i++) { 611 v = aa + 4*ai[i]; 612 vi = aj + ai[i]; 613 nz = ai[i+1] - ai[i]; 614 s[0] = b[i2]; s[1] = b[i2+1]; 615 for (j=0; j<nz; j++) { 616 idx = 2*vi[j]; 617 it = 4*j; 618 xw[0] = x[idx]; xw[1] = x[1+idx]; 619 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 620 } 621 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 622 x[i2] += xw[0]; x[i2+1] += xw[1]; 623 idiag += 4; 624 i2 += 2; 625 } 626 break; 627 case 3: 628 for (i=0; i<m; i++) { 629 v = aa + 9*ai[i]; 630 vi = aj + ai[i]; 631 nz = ai[i+1] - ai[i]; 632 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 633 while (nz--) { 634 idx = 3*(*vi++); 635 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 636 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 637 v += 9; 638 } 639 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 640 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 641 idiag += 9; 642 i2 += 3; 643 } 644 break; 645 case 4: 646 for (i=0; i<m; i++) { 647 v = aa + 16*ai[i]; 648 vi = aj + ai[i]; 649 nz = ai[i+1] - ai[i]; 650 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 651 while (nz--) { 652 idx = 4*(*vi++); 653 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 654 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 655 v += 16; 656 } 657 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 658 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 659 idiag += 16; 660 i2 += 4; 661 } 662 break; 663 case 5: 664 for (i=0; i<m; i++) { 665 v = aa + 25*ai[i]; 666 vi = aj + ai[i]; 667 nz = ai[i+1] - ai[i]; 668 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 672 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 673 v += 25; 674 } 675 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 676 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 677 idiag += 25; 678 i2 += 5; 679 } 680 break; 681 case 6: 682 for (i=0; i<m; i++) { 683 v = aa + 36*ai[i]; 684 vi = aj + ai[i]; 685 nz = ai[i+1] - ai[i]; 686 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 687 while (nz--) { 688 idx = 6*(*vi++); 689 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 690 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 691 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 692 v += 36; 693 } 694 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 695 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 696 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 697 idiag += 36; 698 i2 += 6; 699 } 700 break; 701 case 7: 702 for (i=0; i<m; i++) { 703 v = aa + 49*ai[i]; 704 vi = aj + ai[i]; 705 nz = ai[i+1] - ai[i]; 706 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 707 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 708 while (nz--) { 709 idx = 7*(*vi++); 710 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 711 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 712 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 713 v += 49; 714 } 715 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 716 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 717 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 718 idiag += 49; 719 i2 += 7; 720 } 721 break; 722 default: 723 for (i=0; i<m; i++) { 724 v = aa + bs2*ai[i]; 725 vi = aj + ai[i]; 726 nz = ai[i+1] - ai[i]; 727 728 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 729 /* copy all rows of x that are needed into contiguous space */ 730 workt = work; 731 for (j=0; j<nz; j++) { 732 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 workt += bs; 734 } 735 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 736 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 737 738 idiag += bs2; 739 i2 += bs; 740 } 741 break; 742 } 743 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 744 } 745 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 746 idiag = a->idiag+bs2*(a->mbs-1); 747 i2 = bs * (m-1); 748 switch (bs) { 749 case 1: 750 for (i=m-1; i>=0; i--) { 751 v = aa + ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; 755 for (j=0; j<nz; j++) { 756 xw[0] = x[vi[j]]; 757 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 758 } 759 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 760 x[i2] += xw[0]; 761 idiag -= 1; 762 i2 -= 1; 763 } 764 break; 765 case 2: 766 for (i=m-1; i>=0; i--) { 767 v = aa + 4*ai[i]; 768 vi = aj + ai[i]; 769 nz = ai[i+1] - ai[i]; 770 s[0] = b[i2]; s[1] = b[i2+1]; 771 for (j=0; j<nz; j++) { 772 idx = 2*vi[j]; 773 it = 4*j; 774 xw[0] = x[idx]; xw[1] = x[1+idx]; 775 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 776 } 777 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 778 x[i2] += xw[0]; x[i2+1] += xw[1]; 779 idiag -= 4; 780 i2 -= 2; 781 } 782 break; 783 case 3: 784 for (i=m-1; i>=0; i--) { 785 v = aa + 9*ai[i]; 786 vi = aj + ai[i]; 787 nz = ai[i+1] - ai[i]; 788 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 789 while (nz--) { 790 idx = 3*(*vi++); 791 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 792 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 793 v += 9; 794 } 795 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 796 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 797 idiag -= 9; 798 i2 -= 3; 799 } 800 break; 801 case 4: 802 for (i=m-1; i>=0; i--) { 803 v = aa + 16*ai[i]; 804 vi = aj + ai[i]; 805 nz = ai[i+1] - ai[i]; 806 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 807 while (nz--) { 808 idx = 4*(*vi++); 809 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 810 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 811 v += 16; 812 } 813 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 814 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 815 idiag -= 16; 816 i2 -= 4; 817 } 818 break; 819 case 5: 820 for (i=m-1; i>=0; i--) { 821 v = aa + 25*ai[i]; 822 vi = aj + ai[i]; 823 nz = ai[i+1] - ai[i]; 824 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 825 while (nz--) { 826 idx = 5*(*vi++); 827 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 828 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 829 v += 25; 830 } 831 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 832 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 833 idiag -= 25; 834 i2 -= 5; 835 } 836 break; 837 case 6: 838 for (i=m-1; i>=0; i--) { 839 v = aa + 36*ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i+1] - ai[i]; 842 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 843 while (nz--) { 844 idx = 6*(*vi++); 845 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 846 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 847 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 848 v += 36; 849 } 850 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 851 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 852 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 853 idiag -= 36; 854 i2 -= 6; 855 } 856 break; 857 case 7: 858 for (i=m-1; i>=0; i--) { 859 v = aa + 49*ai[i]; 860 vi = aj + ai[i]; 861 nz = ai[i+1] - ai[i]; 862 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 863 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 864 while (nz--) { 865 idx = 7*(*vi++); 866 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 867 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 868 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 869 v += 49; 870 } 871 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 872 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 873 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 874 idiag -= 49; 875 i2 -= 7; 876 } 877 break; 878 default: 879 for (i=m-1; i>=0; i--) { 880 v = aa + bs2*ai[i]; 881 vi = aj + ai[i]; 882 nz = ai[i+1] - ai[i]; 883 884 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 /* copy all rows of x that are needed into contiguous space */ 886 workt = work; 887 for (j=0; j<nz; j++) { 888 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 workt += bs; 890 } 891 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 892 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 893 894 idiag -= bs2; 895 i2 -= bs; 896 } 897 break; 898 } 899 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 900 } 901 } 902 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 903 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 908 /* 909 Special version for direct calls from Fortran (Used in PETSc-fun3d) 910 */ 911 #if defined(PETSC_HAVE_FORTRAN_CAPS) 912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 914 #define matsetvaluesblocked4_ matsetvaluesblocked4 915 #endif 916 917 #undef __FUNCT__ 918 #define __FUNCT__ "matsetvaluesblocked4_" 919 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 920 { 921 Mat A = *AA; 922 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 923 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 924 PetscInt *ai =a->i,*ailen=a->ilen; 925 PetscInt *aj =a->j,stepval,lastcol = -1; 926 const PetscScalar *value = v; 927 MatScalar *ap,*aa = a->a,*bap; 928 929 PetscFunctionBegin; 930 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 931 stepval = (n-1)*4; 932 for (k=0; k<m; k++) { /* loop over added rows */ 933 row = im[k]; 934 rp = aj + ai[row]; 935 ap = aa + 16*ai[row]; 936 nrow = ailen[row]; 937 low = 0; 938 high = nrow; 939 for (l=0; l<n; l++) { /* loop over added columns */ 940 col = in[l]; 941 if (col <= lastcol) low = 0; 942 else high = nrow; 943 lastcol = col; 944 value = v + k*(stepval+4 + l)*4; 945 while (high-low > 7) { 946 t = (low+high)/2; 947 if (rp[t] > col) high = t; 948 else low = t; 949 } 950 for (i=low; i<high; i++) { 951 if (rp[i] > col) break; 952 if (rp[i] == col) { 953 bap = ap + 16*i; 954 for (ii=0; ii<4; ii++,value+=stepval) { 955 for (jj=ii; jj<16; jj+=4) { 956 bap[jj] += *value++; 957 } 958 } 959 goto noinsert2; 960 } 961 } 962 N = nrow++ - 1; 963 high++; /* added new column index thus must search to one higher than before */ 964 /* shift up all the later entries in this row */ 965 for (ii=N; ii>=i; ii--) { 966 rp[ii+1] = rp[ii]; 967 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 968 } 969 if (N >= i) { 970 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 971 } 972 rp[i] = col; 973 bap = ap + 16*i; 974 for (ii=0; ii<4; ii++,value+=stepval) { 975 for (jj=ii; jj<16; jj+=4) { 976 bap[jj] = *value++; 977 } 978 } 979 noinsert2:; 980 low = i; 981 } 982 ailen[row] = nrow; 983 } 984 PetscFunctionReturnVoid(); 985 } 986 987 #if defined(PETSC_HAVE_FORTRAN_CAPS) 988 #define matsetvalues4_ MATSETVALUES4 989 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 990 #define matsetvalues4_ matsetvalues4 991 #endif 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatSetValues4_" 995 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 996 { 997 Mat A = *AA; 998 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 999 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1000 PetscInt *ai=a->i,*ailen=a->ilen; 1001 PetscInt *aj=a->j,brow,bcol; 1002 PetscInt ridx,cidx,lastcol = -1; 1003 MatScalar *ap,value,*aa=a->a,*bap; 1004 1005 PetscFunctionBegin; 1006 for (k=0; k<m; k++) { /* loop over added rows */ 1007 row = im[k]; brow = row/4; 1008 rp = aj + ai[brow]; 1009 ap = aa + 16*ai[brow]; 1010 nrow = ailen[brow]; 1011 low = 0; 1012 high = nrow; 1013 for (l=0; l<n; l++) { /* loop over added columns */ 1014 col = in[l]; bcol = col/4; 1015 ridx = row % 4; cidx = col % 4; 1016 value = v[l + k*n]; 1017 if (col <= lastcol) low = 0; 1018 else high = nrow; 1019 lastcol = col; 1020 while (high-low > 7) { 1021 t = (low+high)/2; 1022 if (rp[t] > bcol) high = t; 1023 else low = t; 1024 } 1025 for (i=low; i<high; i++) { 1026 if (rp[i] > bcol) break; 1027 if (rp[i] == bcol) { 1028 bap = ap + 16*i + 4*cidx + ridx; 1029 *bap += value; 1030 goto noinsert1; 1031 } 1032 } 1033 N = nrow++ - 1; 1034 high++; /* added new column thus must search to one higher than before */ 1035 /* shift up all the later entries in this row */ 1036 for (ii=N; ii>=i; ii--) { 1037 rp[ii+1] = rp[ii]; 1038 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1039 } 1040 if (N>=i) { 1041 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1042 } 1043 rp[i] = bcol; 1044 ap[16*i + 4*cidx + ridx] = value; 1045 noinsert1:; 1046 low = i; 1047 } 1048 ailen[brow] = nrow; 1049 } 1050 PetscFunctionReturnVoid(); 1051 } 1052 1053 /* 1054 Checks for missing diagonals 1055 */ 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1058 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1059 { 1060 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1061 PetscErrorCode ierr; 1062 PetscInt *diag,*ii = a->i,i; 1063 1064 PetscFunctionBegin; 1065 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1066 *missing = PETSC_FALSE; 1067 if (A->rmap->n > 0 && !ii) { 1068 *missing = PETSC_TRUE; 1069 if (d) *d = 0; 1070 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1071 } else { 1072 diag = a->diag; 1073 for (i=0; i<a->mbs; i++) { 1074 if (diag[i] >= ii[i+1]) { 1075 *missing = PETSC_TRUE; 1076 if (d) *d = i; 1077 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1078 break; 1079 } 1080 } 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1087 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1088 { 1089 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt i,j,m = a->mbs; 1092 1093 PetscFunctionBegin; 1094 if (!a->diag) { 1095 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1096 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1097 a->free_diag = PETSC_TRUE; 1098 } 1099 for (i=0; i<m; i++) { 1100 a->diag[i] = a->i[i+1]; 1101 for (j=a->i[i]; j<a->i[i+1]; j++) { 1102 if (a->j[j] == i) { 1103 a->diag[i] = j; 1104 break; 1105 } 1106 } 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1114 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1117 PetscErrorCode ierr; 1118 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1119 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1120 1121 PetscFunctionBegin; 1122 *nn = n; 1123 if (!ia) PetscFunctionReturn(0); 1124 if (symmetric) { 1125 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1126 nz = tia[n]; 1127 } else { 1128 tia = a->i; tja = a->j; 1129 } 1130 1131 if (!blockcompressed && bs > 1) { 1132 (*nn) *= bs; 1133 /* malloc & create the natural set of indices */ 1134 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1135 if (n) { 1136 (*ia)[0] = oshift; 1137 for (j=1; j<bs; j++) { 1138 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1139 } 1140 } 1141 1142 for (i=1; i<n; i++) { 1143 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1144 for (j=1; j<bs; j++) { 1145 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1146 } 1147 } 1148 if (n) { 1149 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1150 } 1151 1152 if (inja) { 1153 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1154 cnt = 0; 1155 for (i=0; i<n; i++) { 1156 for (j=0; j<bs; j++) { 1157 for (k=tia[i]; k<tia[i+1]; k++) { 1158 for (l=0; l<bs; l++) { 1159 (*ja)[cnt++] = bs*tja[k] + l; 1160 } 1161 } 1162 } 1163 } 1164 } 1165 1166 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1167 ierr = PetscFree(tia);CHKERRQ(ierr); 1168 ierr = PetscFree(tja);CHKERRQ(ierr); 1169 } 1170 } else if (oshift == 1) { 1171 if (symmetric) { 1172 nz = tia[A->rmap->n/bs]; 1173 /* add 1 to i and j indices */ 1174 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1175 *ia = tia; 1176 if (ja) { 1177 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1178 *ja = tja; 1179 } 1180 } else { 1181 nz = a->i[A->rmap->n/bs]; 1182 /* malloc space and add 1 to i and j indices */ 1183 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1184 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1185 if (ja) { 1186 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1187 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1188 } 1189 } 1190 } else { 1191 *ia = tia; 1192 if (ja) *ja = tja; 1193 } 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1199 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1200 { 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 if (!ia) PetscFunctionReturn(0); 1205 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1206 ierr = PetscFree(*ia);CHKERRQ(ierr); 1207 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1214 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 1219 PetscFunctionBegin; 1220 #if defined(PETSC_USE_LOG) 1221 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1222 #endif 1223 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1224 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1225 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1226 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1227 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1228 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1229 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1230 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1231 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1232 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1233 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1234 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1235 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1236 1237 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1238 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1239 ierr = PetscFree(A->data);CHKERRQ(ierr); 1240 1241 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1250 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1257 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1258 { 1259 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 switch (op) { 1264 case MAT_ROW_ORIENTED: 1265 a->roworiented = flg; 1266 break; 1267 case MAT_KEEP_NONZERO_PATTERN: 1268 a->keepnonzeropattern = flg; 1269 break; 1270 case MAT_NEW_NONZERO_LOCATIONS: 1271 a->nonew = (flg ? 0 : 1); 1272 break; 1273 case MAT_NEW_NONZERO_LOCATION_ERR: 1274 a->nonew = (flg ? -1 : 0); 1275 break; 1276 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1277 a->nonew = (flg ? -2 : 0); 1278 break; 1279 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1280 a->nounused = (flg ? -1 : 0); 1281 break; 1282 case MAT_NEW_DIAGONALS: 1283 case MAT_IGNORE_OFF_PROC_ENTRIES: 1284 case MAT_USE_HASH_TABLE: 1285 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1286 break; 1287 case MAT_SPD: 1288 case MAT_SYMMETRIC: 1289 case MAT_STRUCTURALLY_SYMMETRIC: 1290 case MAT_HERMITIAN: 1291 case MAT_SYMMETRY_ETERNAL: 1292 /* These options are handled directly by MatSetOption() */ 1293 break; 1294 default: 1295 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1296 } 1297 PetscFunctionReturn(0); 1298 } 1299 1300 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1303 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1304 { 1305 PetscErrorCode ierr; 1306 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1307 MatScalar *aa_i; 1308 PetscScalar *v_i; 1309 1310 PetscFunctionBegin; 1311 bs = A->rmap->bs; 1312 bs2 = bs*bs; 1313 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1314 1315 bn = row/bs; /* Block number */ 1316 bp = row % bs; /* Block Position */ 1317 M = ai[bn+1] - ai[bn]; 1318 *nz = bs*M; 1319 1320 if (v) { 1321 *v = 0; 1322 if (*nz) { 1323 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1324 for (i=0; i<M; i++) { /* for each block in the block row */ 1325 v_i = *v + i*bs; 1326 aa_i = aa + bs2*(ai[bn] + i); 1327 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1328 } 1329 } 1330 } 1331 1332 if (idx) { 1333 *idx = 0; 1334 if (*nz) { 1335 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1336 for (i=0; i<M; i++) { /* for each block in the block row */ 1337 idx_i = *idx + i*bs; 1338 itmp = bs*aj[ai[bn] + i]; 1339 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1340 } 1341 } 1342 } 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1348 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1349 { 1350 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1360 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1361 { 1362 PetscErrorCode ierr; 1363 1364 PetscFunctionBegin; 1365 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1366 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1367 PetscFunctionReturn(0); 1368 } 1369 1370 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1374 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1375 { 1376 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1377 Mat C; 1378 PetscErrorCode ierr; 1379 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1380 PetscInt *rows,*cols,bs2=a->bs2; 1381 MatScalar *array; 1382 1383 PetscFunctionBegin; 1384 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1385 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1386 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1387 1388 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1389 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1390 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1391 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1392 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1393 ierr = PetscFree(col);CHKERRQ(ierr); 1394 } else { 1395 C = *B; 1396 } 1397 1398 array = a->a; 1399 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1400 for (i=0; i<mbs; i++) { 1401 cols[0] = i*bs; 1402 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1403 len = ai[i+1] - ai[i]; 1404 for (j=0; j<len; j++) { 1405 rows[0] = (*aj++)*bs; 1406 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1407 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1408 array += bs2; 1409 } 1410 } 1411 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1412 1413 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1414 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1415 1416 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1417 *B = C; 1418 } else { 1419 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1420 } 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1426 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1427 { 1428 PetscErrorCode ierr; 1429 Mat Btrans; 1430 1431 PetscFunctionBegin; 1432 *f = PETSC_FALSE; 1433 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1434 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1435 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1441 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1442 { 1443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1444 PetscErrorCode ierr; 1445 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1446 int fd; 1447 PetscScalar *aa; 1448 FILE *file; 1449 1450 PetscFunctionBegin; 1451 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1452 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1453 col_lens[0] = MAT_FILE_CLASSID; 1454 1455 col_lens[1] = A->rmap->N; 1456 col_lens[2] = A->cmap->n; 1457 col_lens[3] = a->nz*bs2; 1458 1459 /* store lengths of each row and write (including header) to file */ 1460 count = 0; 1461 for (i=0; i<a->mbs; i++) { 1462 for (j=0; j<bs; j++) { 1463 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1464 } 1465 } 1466 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1467 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1468 1469 /* store column indices (zero start index) */ 1470 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1471 count = 0; 1472 for (i=0; i<a->mbs; i++) { 1473 for (j=0; j<bs; j++) { 1474 for (k=a->i[i]; k<a->i[i+1]; k++) { 1475 for (l=0; l<bs; l++) { 1476 jj[count++] = bs*a->j[k] + l; 1477 } 1478 } 1479 } 1480 } 1481 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1482 ierr = PetscFree(jj);CHKERRQ(ierr); 1483 1484 /* store nonzero values */ 1485 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1486 count = 0; 1487 for (i=0; i<a->mbs; i++) { 1488 for (j=0; j<bs; j++) { 1489 for (k=a->i[i]; k<a->i[i+1]; k++) { 1490 for (l=0; l<bs; l++) { 1491 aa[count++] = a->a[bs2*k + l*bs + j]; 1492 } 1493 } 1494 } 1495 } 1496 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1497 ierr = PetscFree(aa);CHKERRQ(ierr); 1498 1499 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1500 if (file) { 1501 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1502 } 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1508 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1509 { 1510 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1511 PetscErrorCode ierr; 1512 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1513 PetscViewerFormat format; 1514 1515 PetscFunctionBegin; 1516 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1517 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1518 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1519 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1520 const char *matname; 1521 Mat aij; 1522 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1523 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1524 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1525 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1526 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1527 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1528 PetscFunctionReturn(0); 1529 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1530 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1531 for (i=0; i<a->mbs; i++) { 1532 for (j=0; j<bs; j++) { 1533 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1534 for (k=a->i[i]; k<a->i[i+1]; k++) { 1535 for (l=0; l<bs; l++) { 1536 #if defined(PETSC_USE_COMPLEX) 1537 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1539 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1542 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1543 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1544 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1545 } 1546 #else 1547 if (a->a[bs2*k + l*bs + j] != 0.0) { 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1549 } 1550 #endif 1551 } 1552 } 1553 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1554 } 1555 } 1556 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1557 } else { 1558 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1559 for (i=0; i<a->mbs; i++) { 1560 for (j=0; j<bs; j++) { 1561 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1562 for (k=a->i[i]; k<a->i[i+1]; k++) { 1563 for (l=0; l<bs; l++) { 1564 #if defined(PETSC_USE_COMPLEX) 1565 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1567 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1568 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1570 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } else { 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1573 } 1574 #else 1575 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1576 #endif 1577 } 1578 } 1579 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1580 } 1581 } 1582 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1583 } 1584 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #include <petscdraw.h> 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1591 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1592 { 1593 Mat A = (Mat) Aa; 1594 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1595 PetscErrorCode ierr; 1596 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1597 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1598 MatScalar *aa; 1599 PetscViewer viewer; 1600 PetscViewerFormat format; 1601 1602 PetscFunctionBegin; 1603 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1604 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1605 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1606 1607 /* loop over matrix elements drawing boxes */ 1608 1609 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1610 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1611 /* Blue for negative, Cyan for zero and Red for positive */ 1612 color = PETSC_DRAW_BLUE; 1613 for (i=0,row=0; i<mbs; i++,row+=bs) { 1614 for (j=a->i[i]; j<a->i[i+1]; j++) { 1615 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1616 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1617 aa = a->a + j*bs2; 1618 for (k=0; k<bs; k++) { 1619 for (l=0; l<bs; l++) { 1620 if (PetscRealPart(*aa++) >= 0.) continue; 1621 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1622 } 1623 } 1624 } 1625 } 1626 color = PETSC_DRAW_CYAN; 1627 for (i=0,row=0; i<mbs; i++,row+=bs) { 1628 for (j=a->i[i]; j<a->i[i+1]; j++) { 1629 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1630 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1631 aa = a->a + j*bs2; 1632 for (k=0; k<bs; k++) { 1633 for (l=0; l<bs; l++) { 1634 if (PetscRealPart(*aa++) != 0.) continue; 1635 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1636 } 1637 } 1638 } 1639 } 1640 color = PETSC_DRAW_RED; 1641 for (i=0,row=0; i<mbs; i++,row+=bs) { 1642 for (j=a->i[i]; j<a->i[i+1]; j++) { 1643 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1644 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1645 aa = a->a + j*bs2; 1646 for (k=0; k<bs; k++) { 1647 for (l=0; l<bs; l++) { 1648 if (PetscRealPart(*aa++) <= 0.) continue; 1649 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1650 } 1651 } 1652 } 1653 } 1654 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1655 } else { 1656 /* use contour shading to indicate magnitude of values */ 1657 /* first determine max of all nonzero values */ 1658 PetscReal minv = 0.0, maxv = 0.0; 1659 PetscDraw popup; 1660 1661 for (i=0; i<a->nz*a->bs2; i++) { 1662 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1663 } 1664 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1665 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1666 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1667 1668 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1669 for (i=0,row=0; i<mbs; i++,row+=bs) { 1670 for (j=a->i[i]; j<a->i[i+1]; j++) { 1671 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1672 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1673 aa = a->a + j*bs2; 1674 for (k=0; k<bs; k++) { 1675 for (l=0; l<bs; l++) { 1676 MatScalar v = *aa++; 1677 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1678 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1679 } 1680 } 1681 } 1682 } 1683 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1690 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1691 { 1692 PetscErrorCode ierr; 1693 PetscReal xl,yl,xr,yr,w,h; 1694 PetscDraw draw; 1695 PetscBool isnull; 1696 1697 PetscFunctionBegin; 1698 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1699 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1700 if (isnull) PetscFunctionReturn(0); 1701 1702 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1703 xr += w; yr += h; xl = -w; yl = -h; 1704 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1705 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1706 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1707 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1708 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatView_SeqBAIJ" 1714 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1715 { 1716 PetscErrorCode ierr; 1717 PetscBool iascii,isbinary,isdraw; 1718 1719 PetscFunctionBegin; 1720 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1721 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1722 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1723 if (iascii) { 1724 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1725 } else if (isbinary) { 1726 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1727 } else if (isdraw) { 1728 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1729 } else { 1730 Mat B; 1731 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1732 ierr = MatView(B,viewer);CHKERRQ(ierr); 1733 ierr = MatDestroy(&B);CHKERRQ(ierr); 1734 } 1735 PetscFunctionReturn(0); 1736 } 1737 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1741 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1742 { 1743 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1744 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1745 PetscInt *ai = a->i,*ailen = a->ilen; 1746 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1747 MatScalar *ap,*aa = a->a; 1748 1749 PetscFunctionBegin; 1750 for (k=0; k<m; k++) { /* loop over rows */ 1751 row = im[k]; brow = row/bs; 1752 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1753 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1754 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1755 nrow = ailen[brow]; 1756 for (l=0; l<n; l++) { /* loop over columns */ 1757 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1758 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1759 col = in[l]; 1760 bcol = col/bs; 1761 cidx = col%bs; 1762 ridx = row%bs; 1763 high = nrow; 1764 low = 0; /* assume unsorted */ 1765 while (high-low > 5) { 1766 t = (low+high)/2; 1767 if (rp[t] > bcol) high = t; 1768 else low = t; 1769 } 1770 for (i=low; i<high; i++) { 1771 if (rp[i] > bcol) break; 1772 if (rp[i] == bcol) { 1773 *v++ = ap[bs2*i+bs*cidx+ridx]; 1774 goto finished; 1775 } 1776 } 1777 *v++ = 0.0; 1778 finished:; 1779 } 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1786 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1787 { 1788 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1789 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1790 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1791 PetscErrorCode ierr; 1792 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1793 PetscBool roworiented=a->roworiented; 1794 const PetscScalar *value = v; 1795 MatScalar *ap,*aa = a->a,*bap; 1796 1797 PetscFunctionBegin; 1798 if (roworiented) { 1799 stepval = (n-1)*bs; 1800 } else { 1801 stepval = (m-1)*bs; 1802 } 1803 for (k=0; k<m; k++) { /* loop over added rows */ 1804 row = im[k]; 1805 if (row < 0) continue; 1806 #if defined(PETSC_USE_DEBUG) 1807 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1808 #endif 1809 rp = aj + ai[row]; 1810 ap = aa + bs2*ai[row]; 1811 rmax = imax[row]; 1812 nrow = ailen[row]; 1813 low = 0; 1814 high = nrow; 1815 for (l=0; l<n; l++) { /* loop over added columns */ 1816 if (in[l] < 0) continue; 1817 #if defined(PETSC_USE_DEBUG) 1818 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1819 #endif 1820 col = in[l]; 1821 if (roworiented) { 1822 value = v + (k*(stepval+bs) + l)*bs; 1823 } else { 1824 value = v + (l*(stepval+bs) + k)*bs; 1825 } 1826 if (col <= lastcol) low = 0; 1827 else high = nrow; 1828 lastcol = col; 1829 while (high-low > 7) { 1830 t = (low+high)/2; 1831 if (rp[t] > col) high = t; 1832 else low = t; 1833 } 1834 for (i=low; i<high; i++) { 1835 if (rp[i] > col) break; 1836 if (rp[i] == col) { 1837 bap = ap + bs2*i; 1838 if (roworiented) { 1839 if (is == ADD_VALUES) { 1840 for (ii=0; ii<bs; ii++,value+=stepval) { 1841 for (jj=ii; jj<bs2; jj+=bs) { 1842 bap[jj] += *value++; 1843 } 1844 } 1845 } else { 1846 for (ii=0; ii<bs; ii++,value+=stepval) { 1847 for (jj=ii; jj<bs2; jj+=bs) { 1848 bap[jj] = *value++; 1849 } 1850 } 1851 } 1852 } else { 1853 if (is == ADD_VALUES) { 1854 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1855 for (jj=0; jj<bs; jj++) { 1856 bap[jj] += value[jj]; 1857 } 1858 bap += bs; 1859 } 1860 } else { 1861 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1862 for (jj=0; jj<bs; jj++) { 1863 bap[jj] = value[jj]; 1864 } 1865 bap += bs; 1866 } 1867 } 1868 } 1869 goto noinsert2; 1870 } 1871 } 1872 if (nonew == 1) goto noinsert2; 1873 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1874 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1875 N = nrow++ - 1; high++; 1876 /* shift up all the later entries in this row */ 1877 for (ii=N; ii>=i; ii--) { 1878 rp[ii+1] = rp[ii]; 1879 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1880 } 1881 if (N >= i) { 1882 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1883 } 1884 rp[i] = col; 1885 bap = ap + bs2*i; 1886 if (roworiented) { 1887 for (ii=0; ii<bs; ii++,value+=stepval) { 1888 for (jj=ii; jj<bs2; jj+=bs) { 1889 bap[jj] = *value++; 1890 } 1891 } 1892 } else { 1893 for (ii=0; ii<bs; ii++,value+=stepval) { 1894 for (jj=0; jj<bs; jj++) { 1895 *bap++ = *value++; 1896 } 1897 } 1898 } 1899 noinsert2:; 1900 low = i; 1901 } 1902 ailen[row] = nrow; 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1909 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1910 { 1911 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1912 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1913 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1914 PetscErrorCode ierr; 1915 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1916 MatScalar *aa = a->a,*ap; 1917 PetscReal ratio=0.6; 1918 1919 PetscFunctionBegin; 1920 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1921 1922 if (m) rmax = ailen[0]; 1923 for (i=1; i<mbs; i++) { 1924 /* move each row back by the amount of empty slots (fshift) before it*/ 1925 fshift += imax[i-1] - ailen[i-1]; 1926 rmax = PetscMax(rmax,ailen[i]); 1927 if (fshift) { 1928 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1929 N = ailen[i]; 1930 for (j=0; j<N; j++) { 1931 ip[j-fshift] = ip[j]; 1932 1933 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1934 } 1935 } 1936 ai[i] = ai[i-1] + ailen[i-1]; 1937 } 1938 if (mbs) { 1939 fshift += imax[mbs-1] - ailen[mbs-1]; 1940 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1941 } 1942 1943 /* reset ilen and imax for each row */ 1944 a->nonzerorowcnt = 0; 1945 for (i=0; i<mbs; i++) { 1946 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1947 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1948 } 1949 a->nz = ai[mbs]; 1950 1951 /* diagonals may have moved, so kill the diagonal pointers */ 1952 a->idiagvalid = PETSC_FALSE; 1953 if (fshift && a->diag) { 1954 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1955 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1956 a->diag = 0; 1957 } 1958 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1959 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1960 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1961 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1962 1963 A->info.mallocs += a->reallocs; 1964 a->reallocs = 0; 1965 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1966 a->rmax = rmax; 1967 1968 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 /* 1973 This function returns an array of flags which indicate the locations of contiguous 1974 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1975 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1976 Assume: sizes should be long enough to hold all the values. 1977 */ 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1980 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1981 { 1982 PetscInt i,j,k,row; 1983 PetscBool flg; 1984 1985 PetscFunctionBegin; 1986 for (i=0,j=0; i<n; j++) { 1987 row = idx[i]; 1988 if (row%bs!=0) { /* Not the begining of a block */ 1989 sizes[j] = 1; 1990 i++; 1991 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1992 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1993 i++; 1994 } else { /* Begining of the block, so check if the complete block exists */ 1995 flg = PETSC_TRUE; 1996 for (k=1; k<bs; k++) { 1997 if (row+k != idx[i+k]) { /* break in the block */ 1998 flg = PETSC_FALSE; 1999 break; 2000 } 2001 } 2002 if (flg) { /* No break in the bs */ 2003 sizes[j] = bs; 2004 i += bs; 2005 } else { 2006 sizes[j] = 1; 2007 i++; 2008 } 2009 } 2010 } 2011 *bs_max = j; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2017 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2018 { 2019 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2020 PetscErrorCode ierr; 2021 PetscInt i,j,k,count,*rows; 2022 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2023 PetscScalar zero = 0.0; 2024 MatScalar *aa; 2025 const PetscScalar *xx; 2026 PetscScalar *bb; 2027 2028 PetscFunctionBegin; 2029 /* fix right hand side if needed */ 2030 if (x && b) { 2031 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2032 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2033 for (i=0; i<is_n; i++) { 2034 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2035 } 2036 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2037 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2038 } 2039 2040 /* Make a copy of the IS and sort it */ 2041 /* allocate memory for rows,sizes */ 2042 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2043 2044 /* copy IS values to rows, and sort them */ 2045 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2046 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2047 2048 if (baij->keepnonzeropattern) { 2049 for (i=0; i<is_n; i++) sizes[i] = 1; 2050 bs_max = is_n; 2051 } else { 2052 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2053 A->nonzerostate++; 2054 } 2055 2056 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2057 row = rows[j]; 2058 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2059 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2060 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2061 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2062 if (diag != (PetscScalar)0.0) { 2063 if (baij->ilen[row/bs] > 0) { 2064 baij->ilen[row/bs] = 1; 2065 baij->j[baij->i[row/bs]] = row/bs; 2066 2067 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2068 } 2069 /* Now insert all the diagonal values for this bs */ 2070 for (k=0; k<bs; k++) { 2071 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2072 } 2073 } else { /* (diag == 0.0) */ 2074 baij->ilen[row/bs] = 0; 2075 } /* end (diag == 0.0) */ 2076 } else { /* (sizes[i] != bs) */ 2077 #if defined(PETSC_USE_DEBUG) 2078 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2079 #endif 2080 for (k=0; k<count; k++) { 2081 aa[0] = zero; 2082 aa += bs; 2083 } 2084 if (diag != (PetscScalar)0.0) { 2085 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2086 } 2087 } 2088 } 2089 2090 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2091 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2097 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2098 { 2099 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2100 PetscErrorCode ierr; 2101 PetscInt i,j,k,count; 2102 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2103 PetscScalar zero = 0.0; 2104 MatScalar *aa; 2105 const PetscScalar *xx; 2106 PetscScalar *bb; 2107 PetscBool *zeroed,vecs = PETSC_FALSE; 2108 2109 PetscFunctionBegin; 2110 /* fix right hand side if needed */ 2111 if (x && b) { 2112 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2113 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2114 vecs = PETSC_TRUE; 2115 } 2116 2117 /* zero the columns */ 2118 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2119 for (i=0; i<is_n; i++) { 2120 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2121 zeroed[is_idx[i]] = PETSC_TRUE; 2122 } 2123 for (i=0; i<A->rmap->N; i++) { 2124 if (!zeroed[i]) { 2125 row = i/bs; 2126 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2127 for (k=0; k<bs; k++) { 2128 col = bs*baij->j[j] + k; 2129 if (zeroed[col]) { 2130 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2131 if (vecs) bb[i] -= aa[0]*xx[col]; 2132 aa[0] = 0.0; 2133 } 2134 } 2135 } 2136 } else if (vecs) bb[i] = diag*xx[i]; 2137 } 2138 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2139 if (vecs) { 2140 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2141 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2142 } 2143 2144 /* zero the rows */ 2145 for (i=0; i<is_n; i++) { 2146 row = is_idx[i]; 2147 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2148 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2149 for (k=0; k<count; k++) { 2150 aa[0] = zero; 2151 aa += bs; 2152 } 2153 if (diag != (PetscScalar)0.0) { 2154 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2155 } 2156 } 2157 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2163 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2164 { 2165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2166 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2167 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2168 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2169 PetscErrorCode ierr; 2170 PetscInt ridx,cidx,bs2=a->bs2; 2171 PetscBool roworiented=a->roworiented; 2172 MatScalar *ap,value,*aa=a->a,*bap; 2173 2174 PetscFunctionBegin; 2175 for (k=0; k<m; k++) { /* loop over added rows */ 2176 row = im[k]; 2177 brow = row/bs; 2178 if (row < 0) continue; 2179 #if defined(PETSC_USE_DEBUG) 2180 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2181 #endif 2182 rp = aj + ai[brow]; 2183 ap = aa + bs2*ai[brow]; 2184 rmax = imax[brow]; 2185 nrow = ailen[brow]; 2186 low = 0; 2187 high = nrow; 2188 for (l=0; l<n; l++) { /* loop over added columns */ 2189 if (in[l] < 0) continue; 2190 #if defined(PETSC_USE_DEBUG) 2191 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2192 #endif 2193 col = in[l]; bcol = col/bs; 2194 ridx = row % bs; cidx = col % bs; 2195 if (roworiented) { 2196 value = v[l + k*n]; 2197 } else { 2198 value = v[k + l*m]; 2199 } 2200 if (col <= lastcol) low = 0; else high = nrow; 2201 lastcol = col; 2202 while (high-low > 7) { 2203 t = (low+high)/2; 2204 if (rp[t] > bcol) high = t; 2205 else low = t; 2206 } 2207 for (i=low; i<high; i++) { 2208 if (rp[i] > bcol) break; 2209 if (rp[i] == bcol) { 2210 bap = ap + bs2*i + bs*cidx + ridx; 2211 if (is == ADD_VALUES) *bap += value; 2212 else *bap = value; 2213 goto noinsert1; 2214 } 2215 } 2216 if (nonew == 1) goto noinsert1; 2217 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2218 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2219 N = nrow++ - 1; high++; 2220 /* shift up all the later entries in this row */ 2221 for (ii=N; ii>=i; ii--) { 2222 rp[ii+1] = rp[ii]; 2223 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2224 } 2225 if (N>=i) { 2226 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2227 } 2228 rp[i] = bcol; 2229 ap[bs2*i + bs*cidx + ridx] = value; 2230 a->nz++; 2231 A->nonzerostate++; 2232 noinsert1:; 2233 low = i; 2234 } 2235 ailen[brow] = nrow; 2236 } 2237 PetscFunctionReturn(0); 2238 } 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2242 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2243 { 2244 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2245 Mat outA; 2246 PetscErrorCode ierr; 2247 PetscBool row_identity,col_identity; 2248 2249 PetscFunctionBegin; 2250 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2251 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2252 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2253 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2254 2255 outA = inA; 2256 inA->factortype = MAT_FACTOR_LU; 2257 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2258 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2259 2260 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2261 2262 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2263 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2264 a->row = row; 2265 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2266 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2267 a->col = col; 2268 2269 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2270 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2271 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2272 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2273 2274 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2275 if (!a->solve_work) { 2276 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2277 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2278 } 2279 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2285 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2286 { 2287 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2288 PetscInt i,nz,mbs; 2289 2290 PetscFunctionBegin; 2291 nz = baij->maxnz; 2292 mbs = baij->mbs; 2293 for (i=0; i<nz; i++) { 2294 baij->j[i] = indices[i]; 2295 } 2296 baij->nz = nz; 2297 for (i=0; i<mbs; i++) { 2298 baij->ilen[i] = baij->imax[i]; 2299 } 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2305 /*@ 2306 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2307 in the matrix. 2308 2309 Input Parameters: 2310 + mat - the SeqBAIJ matrix 2311 - indices - the column indices 2312 2313 Level: advanced 2314 2315 Notes: 2316 This can be called if you have precomputed the nonzero structure of the 2317 matrix and want to provide it to the matrix object to improve the performance 2318 of the MatSetValues() operation. 2319 2320 You MUST have set the correct numbers of nonzeros per row in the call to 2321 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2322 2323 MUST be called before any calls to MatSetValues(); 2324 2325 @*/ 2326 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2327 { 2328 PetscErrorCode ierr; 2329 2330 PetscFunctionBegin; 2331 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2332 PetscValidPointer(indices,2); 2333 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2339 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2340 { 2341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2342 PetscErrorCode ierr; 2343 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2344 PetscReal atmp; 2345 PetscScalar *x,zero = 0.0; 2346 MatScalar *aa; 2347 PetscInt ncols,brow,krow,kcol; 2348 2349 PetscFunctionBegin; 2350 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2351 bs = A->rmap->bs; 2352 aa = a->a; 2353 ai = a->i; 2354 aj = a->j; 2355 mbs = a->mbs; 2356 2357 ierr = VecSet(v,zero);CHKERRQ(ierr); 2358 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2359 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2360 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2361 for (i=0; i<mbs; i++) { 2362 ncols = ai[1] - ai[0]; ai++; 2363 brow = bs*i; 2364 for (j=0; j<ncols; j++) { 2365 for (kcol=0; kcol<bs; kcol++) { 2366 for (krow=0; krow<bs; krow++) { 2367 atmp = PetscAbsScalar(*aa);aa++; 2368 row = brow + krow; /* row index */ 2369 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2370 } 2371 } 2372 aj++; 2373 } 2374 } 2375 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatCopy_SeqBAIJ" 2381 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2382 { 2383 PetscErrorCode ierr; 2384 2385 PetscFunctionBegin; 2386 /* If the two matrices have the same copy implementation, use fast copy. */ 2387 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2388 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2389 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2390 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2391 2392 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2393 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2394 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2395 } else { 2396 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2403 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2404 { 2405 PetscErrorCode ierr; 2406 2407 PetscFunctionBegin; 2408 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2414 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2415 { 2416 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2417 2418 PetscFunctionBegin; 2419 *array = a->a; 2420 PetscFunctionReturn(0); 2421 } 2422 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2425 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2426 { 2427 PetscFunctionBegin; 2428 PetscFunctionReturn(0); 2429 } 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2433 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2434 { 2435 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2436 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2437 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2438 PetscErrorCode ierr; 2439 2440 PetscFunctionBegin; 2441 /* Set the number of nonzeros in the new matrix */ 2442 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2448 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2449 { 2450 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2451 PetscErrorCode ierr; 2452 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2453 PetscBLASInt one=1; 2454 2455 PetscFunctionBegin; 2456 if (str == SAME_NONZERO_PATTERN) { 2457 PetscScalar alpha = a; 2458 PetscBLASInt bnz; 2459 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2460 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2461 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2462 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2463 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2464 } else { 2465 Mat B; 2466 PetscInt *nnz; 2467 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2468 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2469 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2470 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2471 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2472 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2473 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2474 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2475 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2476 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2477 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2478 ierr = PetscFree(nnz);CHKERRQ(ierr); 2479 } 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2485 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2486 { 2487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2488 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2489 MatScalar *aa = a->a; 2490 2491 PetscFunctionBegin; 2492 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2498 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2499 { 2500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2501 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2502 MatScalar *aa = a->a; 2503 2504 PetscFunctionBegin; 2505 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2511 /* 2512 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2513 */ 2514 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2515 { 2516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2517 PetscErrorCode ierr; 2518 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2519 PetscInt nz = a->i[m],row,*jj,mr,col; 2520 2521 PetscFunctionBegin; 2522 *nn = n; 2523 if (!ia) PetscFunctionReturn(0); 2524 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2525 else { 2526 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2527 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2528 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2529 jj = a->j; 2530 for (i=0; i<nz; i++) { 2531 collengths[jj[i]]++; 2532 } 2533 cia[0] = oshift; 2534 for (i=0; i<n; i++) { 2535 cia[i+1] = cia[i] + collengths[i]; 2536 } 2537 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2538 jj = a->j; 2539 for (row=0; row<m; row++) { 2540 mr = a->i[row+1] - a->i[row]; 2541 for (i=0; i<mr; i++) { 2542 col = *jj++; 2543 2544 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2545 } 2546 } 2547 ierr = PetscFree(collengths);CHKERRQ(ierr); 2548 *ia = cia; *ja = cja; 2549 } 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2555 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2556 { 2557 PetscErrorCode ierr; 2558 2559 PetscFunctionBegin; 2560 if (!ia) PetscFunctionReturn(0); 2561 ierr = PetscFree(*ia);CHKERRQ(ierr); 2562 ierr = PetscFree(*ja);CHKERRQ(ierr); 2563 PetscFunctionReturn(0); 2564 } 2565 2566 /* 2567 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2568 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2569 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2570 */ 2571 #undef __FUNCT__ 2572 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2573 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2574 { 2575 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2576 PetscErrorCode ierr; 2577 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2578 PetscInt nz = a->i[m],row,*jj,mr,col; 2579 PetscInt *cspidx; 2580 2581 PetscFunctionBegin; 2582 *nn = n; 2583 if (!ia) PetscFunctionReturn(0); 2584 2585 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2586 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2587 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2588 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2589 jj = a->j; 2590 for (i=0; i<nz; i++) { 2591 collengths[jj[i]]++; 2592 } 2593 cia[0] = oshift; 2594 for (i=0; i<n; i++) { 2595 cia[i+1] = cia[i] + collengths[i]; 2596 } 2597 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2598 jj = a->j; 2599 for (row=0; row<m; row++) { 2600 mr = a->i[row+1] - a->i[row]; 2601 for (i=0; i<mr; i++) { 2602 col = *jj++; 2603 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2604 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2605 } 2606 } 2607 ierr = PetscFree(collengths);CHKERRQ(ierr); 2608 *ia = cia; *ja = cja; 2609 *spidx = cspidx; 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2615 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2616 { 2617 PetscErrorCode ierr; 2618 2619 PetscFunctionBegin; 2620 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2621 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2622 PetscFunctionReturn(0); 2623 } 2624 2625 #undef __FUNCT__ 2626 #define __FUNCT__ "MatShift_SeqBAIJ" 2627 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2628 { 2629 PetscErrorCode ierr; 2630 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2631 2632 PetscFunctionBegin; 2633 if (!Y->preallocated || !aij->nz) { 2634 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2635 } 2636 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 /* -------------------------------------------------------------------*/ 2641 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2642 MatGetRow_SeqBAIJ, 2643 MatRestoreRow_SeqBAIJ, 2644 MatMult_SeqBAIJ_N, 2645 /* 4*/ MatMultAdd_SeqBAIJ_N, 2646 MatMultTranspose_SeqBAIJ, 2647 MatMultTransposeAdd_SeqBAIJ, 2648 0, 2649 0, 2650 0, 2651 /* 10*/ 0, 2652 MatLUFactor_SeqBAIJ, 2653 0, 2654 0, 2655 MatTranspose_SeqBAIJ, 2656 /* 15*/ MatGetInfo_SeqBAIJ, 2657 MatEqual_SeqBAIJ, 2658 MatGetDiagonal_SeqBAIJ, 2659 MatDiagonalScale_SeqBAIJ, 2660 MatNorm_SeqBAIJ, 2661 /* 20*/ 0, 2662 MatAssemblyEnd_SeqBAIJ, 2663 MatSetOption_SeqBAIJ, 2664 MatZeroEntries_SeqBAIJ, 2665 /* 24*/ MatZeroRows_SeqBAIJ, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /* 29*/ MatSetUp_SeqBAIJ, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /* 34*/ MatDuplicate_SeqBAIJ, 2676 0, 2677 0, 2678 MatILUFactor_SeqBAIJ, 2679 0, 2680 /* 39*/ MatAXPY_SeqBAIJ, 2681 MatGetSubMatrices_SeqBAIJ, 2682 MatIncreaseOverlap_SeqBAIJ, 2683 MatGetValues_SeqBAIJ, 2684 MatCopy_SeqBAIJ, 2685 /* 44*/ 0, 2686 MatScale_SeqBAIJ, 2687 MatShift_SeqBAIJ, 2688 0, 2689 MatZeroRowsColumns_SeqBAIJ, 2690 /* 49*/ 0, 2691 MatGetRowIJ_SeqBAIJ, 2692 MatRestoreRowIJ_SeqBAIJ, 2693 MatGetColumnIJ_SeqBAIJ, 2694 MatRestoreColumnIJ_SeqBAIJ, 2695 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2696 0, 2697 0, 2698 0, 2699 MatSetValuesBlocked_SeqBAIJ, 2700 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2701 MatDestroy_SeqBAIJ, 2702 MatView_SeqBAIJ, 2703 0, 2704 0, 2705 /* 64*/ 0, 2706 0, 2707 0, 2708 0, 2709 0, 2710 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2711 0, 2712 MatConvert_Basic, 2713 0, 2714 0, 2715 /* 74*/ 0, 2716 MatFDColoringApply_BAIJ, 2717 0, 2718 0, 2719 0, 2720 /* 79*/ 0, 2721 0, 2722 0, 2723 0, 2724 MatLoad_SeqBAIJ, 2725 /* 84*/ 0, 2726 0, 2727 0, 2728 0, 2729 0, 2730 /* 89*/ 0, 2731 0, 2732 0, 2733 0, 2734 0, 2735 /* 94*/ 0, 2736 0, 2737 0, 2738 0, 2739 0, 2740 /* 99*/ 0, 2741 0, 2742 0, 2743 0, 2744 0, 2745 /*104*/ 0, 2746 MatRealPart_SeqBAIJ, 2747 MatImaginaryPart_SeqBAIJ, 2748 0, 2749 0, 2750 /*109*/ 0, 2751 0, 2752 0, 2753 0, 2754 MatMissingDiagonal_SeqBAIJ, 2755 /*114*/ 0, 2756 0, 2757 0, 2758 0, 2759 0, 2760 /*119*/ 0, 2761 0, 2762 MatMultHermitianTranspose_SeqBAIJ, 2763 MatMultHermitianTransposeAdd_SeqBAIJ, 2764 0, 2765 /*124*/ 0, 2766 0, 2767 MatInvertBlockDiagonal_SeqBAIJ, 2768 0, 2769 0, 2770 /*129*/ 0, 2771 0, 2772 0, 2773 0, 2774 0, 2775 /*134*/ 0, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*139*/ 0, 2781 0, 2782 0, 2783 MatFDColoringSetUp_SeqXAIJ, 2784 0, 2785 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2786 }; 2787 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2790 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2791 { 2792 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2793 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2798 2799 /* allocate space for values if not already there */ 2800 if (!aij->saved_values) { 2801 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2802 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2803 } 2804 2805 /* copy values over */ 2806 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2807 PetscFunctionReturn(0); 2808 } 2809 2810 #undef __FUNCT__ 2811 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2812 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2813 { 2814 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2815 PetscErrorCode ierr; 2816 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2817 2818 PetscFunctionBegin; 2819 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2820 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2821 2822 /* copy values over */ 2823 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2824 PetscFunctionReturn(0); 2825 } 2826 2827 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2828 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2832 static PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2833 { 2834 Mat_SeqBAIJ *b; 2835 PetscErrorCode ierr; 2836 PetscInt i,mbs,nbs,bs2; 2837 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2838 2839 PetscFunctionBegin; 2840 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2841 if (nz == MAT_SKIP_ALLOCATION) { 2842 skipallocation = PETSC_TRUE; 2843 nz = 0; 2844 } 2845 2846 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2847 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2848 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2849 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2850 2851 B->preallocated = PETSC_TRUE; 2852 2853 mbs = B->rmap->n/bs; 2854 nbs = B->cmap->n/bs; 2855 bs2 = bs*bs; 2856 2857 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2858 2859 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2860 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2861 if (nnz) { 2862 for (i=0; i<mbs; i++) { 2863 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2864 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2865 } 2866 } 2867 2868 b = (Mat_SeqBAIJ*)B->data; 2869 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2870 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2871 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2872 2873 if (!flg) { 2874 switch (bs) { 2875 case 1: 2876 B->ops->mult = MatMult_SeqBAIJ_1; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2878 break; 2879 case 2: 2880 B->ops->mult = MatMult_SeqBAIJ_2; 2881 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2882 break; 2883 case 3: 2884 B->ops->mult = MatMult_SeqBAIJ_3; 2885 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2886 break; 2887 case 4: 2888 B->ops->mult = MatMult_SeqBAIJ_4; 2889 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2890 break; 2891 case 5: 2892 B->ops->mult = MatMult_SeqBAIJ_5; 2893 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2894 break; 2895 case 6: 2896 B->ops->mult = MatMult_SeqBAIJ_6; 2897 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2898 break; 2899 case 7: 2900 B->ops->mult = MatMult_SeqBAIJ_7; 2901 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2902 break; 2903 case 15: 2904 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2905 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2906 break; 2907 default: 2908 B->ops->mult = MatMult_SeqBAIJ_N; 2909 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2910 break; 2911 } 2912 } 2913 B->ops->sor = MatSOR_SeqBAIJ; 2914 b->mbs = mbs; 2915 b->nbs = nbs; 2916 if (!skipallocation) { 2917 if (!b->imax) { 2918 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2919 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2920 2921 b->free_imax_ilen = PETSC_TRUE; 2922 } 2923 /* b->ilen will count nonzeros in each block row so far. */ 2924 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2925 if (!nnz) { 2926 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2927 else if (nz < 0) nz = 1; 2928 for (i=0; i<mbs; i++) b->imax[i] = nz; 2929 nz = nz*mbs; 2930 } else { 2931 nz = 0; 2932 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2933 } 2934 2935 /* allocate the matrix space */ 2936 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2937 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2938 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2939 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2940 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2941 2942 b->singlemalloc = PETSC_TRUE; 2943 b->i[0] = 0; 2944 for (i=1; i<mbs+1; i++) { 2945 b->i[i] = b->i[i-1] + b->imax[i-1]; 2946 } 2947 b->free_a = PETSC_TRUE; 2948 b->free_ij = PETSC_TRUE; 2949 } else { 2950 b->free_a = PETSC_FALSE; 2951 b->free_ij = PETSC_FALSE; 2952 } 2953 2954 b->bs2 = bs2; 2955 b->mbs = mbs; 2956 b->nz = 0; 2957 b->maxnz = nz; 2958 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2959 B->was_assembled = PETSC_FALSE; 2960 B->assembled = PETSC_FALSE; 2961 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2962 PetscFunctionReturn(0); 2963 } 2964 2965 #undef __FUNCT__ 2966 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2967 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2968 { 2969 PetscInt i,m,nz,nz_max=0,*nnz; 2970 PetscScalar *values=0; 2971 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2972 PetscErrorCode ierr; 2973 2974 PetscFunctionBegin; 2975 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2976 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2977 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2978 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2979 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2980 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2981 m = B->rmap->n/bs; 2982 2983 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2984 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2985 for (i=0; i<m; i++) { 2986 nz = ii[i+1]- ii[i]; 2987 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2988 nz_max = PetscMax(nz_max, nz); 2989 nnz[i] = nz; 2990 } 2991 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2992 ierr = PetscFree(nnz);CHKERRQ(ierr); 2993 2994 values = (PetscScalar*)V; 2995 if (!values) { 2996 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2997 } 2998 for (i=0; i<m; i++) { 2999 PetscInt ncols = ii[i+1] - ii[i]; 3000 const PetscInt *icols = jj + ii[i]; 3001 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3002 if (!roworiented) { 3003 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3004 } else { 3005 PetscInt j; 3006 for (j=0; j<ncols; j++) { 3007 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3008 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3009 } 3010 } 3011 } 3012 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3013 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3014 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3015 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3016 PetscFunctionReturn(0); 3017 } 3018 3019 /*MC 3020 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3021 block sparse compressed row format. 3022 3023 Options Database Keys: 3024 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3025 3026 Level: beginner 3027 3028 .seealso: MatCreateSeqBAIJ() 3029 M*/ 3030 3031 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3032 3033 #undef __FUNCT__ 3034 #define __FUNCT__ "MatCreate_SeqBAIJ" 3035 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3036 { 3037 PetscErrorCode ierr; 3038 PetscMPIInt size; 3039 Mat_SeqBAIJ *b; 3040 3041 PetscFunctionBegin; 3042 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3043 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3044 3045 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3046 B->data = (void*)b; 3047 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3048 3049 b->row = 0; 3050 b->col = 0; 3051 b->icol = 0; 3052 b->reallocs = 0; 3053 b->saved_values = 0; 3054 3055 b->roworiented = PETSC_TRUE; 3056 b->nonew = 0; 3057 b->diag = 0; 3058 B->spptr = 0; 3059 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3060 b->keepnonzeropattern = PETSC_FALSE; 3061 3062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3068 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3069 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3070 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3071 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3072 PetscFunctionReturn(0); 3073 } 3074 3075 #undef __FUNCT__ 3076 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3077 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3078 { 3079 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3080 PetscErrorCode ierr; 3081 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3082 3083 PetscFunctionBegin; 3084 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3085 3086 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3087 c->imax = a->imax; 3088 c->ilen = a->ilen; 3089 c->free_imax_ilen = PETSC_FALSE; 3090 } else { 3091 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3092 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3093 for (i=0; i<mbs; i++) { 3094 c->imax[i] = a->imax[i]; 3095 c->ilen[i] = a->ilen[i]; 3096 } 3097 c->free_imax_ilen = PETSC_TRUE; 3098 } 3099 3100 /* allocate the matrix space */ 3101 if (mallocmatspace) { 3102 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3103 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3104 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3105 3106 c->i = a->i; 3107 c->j = a->j; 3108 c->singlemalloc = PETSC_FALSE; 3109 c->free_a = PETSC_TRUE; 3110 c->free_ij = PETSC_FALSE; 3111 c->parent = A; 3112 C->preallocated = PETSC_TRUE; 3113 C->assembled = PETSC_TRUE; 3114 3115 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3116 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3117 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3118 } else { 3119 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3120 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3121 3122 c->singlemalloc = PETSC_TRUE; 3123 c->free_a = PETSC_TRUE; 3124 c->free_ij = PETSC_TRUE; 3125 3126 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3127 if (mbs > 0) { 3128 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3129 if (cpvalues == MAT_COPY_VALUES) { 3130 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3131 } else { 3132 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3133 } 3134 } 3135 C->preallocated = PETSC_TRUE; 3136 C->assembled = PETSC_TRUE; 3137 } 3138 } 3139 3140 c->roworiented = a->roworiented; 3141 c->nonew = a->nonew; 3142 3143 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3144 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3145 3146 c->bs2 = a->bs2; 3147 c->mbs = a->mbs; 3148 c->nbs = a->nbs; 3149 3150 if (a->diag) { 3151 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3152 c->diag = a->diag; 3153 c->free_diag = PETSC_FALSE; 3154 } else { 3155 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3156 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3157 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3158 c->free_diag = PETSC_TRUE; 3159 } 3160 } else c->diag = 0; 3161 3162 c->nz = a->nz; 3163 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3164 c->solve_work = NULL; 3165 c->mult_work = NULL; 3166 c->sor_workt = NULL; 3167 c->sor_work = NULL; 3168 3169 c->compressedrow.use = a->compressedrow.use; 3170 c->compressedrow.nrows = a->compressedrow.nrows; 3171 if (a->compressedrow.use) { 3172 i = a->compressedrow.nrows; 3173 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3174 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3175 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3176 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3177 } else { 3178 c->compressedrow.use = PETSC_FALSE; 3179 c->compressedrow.i = NULL; 3180 c->compressedrow.rindex = NULL; 3181 } 3182 C->nonzerostate = A->nonzerostate; 3183 3184 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3185 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3186 PetscFunctionReturn(0); 3187 } 3188 3189 #undef __FUNCT__ 3190 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3191 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3192 { 3193 PetscErrorCode ierr; 3194 3195 PetscFunctionBegin; 3196 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3197 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3198 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3199 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3200 PetscFunctionReturn(0); 3201 } 3202 3203 #undef __FUNCT__ 3204 #define __FUNCT__ "MatLoad_SeqBAIJ" 3205 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3206 { 3207 Mat_SeqBAIJ *a; 3208 PetscErrorCode ierr; 3209 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3210 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3211 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3212 PetscInt *masked,nmask,tmp,bs2,ishift; 3213 PetscMPIInt size; 3214 int fd; 3215 PetscScalar *aa; 3216 MPI_Comm comm; 3217 3218 PetscFunctionBegin; 3219 /* force binary viewer to load .info file if it has not yet done so */ 3220 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3221 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3222 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3223 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3224 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3225 if (bs < 0) bs = 1; 3226 bs2 = bs*bs; 3227 3228 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3229 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3230 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3231 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3232 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3233 M = header[1]; N = header[2]; nz = header[3]; 3234 3235 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3236 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3237 3238 /* 3239 This code adds extra rows to make sure the number of rows is 3240 divisible by the blocksize 3241 */ 3242 mbs = M/bs; 3243 extra_rows = bs - M + bs*(mbs); 3244 if (extra_rows == bs) extra_rows = 0; 3245 else mbs++; 3246 if (extra_rows) { 3247 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3248 } 3249 3250 /* Set global sizes if not already set */ 3251 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3252 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3253 } else { /* Check if the matrix global sizes are correct */ 3254 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3255 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3256 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3257 } 3258 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3259 } 3260 3261 /* read in row lengths */ 3262 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3263 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3264 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3265 3266 /* read in column indices */ 3267 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3268 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3269 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3270 3271 /* loop over row lengths determining block row lengths */ 3272 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3273 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3274 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3275 rowcount = 0; 3276 nzcount = 0; 3277 for (i=0; i<mbs; i++) { 3278 nmask = 0; 3279 for (j=0; j<bs; j++) { 3280 kmax = rowlengths[rowcount]; 3281 for (k=0; k<kmax; k++) { 3282 tmp = jj[nzcount++]/bs; 3283 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3284 } 3285 rowcount++; 3286 } 3287 browlengths[i] += nmask; 3288 /* zero out the mask elements we set */ 3289 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3290 } 3291 3292 /* Do preallocation */ 3293 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3294 a = (Mat_SeqBAIJ*)newmat->data; 3295 3296 /* set matrix "i" values */ 3297 a->i[0] = 0; 3298 for (i=1; i<= mbs; i++) { 3299 a->i[i] = a->i[i-1] + browlengths[i-1]; 3300 a->ilen[i-1] = browlengths[i-1]; 3301 } 3302 a->nz = 0; 3303 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3304 3305 /* read in nonzero values */ 3306 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3307 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3308 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3309 3310 /* set "a" and "j" values into matrix */ 3311 nzcount = 0; jcount = 0; 3312 for (i=0; i<mbs; i++) { 3313 nzcountb = nzcount; 3314 nmask = 0; 3315 for (j=0; j<bs; j++) { 3316 kmax = rowlengths[i*bs+j]; 3317 for (k=0; k<kmax; k++) { 3318 tmp = jj[nzcount++]/bs; 3319 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3320 } 3321 } 3322 /* sort the masked values */ 3323 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3324 3325 /* set "j" values into matrix */ 3326 maskcount = 1; 3327 for (j=0; j<nmask; j++) { 3328 a->j[jcount++] = masked[j]; 3329 mask[masked[j]] = maskcount++; 3330 } 3331 /* set "a" values into matrix */ 3332 ishift = bs2*a->i[i]; 3333 for (j=0; j<bs; j++) { 3334 kmax = rowlengths[i*bs+j]; 3335 for (k=0; k<kmax; k++) { 3336 tmp = jj[nzcountb]/bs; 3337 block = mask[tmp] - 1; 3338 point = jj[nzcountb] - bs*tmp; 3339 idx = ishift + bs2*block + j + bs*point; 3340 a->a[idx] = (MatScalar)aa[nzcountb++]; 3341 } 3342 } 3343 /* zero out the mask elements we set */ 3344 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3345 } 3346 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3347 3348 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3349 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3350 ierr = PetscFree(aa);CHKERRQ(ierr); 3351 ierr = PetscFree(jj);CHKERRQ(ierr); 3352 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3353 3354 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3355 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3356 PetscFunctionReturn(0); 3357 } 3358 3359 #undef __FUNCT__ 3360 #define __FUNCT__ "MatCreateSeqBAIJ" 3361 /*@C 3362 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3363 compressed row) format. For good matrix assembly performance the 3364 user should preallocate the matrix storage by setting the parameter nz 3365 (or the array nnz). By setting these parameters accurately, performance 3366 during matrix assembly can be increased by more than a factor of 50. 3367 3368 Collective on MPI_Comm 3369 3370 Input Parameters: 3371 + comm - MPI communicator, set to PETSC_COMM_SELF 3372 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3373 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3374 . m - number of rows 3375 . n - number of columns 3376 . nz - number of nonzero blocks per block row (same for all rows) 3377 - nnz - array containing the number of nonzero blocks in the various block rows 3378 (possibly different for each block row) or NULL 3379 3380 Output Parameter: 3381 . A - the matrix 3382 3383 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3384 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3385 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3386 3387 Options Database Keys: 3388 . -mat_no_unroll - uses code that does not unroll the loops in the 3389 block calculations (much slower) 3390 . -mat_block_size - size of the blocks to use 3391 3392 Level: intermediate 3393 3394 Notes: 3395 The number of rows and columns must be divisible by blocksize. 3396 3397 If the nnz parameter is given then the nz parameter is ignored 3398 3399 A nonzero block is any block that as 1 or more nonzeros in it 3400 3401 The block AIJ format is fully compatible with standard Fortran 77 3402 storage. That is, the stored row and column indices can begin at 3403 either one (as in Fortran) or zero. See the users' manual for details. 3404 3405 Specify the preallocated storage with either nz or nnz (not both). 3406 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3407 allocation. See Users-Manual: ch_mat for details. 3408 matrices. 3409 3410 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3411 @*/ 3412 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3413 { 3414 PetscErrorCode ierr; 3415 3416 PetscFunctionBegin; 3417 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3418 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3419 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3420 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3421 PetscFunctionReturn(0); 3422 } 3423 3424 #undef __FUNCT__ 3425 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3426 /*@C 3427 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3428 per row in the matrix. For good matrix assembly performance the 3429 user should preallocate the matrix storage by setting the parameter nz 3430 (or the array nnz). By setting these parameters accurately, performance 3431 during matrix assembly can be increased by more than a factor of 50. 3432 3433 Collective on MPI_Comm 3434 3435 Input Parameters: 3436 + B - the matrix 3437 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3438 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3439 . nz - number of block nonzeros per block row (same for all rows) 3440 - nnz - array containing the number of block nonzeros in the various block rows 3441 (possibly different for each block row) or NULL 3442 3443 Options Database Keys: 3444 . -mat_no_unroll - uses code that does not unroll the loops in the 3445 block calculations (much slower) 3446 . -mat_block_size - size of the blocks to use 3447 3448 Level: intermediate 3449 3450 Notes: 3451 If the nnz parameter is given then the nz parameter is ignored 3452 3453 You can call MatGetInfo() to get information on how effective the preallocation was; 3454 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3455 You can also run with the option -info and look for messages with the string 3456 malloc in them to see if additional memory allocation was needed. 3457 3458 The block AIJ format is fully compatible with standard Fortran 77 3459 storage. That is, the stored row and column indices can begin at 3460 either one (as in Fortran) or zero. See the users' manual for details. 3461 3462 Specify the preallocated storage with either nz or nnz (not both). 3463 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3464 allocation. See Users-Manual: ch_mat for details. 3465 3466 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3467 @*/ 3468 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3469 { 3470 PetscErrorCode ierr; 3471 3472 PetscFunctionBegin; 3473 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3474 PetscValidType(B,1); 3475 PetscValidLogicalCollectiveInt(B,bs,2); 3476 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3477 PetscFunctionReturn(0); 3478 } 3479 3480 #undef __FUNCT__ 3481 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3482 /*@C 3483 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3484 (the default sequential PETSc format). 3485 3486 Collective on MPI_Comm 3487 3488 Input Parameters: 3489 + B - the matrix 3490 . i - the indices into j for the start of each local row (starts with zero) 3491 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3492 - v - optional values in the matrix 3493 3494 Level: developer 3495 3496 Notes: 3497 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3498 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3499 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3500 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3501 block column and the second index is over columns within a block. 3502 3503 .keywords: matrix, aij, compressed row, sparse 3504 3505 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3506 @*/ 3507 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3508 { 3509 PetscErrorCode ierr; 3510 3511 PetscFunctionBegin; 3512 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3513 PetscValidType(B,1); 3514 PetscValidLogicalCollectiveInt(B,bs,2); 3515 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3516 PetscFunctionReturn(0); 3517 } 3518 3519 3520 #undef __FUNCT__ 3521 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3522 /*@ 3523 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3524 3525 Collective on MPI_Comm 3526 3527 Input Parameters: 3528 + comm - must be an MPI communicator of size 1 3529 . bs - size of block 3530 . m - number of rows 3531 . n - number of columns 3532 . i - row indices 3533 . j - column indices 3534 - a - matrix values 3535 3536 Output Parameter: 3537 . mat - the matrix 3538 3539 Level: advanced 3540 3541 Notes: 3542 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3543 once the matrix is destroyed 3544 3545 You cannot set new nonzero locations into this matrix, that will generate an error. 3546 3547 The i and j indices are 0 based 3548 3549 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3550 3551 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3552 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3553 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3554 with column-major ordering within blocks. 3555 3556 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3557 3558 @*/ 3559 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3560 { 3561 PetscErrorCode ierr; 3562 PetscInt ii; 3563 Mat_SeqBAIJ *baij; 3564 3565 PetscFunctionBegin; 3566 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3567 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3568 3569 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3570 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3571 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3572 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3573 baij = (Mat_SeqBAIJ*)(*mat)->data; 3574 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3575 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3576 3577 baij->i = i; 3578 baij->j = j; 3579 baij->a = a; 3580 3581 baij->singlemalloc = PETSC_FALSE; 3582 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3583 baij->free_a = PETSC_FALSE; 3584 baij->free_ij = PETSC_FALSE; 3585 3586 for (ii=0; ii<m; ii++) { 3587 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3588 #if defined(PETSC_USE_DEBUG) 3589 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3590 #endif 3591 } 3592 #if defined(PETSC_USE_DEBUG) 3593 for (ii=0; ii<baij->i[m]; ii++) { 3594 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3595 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3596 } 3597 #endif 3598 3599 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3600 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3601 PetscFunctionReturn(0); 3602 } 3603 3604 #undef __FUNCT__ 3605 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3606 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3607 { 3608 PetscErrorCode ierr; 3609 3610 PetscFunctionBegin; 3611 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3612 PetscFunctionReturn(0); 3613 } 3614