1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 allowzeropivot = PetscNot(A->erroriffailure); 24 25 if (a->idiagvalid) { 26 if (values) *values = a->idiag; 27 PetscFunctionReturn(0); 28 } 29 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 30 diag_offset = a->diag; 31 if (!a->idiag) { 32 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 33 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 34 } 35 diag = a->idiag; 36 mdiag = a->idiag+bs2*mbs; 37 if (values) *values = a->idiag; 38 /* factor and invert each block */ 39 switch (bs) { 40 case 1: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 1*diag_offset[i]; 43 diag[0] = odiag[0]; 44 mdiag[0] = odiag[0]; 45 46 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 47 if (allowzeropivot) { 48 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 49 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 50 A->factorerror_zeropivot_row = i; 51 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 52 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 53 } 54 55 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 56 diag += 1; 57 mdiag += 1; 58 } 59 break; 60 case 2: 61 for (i=0; i<mbs; i++) { 62 odiag = v + 4*diag_offset[i]; 63 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 64 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 65 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 66 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 67 diag += 4; 68 mdiag += 4; 69 } 70 break; 71 case 3: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 9*diag_offset[i]; 74 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 75 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 76 diag[8] = odiag[8]; 77 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 78 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 79 mdiag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 mdiag += 9; 84 } 85 break; 86 case 4: 87 for (i=0; i<mbs; i++) { 88 odiag = v + 16*diag_offset[i]; 89 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 90 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 91 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 92 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 93 diag += 16; 94 mdiag += 16; 95 } 96 break; 97 case 5: 98 for (i=0; i<mbs; i++) { 99 odiag = v + 25*diag_offset[i]; 100 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 101 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 103 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 case 6: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 36*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 114 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 115 diag += 36; 116 mdiag += 36; 117 } 118 break; 119 case 7: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 49*diag_offset[i]; 122 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 125 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 diag += 49; 127 mdiag += 49; 128 } 129 break; 130 default: 131 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 132 for (i=0; i<mbs; i++) { 133 odiag = v + bs2*diag_offset[i]; 134 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 135 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 136 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 137 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 138 diag += bs2; 139 mdiag += bs2; 140 } 141 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 142 } 143 a->idiagvalid = PETSC_TRUE; 144 PetscFunctionReturn(0); 145 } 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "MatSOR_SeqBAIJ" 149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 150 { 151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 152 PetscScalar *x,*work,*w,*workt,*t; 153 const MatScalar *v,*aa = a->a, *idiag; 154 const PetscScalar *b,*xb; 155 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 162 its = its*lits; 163 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 164 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 169 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 170 171 if (!m) PetscFunctionReturn(0); 172 diag = a->diag; 173 idiag = a->idiag; 174 k = PetscMax(A->rmap->n,A->cmap->n); 175 if (!a->mult_work) { 176 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 177 } 178 if (!a->sor_workt) { 179 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 180 } 181 if (!a->sor_work) { 182 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 183 } 184 work = a->mult_work; 185 t = a->sor_workt; 186 w = a->sor_work; 187 188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 189 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 190 191 if (flag & SOR_ZERO_INITIAL_GUESS) { 192 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 193 switch (bs) { 194 case 1: 195 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 196 t[0] = b[0]; 197 i2 = 1; 198 idiag += 1; 199 for (i=1; i<m; i++) { 200 v = aa + ai[i]; 201 vi = aj + ai[i]; 202 nz = diag[i] - ai[i]; 203 s[0] = b[i2]; 204 for (j=0; j<nz; j++) { 205 xw[0] = x[vi[j]]; 206 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 207 } 208 t[i2] = s[0]; 209 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 210 x[i2] = xw[0]; 211 idiag += 1; 212 i2 += 1; 213 } 214 break; 215 case 2: 216 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 217 t[0] = b[0]; t[1] = b[1]; 218 i2 = 2; 219 idiag += 4; 220 for (i=1; i<m; i++) { 221 v = aa + 4*ai[i]; 222 vi = aj + ai[i]; 223 nz = diag[i] - ai[i]; 224 s[0] = b[i2]; s[1] = b[i2+1]; 225 for (j=0; j<nz; j++) { 226 idx = 2*vi[j]; 227 it = 4*j; 228 xw[0] = x[idx]; xw[1] = x[1+idx]; 229 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 230 } 231 t[i2] = s[0]; t[i2+1] = s[1]; 232 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 233 x[i2] = xw[0]; x[i2+1] = xw[1]; 234 idiag += 4; 235 i2 += 2; 236 } 237 break; 238 case 3: 239 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 240 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 241 i2 = 3; 242 idiag += 9; 243 for (i=1; i<m; i++) { 244 v = aa + 9*ai[i]; 245 vi = aj + ai[i]; 246 nz = diag[i] - ai[i]; 247 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 248 while (nz--) { 249 idx = 3*(*vi++); 250 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 251 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 252 v += 9; 253 } 254 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 255 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 256 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 257 idiag += 9; 258 i2 += 3; 259 } 260 break; 261 case 4: 262 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 263 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 264 i2 = 4; 265 idiag += 16; 266 for (i=1; i<m; i++) { 267 v = aa + 16*ai[i]; 268 vi = aj + ai[i]; 269 nz = diag[i] - ai[i]; 270 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 271 while (nz--) { 272 idx = 4*(*vi++); 273 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 274 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 275 v += 16; 276 } 277 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 278 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 279 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 280 idiag += 16; 281 i2 += 4; 282 } 283 break; 284 case 5: 285 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 286 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 287 i2 = 5; 288 idiag += 25; 289 for (i=1; i<m; i++) { 290 v = aa + 25*ai[i]; 291 vi = aj + ai[i]; 292 nz = diag[i] - ai[i]; 293 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 294 while (nz--) { 295 idx = 5*(*vi++); 296 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 298 v += 25; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 301 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 302 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 303 idiag += 25; 304 i2 += 5; 305 } 306 break; 307 case 6: 308 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 309 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 310 i2 = 6; 311 idiag += 36; 312 for (i=1; i<m; i++) { 313 v = aa + 36*ai[i]; 314 vi = aj + ai[i]; 315 nz = diag[i] - ai[i]; 316 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 317 while (nz--) { 318 idx = 6*(*vi++); 319 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 320 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 321 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 322 v += 36; 323 } 324 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 325 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 326 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 328 idiag += 36; 329 i2 += 6; 330 } 331 break; 332 case 7: 333 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 335 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 336 i2 = 7; 337 idiag += 49; 338 for (i=1; i<m; i++) { 339 v = aa + 49*ai[i]; 340 vi = aj + ai[i]; 341 nz = diag[i] - ai[i]; 342 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 343 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 344 while (nz--) { 345 idx = 7*(*vi++); 346 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 347 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 348 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 349 v += 49; 350 } 351 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 352 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 353 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 354 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 355 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 356 idiag += 49; 357 i2 += 7; 358 } 359 break; 360 default: 361 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 362 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 363 i2 = bs; 364 idiag += bs2; 365 for (i=1; i<m; i++) { 366 v = aa + bs2*ai[i]; 367 vi = aj + ai[i]; 368 nz = diag[i] - ai[i]; 369 370 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 371 /* copy all rows of x that are needed into contiguous space */ 372 workt = work; 373 for (j=0; j<nz; j++) { 374 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 workt += bs; 376 } 377 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 378 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 380 381 idiag += bs2; 382 i2 += bs; 383 } 384 break; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 388 xb = t; 389 } 390 else xb = b; 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 392 idiag = a->idiag+bs2*(a->mbs-1); 393 i2 = bs * (m-1); 394 switch (bs) { 395 case 1: 396 s[0] = xb[i2]; 397 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 398 x[i2] = xw[0]; 399 i2 -= 1; 400 for (i=m-2; i>=0; i--) { 401 v = aa + (diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s[0] = xb[i2]; 405 for (j=0; j<nz; j++) { 406 xw[0] = x[vi[j]]; 407 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 408 } 409 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 410 x[i2] = xw[0]; 411 idiag -= 1; 412 i2 -= 1; 413 } 414 break; 415 case 2: 416 s[0] = xb[i2]; s[1] = xb[i2+1]; 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 i2 -= 2; 420 idiag -= 4; 421 for (i=m-2; i>=0; i--) { 422 v = aa + 4*(diag[i] + 1); 423 vi = aj + diag[i] + 1; 424 nz = ai[i+1] - diag[i] - 1; 425 s[0] = xb[i2]; s[1] = xb[i2+1]; 426 for (j=0; j<nz; j++) { 427 idx = 2*vi[j]; 428 it = 4*j; 429 xw[0] = x[idx]; xw[1] = x[1+idx]; 430 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 431 } 432 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 433 x[i2] = xw[0]; x[i2+1] = xw[1]; 434 idiag -= 4; 435 i2 -= 2; 436 } 437 break; 438 case 3: 439 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 i2 -= 3; 443 idiag -= 9; 444 for (i=m-2; i>=0; i--) { 445 v = aa + 9*(diag[i]+1); 446 vi = aj + diag[i] + 1; 447 nz = ai[i+1] - diag[i] - 1; 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 449 while (nz--) { 450 idx = 3*(*vi++); 451 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 452 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 453 v += 9; 454 } 455 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 456 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 457 idiag -= 9; 458 i2 -= 3; 459 } 460 break; 461 case 4: 462 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 i2 -= 4; 466 idiag -= 16; 467 for (i=m-2; i>=0; i--) { 468 v = aa + 16*(diag[i]+1); 469 vi = aj + diag[i] + 1; 470 nz = ai[i+1] - diag[i] - 1; 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 472 while (nz--) { 473 idx = 4*(*vi++); 474 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 475 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 476 v += 16; 477 } 478 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 479 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 480 idiag -= 16; 481 i2 -= 4; 482 } 483 break; 484 case 5: 485 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 i2 -= 5; 489 idiag -= 25; 490 for (i=m-2; i>=0; i--) { 491 v = aa + 25*(diag[i]+1); 492 vi = aj + diag[i] + 1; 493 nz = ai[i+1] - diag[i] - 1; 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 495 while (nz--) { 496 idx = 5*(*vi++); 497 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 499 v += 25; 500 } 501 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 503 idiag -= 25; 504 i2 -= 5; 505 } 506 break; 507 case 6: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 509 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 511 i2 -= 6; 512 idiag -= 36; 513 for (i=m-2; i>=0; i--) { 514 v = aa + 36*(diag[i]+1); 515 vi = aj + diag[i] + 1; 516 nz = ai[i+1] - diag[i] - 1; 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 518 while (nz--) { 519 idx = 6*(*vi++); 520 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 521 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 522 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 523 v += 36; 524 } 525 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 526 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 527 idiag -= 36; 528 i2 -= 6; 529 } 530 break; 531 case 7: 532 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 533 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 534 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 536 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 537 i2 -= 7; 538 idiag -= 49; 539 for (i=m-2; i>=0; i--) { 540 v = aa + 49*(diag[i]+1); 541 vi = aj + diag[i] + 1; 542 nz = ai[i+1] - diag[i] - 1; 543 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 544 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 545 while (nz--) { 546 idx = 7*(*vi++); 547 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 548 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 549 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 550 v += 49; 551 } 552 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 553 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 554 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 555 idiag -= 49; 556 i2 -= 7; 557 } 558 break; 559 default: 560 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 561 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 562 i2 -= bs; 563 idiag -= bs2; 564 for (i=m-2; i>=0; i--) { 565 v = aa + bs2*(diag[i]+1); 566 vi = aj + diag[i] + 1; 567 nz = ai[i+1] - diag[i] - 1; 568 569 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 570 /* copy all rows of x that are needed into contiguous space */ 571 workt = work; 572 for (j=0; j<nz; j++) { 573 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 workt += bs; 575 } 576 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 577 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 578 579 idiag -= bs2; 580 i2 -= bs; 581 } 582 break; 583 } 584 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 585 } 586 its--; 587 } 588 while (its--) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 590 idiag = a->idiag; 591 i2 = 0; 592 switch (bs) { 593 case 1: 594 for (i=0; i<m; i++) { 595 v = aa + ai[i]; 596 vi = aj + ai[i]; 597 nz = ai[i+1] - ai[i]; 598 s[0] = b[i2]; 599 for (j=0; j<nz; j++) { 600 xw[0] = x[vi[j]]; 601 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 602 } 603 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 604 x[i2] += xw[0]; 605 idiag += 1; 606 i2 += 1; 607 } 608 break; 609 case 2: 610 for (i=0; i<m; i++) { 611 v = aa + 4*ai[i]; 612 vi = aj + ai[i]; 613 nz = ai[i+1] - ai[i]; 614 s[0] = b[i2]; s[1] = b[i2+1]; 615 for (j=0; j<nz; j++) { 616 idx = 2*vi[j]; 617 it = 4*j; 618 xw[0] = x[idx]; xw[1] = x[1+idx]; 619 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 620 } 621 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 622 x[i2] += xw[0]; x[i2+1] += xw[1]; 623 idiag += 4; 624 i2 += 2; 625 } 626 break; 627 case 3: 628 for (i=0; i<m; i++) { 629 v = aa + 9*ai[i]; 630 vi = aj + ai[i]; 631 nz = ai[i+1] - ai[i]; 632 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 633 while (nz--) { 634 idx = 3*(*vi++); 635 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 636 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 637 v += 9; 638 } 639 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 640 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 641 idiag += 9; 642 i2 += 3; 643 } 644 break; 645 case 4: 646 for (i=0; i<m; i++) { 647 v = aa + 16*ai[i]; 648 vi = aj + ai[i]; 649 nz = ai[i+1] - ai[i]; 650 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 651 while (nz--) { 652 idx = 4*(*vi++); 653 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 654 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 655 v += 16; 656 } 657 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 658 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 659 idiag += 16; 660 i2 += 4; 661 } 662 break; 663 case 5: 664 for (i=0; i<m; i++) { 665 v = aa + 25*ai[i]; 666 vi = aj + ai[i]; 667 nz = ai[i+1] - ai[i]; 668 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 672 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 673 v += 25; 674 } 675 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 676 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 677 idiag += 25; 678 i2 += 5; 679 } 680 break; 681 case 6: 682 for (i=0; i<m; i++) { 683 v = aa + 36*ai[i]; 684 vi = aj + ai[i]; 685 nz = ai[i+1] - ai[i]; 686 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 687 while (nz--) { 688 idx = 6*(*vi++); 689 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 690 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 691 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 692 v += 36; 693 } 694 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 695 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 696 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 697 idiag += 36; 698 i2 += 6; 699 } 700 break; 701 case 7: 702 for (i=0; i<m; i++) { 703 v = aa + 49*ai[i]; 704 vi = aj + ai[i]; 705 nz = ai[i+1] - ai[i]; 706 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 707 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 708 while (nz--) { 709 idx = 7*(*vi++); 710 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 711 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 712 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 713 v += 49; 714 } 715 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 716 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 717 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 718 idiag += 49; 719 i2 += 7; 720 } 721 break; 722 default: 723 for (i=0; i<m; i++) { 724 v = aa + bs2*ai[i]; 725 vi = aj + ai[i]; 726 nz = ai[i+1] - ai[i]; 727 728 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 729 /* copy all rows of x that are needed into contiguous space */ 730 workt = work; 731 for (j=0; j<nz; j++) { 732 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 workt += bs; 734 } 735 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 736 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 737 738 idiag += bs2; 739 i2 += bs; 740 } 741 break; 742 } 743 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 744 } 745 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 746 idiag = a->idiag+bs2*(a->mbs-1); 747 i2 = bs * (m-1); 748 switch (bs) { 749 case 1: 750 for (i=m-1; i>=0; i--) { 751 v = aa + ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; 755 for (j=0; j<nz; j++) { 756 xw[0] = x[vi[j]]; 757 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 758 } 759 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 760 x[i2] += xw[0]; 761 idiag -= 1; 762 i2 -= 1; 763 } 764 break; 765 case 2: 766 for (i=m-1; i>=0; i--) { 767 v = aa + 4*ai[i]; 768 vi = aj + ai[i]; 769 nz = ai[i+1] - ai[i]; 770 s[0] = b[i2]; s[1] = b[i2+1]; 771 for (j=0; j<nz; j++) { 772 idx = 2*vi[j]; 773 it = 4*j; 774 xw[0] = x[idx]; xw[1] = x[1+idx]; 775 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 776 } 777 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 778 x[i2] += xw[0]; x[i2+1] += xw[1]; 779 idiag -= 4; 780 i2 -= 2; 781 } 782 break; 783 case 3: 784 for (i=m-1; i>=0; i--) { 785 v = aa + 9*ai[i]; 786 vi = aj + ai[i]; 787 nz = ai[i+1] - ai[i]; 788 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 789 while (nz--) { 790 idx = 3*(*vi++); 791 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 792 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 793 v += 9; 794 } 795 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 796 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 797 idiag -= 9; 798 i2 -= 3; 799 } 800 break; 801 case 4: 802 for (i=m-1; i>=0; i--) { 803 v = aa + 16*ai[i]; 804 vi = aj + ai[i]; 805 nz = ai[i+1] - ai[i]; 806 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 807 while (nz--) { 808 idx = 4*(*vi++); 809 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 810 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 811 v += 16; 812 } 813 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 814 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 815 idiag -= 16; 816 i2 -= 4; 817 } 818 break; 819 case 5: 820 for (i=m-1; i>=0; i--) { 821 v = aa + 25*ai[i]; 822 vi = aj + ai[i]; 823 nz = ai[i+1] - ai[i]; 824 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 825 while (nz--) { 826 idx = 5*(*vi++); 827 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 828 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 829 v += 25; 830 } 831 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 832 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 833 idiag -= 25; 834 i2 -= 5; 835 } 836 break; 837 case 6: 838 for (i=m-1; i>=0; i--) { 839 v = aa + 36*ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i+1] - ai[i]; 842 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 843 while (nz--) { 844 idx = 6*(*vi++); 845 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 846 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 847 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 848 v += 36; 849 } 850 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 851 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 852 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 853 idiag -= 36; 854 i2 -= 6; 855 } 856 break; 857 case 7: 858 for (i=m-1; i>=0; i--) { 859 v = aa + 49*ai[i]; 860 vi = aj + ai[i]; 861 nz = ai[i+1] - ai[i]; 862 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 863 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 864 while (nz--) { 865 idx = 7*(*vi++); 866 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 867 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 868 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 869 v += 49; 870 } 871 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 872 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 873 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 874 idiag -= 49; 875 i2 -= 7; 876 } 877 break; 878 default: 879 for (i=m-1; i>=0; i--) { 880 v = aa + bs2*ai[i]; 881 vi = aj + ai[i]; 882 nz = ai[i+1] - ai[i]; 883 884 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 /* copy all rows of x that are needed into contiguous space */ 886 workt = work; 887 for (j=0; j<nz; j++) { 888 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 workt += bs; 890 } 891 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 892 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 893 894 idiag -= bs2; 895 i2 -= bs; 896 } 897 break; 898 } 899 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 900 } 901 } 902 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 903 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 908 /* 909 Special version for direct calls from Fortran (Used in PETSc-fun3d) 910 */ 911 #if defined(PETSC_HAVE_FORTRAN_CAPS) 912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 914 #define matsetvaluesblocked4_ matsetvaluesblocked4 915 #endif 916 917 #undef __FUNCT__ 918 #define __FUNCT__ "matsetvaluesblocked4_" 919 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 920 { 921 Mat A = *AA; 922 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 923 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 924 PetscInt *ai =a->i,*ailen=a->ilen; 925 PetscInt *aj =a->j,stepval,lastcol = -1; 926 const PetscScalar *value = v; 927 MatScalar *ap,*aa = a->a,*bap; 928 929 PetscFunctionBegin; 930 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 931 stepval = (n-1)*4; 932 for (k=0; k<m; k++) { /* loop over added rows */ 933 row = im[k]; 934 rp = aj + ai[row]; 935 ap = aa + 16*ai[row]; 936 nrow = ailen[row]; 937 low = 0; 938 high = nrow; 939 for (l=0; l<n; l++) { /* loop over added columns */ 940 col = in[l]; 941 if (col <= lastcol) low = 0; 942 else high = nrow; 943 lastcol = col; 944 value = v + k*(stepval+4 + l)*4; 945 while (high-low > 7) { 946 t = (low+high)/2; 947 if (rp[t] > col) high = t; 948 else low = t; 949 } 950 for (i=low; i<high; i++) { 951 if (rp[i] > col) break; 952 if (rp[i] == col) { 953 bap = ap + 16*i; 954 for (ii=0; ii<4; ii++,value+=stepval) { 955 for (jj=ii; jj<16; jj+=4) { 956 bap[jj] += *value++; 957 } 958 } 959 goto noinsert2; 960 } 961 } 962 N = nrow++ - 1; 963 high++; /* added new column index thus must search to one higher than before */ 964 /* shift up all the later entries in this row */ 965 for (ii=N; ii>=i; ii--) { 966 rp[ii+1] = rp[ii]; 967 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 968 } 969 if (N >= i) { 970 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 971 } 972 rp[i] = col; 973 bap = ap + 16*i; 974 for (ii=0; ii<4; ii++,value+=stepval) { 975 for (jj=ii; jj<16; jj+=4) { 976 bap[jj] = *value++; 977 } 978 } 979 noinsert2:; 980 low = i; 981 } 982 ailen[row] = nrow; 983 } 984 PetscFunctionReturnVoid(); 985 } 986 987 #if defined(PETSC_HAVE_FORTRAN_CAPS) 988 #define matsetvalues4_ MATSETVALUES4 989 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 990 #define matsetvalues4_ matsetvalues4 991 #endif 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatSetValues4_" 995 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 996 { 997 Mat A = *AA; 998 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 999 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1000 PetscInt *ai=a->i,*ailen=a->ilen; 1001 PetscInt *aj=a->j,brow,bcol; 1002 PetscInt ridx,cidx,lastcol = -1; 1003 MatScalar *ap,value,*aa=a->a,*bap; 1004 1005 PetscFunctionBegin; 1006 for (k=0; k<m; k++) { /* loop over added rows */ 1007 row = im[k]; brow = row/4; 1008 rp = aj + ai[brow]; 1009 ap = aa + 16*ai[brow]; 1010 nrow = ailen[brow]; 1011 low = 0; 1012 high = nrow; 1013 for (l=0; l<n; l++) { /* loop over added columns */ 1014 col = in[l]; bcol = col/4; 1015 ridx = row % 4; cidx = col % 4; 1016 value = v[l + k*n]; 1017 if (col <= lastcol) low = 0; 1018 else high = nrow; 1019 lastcol = col; 1020 while (high-low > 7) { 1021 t = (low+high)/2; 1022 if (rp[t] > bcol) high = t; 1023 else low = t; 1024 } 1025 for (i=low; i<high; i++) { 1026 if (rp[i] > bcol) break; 1027 if (rp[i] == bcol) { 1028 bap = ap + 16*i + 4*cidx + ridx; 1029 *bap += value; 1030 goto noinsert1; 1031 } 1032 } 1033 N = nrow++ - 1; 1034 high++; /* added new column thus must search to one higher than before */ 1035 /* shift up all the later entries in this row */ 1036 for (ii=N; ii>=i; ii--) { 1037 rp[ii+1] = rp[ii]; 1038 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1039 } 1040 if (N>=i) { 1041 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1042 } 1043 rp[i] = bcol; 1044 ap[16*i + 4*cidx + ridx] = value; 1045 noinsert1:; 1046 low = i; 1047 } 1048 ailen[brow] = nrow; 1049 } 1050 PetscFunctionReturnVoid(); 1051 } 1052 1053 /* 1054 Checks for missing diagonals 1055 */ 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1058 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1059 { 1060 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1061 PetscErrorCode ierr; 1062 PetscInt *diag,*ii = a->i,i; 1063 1064 PetscFunctionBegin; 1065 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1066 *missing = PETSC_FALSE; 1067 if (A->rmap->n > 0 && !ii) { 1068 *missing = PETSC_TRUE; 1069 if (d) *d = 0; 1070 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1071 } else { 1072 diag = a->diag; 1073 for (i=0; i<a->mbs; i++) { 1074 if (diag[i] >= ii[i+1]) { 1075 *missing = PETSC_TRUE; 1076 if (d) *d = i; 1077 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1078 break; 1079 } 1080 } 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1087 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1088 { 1089 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt i,j,m = a->mbs; 1092 1093 PetscFunctionBegin; 1094 if (!a->diag) { 1095 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1096 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1097 a->free_diag = PETSC_TRUE; 1098 } 1099 for (i=0; i<m; i++) { 1100 a->diag[i] = a->i[i+1]; 1101 for (j=a->i[i]; j<a->i[i+1]; j++) { 1102 if (a->j[j] == i) { 1103 a->diag[i] = j; 1104 break; 1105 } 1106 } 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1114 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1117 PetscErrorCode ierr; 1118 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1119 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1120 1121 PetscFunctionBegin; 1122 *nn = n; 1123 if (!ia) PetscFunctionReturn(0); 1124 if (symmetric) { 1125 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1126 nz = tia[n]; 1127 } else { 1128 tia = a->i; tja = a->j; 1129 } 1130 1131 if (!blockcompressed && bs > 1) { 1132 (*nn) *= bs; 1133 /* malloc & create the natural set of indices */ 1134 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1135 if (n) { 1136 (*ia)[0] = oshift; 1137 for (j=1; j<bs; j++) { 1138 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1139 } 1140 } 1141 1142 for (i=1; i<n; i++) { 1143 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1144 for (j=1; j<bs; j++) { 1145 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1146 } 1147 } 1148 if (n) { 1149 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1150 } 1151 1152 if (inja) { 1153 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1154 cnt = 0; 1155 for (i=0; i<n; i++) { 1156 for (j=0; j<bs; j++) { 1157 for (k=tia[i]; k<tia[i+1]; k++) { 1158 for (l=0; l<bs; l++) { 1159 (*ja)[cnt++] = bs*tja[k] + l; 1160 } 1161 } 1162 } 1163 } 1164 } 1165 1166 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1167 ierr = PetscFree(tia);CHKERRQ(ierr); 1168 ierr = PetscFree(tja);CHKERRQ(ierr); 1169 } 1170 } else if (oshift == 1) { 1171 if (symmetric) { 1172 nz = tia[A->rmap->n/bs]; 1173 /* add 1 to i and j indices */ 1174 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1175 *ia = tia; 1176 if (ja) { 1177 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1178 *ja = tja; 1179 } 1180 } else { 1181 nz = a->i[A->rmap->n/bs]; 1182 /* malloc space and add 1 to i and j indices */ 1183 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1184 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1185 if (ja) { 1186 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1187 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1188 } 1189 } 1190 } else { 1191 *ia = tia; 1192 if (ja) *ja = tja; 1193 } 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1199 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1200 { 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 if (!ia) PetscFunctionReturn(0); 1205 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1206 ierr = PetscFree(*ia);CHKERRQ(ierr); 1207 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1214 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 1219 PetscFunctionBegin; 1220 #if defined(PETSC_USE_LOG) 1221 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1222 #endif 1223 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1224 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1225 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1226 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1227 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1228 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1229 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1230 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1231 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1232 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1233 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1234 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1235 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1236 1237 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1238 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1239 ierr = PetscFree(A->data);CHKERRQ(ierr); 1240 1241 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1250 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1257 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1258 { 1259 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 switch (op) { 1264 case MAT_ROW_ORIENTED: 1265 a->roworiented = flg; 1266 break; 1267 case MAT_KEEP_NONZERO_PATTERN: 1268 a->keepnonzeropattern = flg; 1269 break; 1270 case MAT_NEW_NONZERO_LOCATIONS: 1271 a->nonew = (flg ? 0 : 1); 1272 break; 1273 case MAT_NEW_NONZERO_LOCATION_ERR: 1274 a->nonew = (flg ? -1 : 0); 1275 break; 1276 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1277 a->nonew = (flg ? -2 : 0); 1278 break; 1279 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1280 a->nounused = (flg ? -1 : 0); 1281 break; 1282 case MAT_NEW_DIAGONALS: 1283 case MAT_IGNORE_OFF_PROC_ENTRIES: 1284 case MAT_USE_HASH_TABLE: 1285 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1286 break; 1287 case MAT_SPD: 1288 case MAT_SYMMETRIC: 1289 case MAT_STRUCTURALLY_SYMMETRIC: 1290 case MAT_HERMITIAN: 1291 case MAT_SYMMETRY_ETERNAL: 1292 /* These options are handled directly by MatSetOption() */ 1293 break; 1294 default: 1295 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1296 } 1297 PetscFunctionReturn(0); 1298 } 1299 1300 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1303 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1304 { 1305 PetscErrorCode ierr; 1306 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1307 MatScalar *aa_i; 1308 PetscScalar *v_i; 1309 1310 PetscFunctionBegin; 1311 bs = A->rmap->bs; 1312 bs2 = bs*bs; 1313 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1314 1315 bn = row/bs; /* Block number */ 1316 bp = row % bs; /* Block Position */ 1317 M = ai[bn+1] - ai[bn]; 1318 *nz = bs*M; 1319 1320 if (v) { 1321 *v = 0; 1322 if (*nz) { 1323 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1324 for (i=0; i<M; i++) { /* for each block in the block row */ 1325 v_i = *v + i*bs; 1326 aa_i = aa + bs2*(ai[bn] + i); 1327 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1328 } 1329 } 1330 } 1331 1332 if (idx) { 1333 *idx = 0; 1334 if (*nz) { 1335 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1336 for (i=0; i<M; i++) { /* for each block in the block row */ 1337 idx_i = *idx + i*bs; 1338 itmp = bs*aj[ai[bn] + i]; 1339 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1340 } 1341 } 1342 } 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1348 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1349 { 1350 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1351 PetscErrorCode ierr; 1352 1353 PetscFunctionBegin; 1354 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1360 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1361 { 1362 PetscErrorCode ierr; 1363 1364 PetscFunctionBegin; 1365 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1366 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1367 PetscFunctionReturn(0); 1368 } 1369 1370 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1374 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1375 { 1376 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1377 Mat C; 1378 PetscErrorCode ierr; 1379 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1380 PetscInt *rows,*cols,bs2=a->bs2; 1381 MatScalar *array; 1382 1383 PetscFunctionBegin; 1384 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1385 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1386 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1387 1388 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1389 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1390 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1391 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1392 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1393 ierr = PetscFree(col);CHKERRQ(ierr); 1394 } else { 1395 C = *B; 1396 } 1397 1398 array = a->a; 1399 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1400 for (i=0; i<mbs; i++) { 1401 cols[0] = i*bs; 1402 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1403 len = ai[i+1] - ai[i]; 1404 for (j=0; j<len; j++) { 1405 rows[0] = (*aj++)*bs; 1406 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1407 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1408 array += bs2; 1409 } 1410 } 1411 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1412 1413 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1414 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1415 1416 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1417 *B = C; 1418 } else { 1419 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1420 } 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1426 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1427 { 1428 PetscErrorCode ierr; 1429 Mat Btrans; 1430 1431 PetscFunctionBegin; 1432 *f = PETSC_FALSE; 1433 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1434 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1435 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1441 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1442 { 1443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1444 PetscErrorCode ierr; 1445 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1446 int fd; 1447 PetscScalar *aa; 1448 FILE *file; 1449 1450 PetscFunctionBegin; 1451 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1452 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1453 col_lens[0] = MAT_FILE_CLASSID; 1454 1455 col_lens[1] = A->rmap->N; 1456 col_lens[2] = A->cmap->n; 1457 col_lens[3] = a->nz*bs2; 1458 1459 /* store lengths of each row and write (including header) to file */ 1460 count = 0; 1461 for (i=0; i<a->mbs; i++) { 1462 for (j=0; j<bs; j++) { 1463 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1464 } 1465 } 1466 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1467 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1468 1469 /* store column indices (zero start index) */ 1470 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1471 count = 0; 1472 for (i=0; i<a->mbs; i++) { 1473 for (j=0; j<bs; j++) { 1474 for (k=a->i[i]; k<a->i[i+1]; k++) { 1475 for (l=0; l<bs; l++) { 1476 jj[count++] = bs*a->j[k] + l; 1477 } 1478 } 1479 } 1480 } 1481 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1482 ierr = PetscFree(jj);CHKERRQ(ierr); 1483 1484 /* store nonzero values */ 1485 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1486 count = 0; 1487 for (i=0; i<a->mbs; i++) { 1488 for (j=0; j<bs; j++) { 1489 for (k=a->i[i]; k<a->i[i+1]; k++) { 1490 for (l=0; l<bs; l++) { 1491 aa[count++] = a->a[bs2*k + l*bs + j]; 1492 } 1493 } 1494 } 1495 } 1496 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1497 ierr = PetscFree(aa);CHKERRQ(ierr); 1498 1499 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1500 if (file) { 1501 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1502 } 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1508 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1509 { 1510 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1511 PetscErrorCode ierr; 1512 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1513 PetscViewerFormat format; 1514 1515 PetscFunctionBegin; 1516 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1517 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1518 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1519 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1520 const char *matname; 1521 Mat aij; 1522 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1523 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1524 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1525 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1526 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1527 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1528 PetscFunctionReturn(0); 1529 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1530 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1531 for (i=0; i<a->mbs; i++) { 1532 for (j=0; j<bs; j++) { 1533 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1534 for (k=a->i[i]; k<a->i[i+1]; k++) { 1535 for (l=0; l<bs; l++) { 1536 #if defined(PETSC_USE_COMPLEX) 1537 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1539 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1542 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1543 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1544 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1545 } 1546 #else 1547 if (a->a[bs2*k + l*bs + j] != 0.0) { 1548 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1549 } 1550 #endif 1551 } 1552 } 1553 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1554 } 1555 } 1556 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1557 } else { 1558 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1559 for (i=0; i<a->mbs; i++) { 1560 for (j=0; j<bs; j++) { 1561 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1562 for (k=a->i[i]; k<a->i[i+1]; k++) { 1563 for (l=0; l<bs; l++) { 1564 #if defined(PETSC_USE_COMPLEX) 1565 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1567 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1568 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1570 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } else { 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1573 } 1574 #else 1575 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1576 #endif 1577 } 1578 } 1579 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1580 } 1581 } 1582 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1583 } 1584 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #include <petscdraw.h> 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1591 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1592 { 1593 Mat A = (Mat) Aa; 1594 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1595 PetscErrorCode ierr; 1596 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1597 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1598 MatScalar *aa; 1599 PetscViewer viewer; 1600 PetscViewerFormat format; 1601 1602 PetscFunctionBegin; 1603 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1604 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1605 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1606 1607 /* loop over matrix elements drawing boxes */ 1608 1609 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1610 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1611 /* Blue for negative, Cyan for zero and Red for positive */ 1612 color = PETSC_DRAW_BLUE; 1613 for (i=0,row=0; i<mbs; i++,row+=bs) { 1614 for (j=a->i[i]; j<a->i[i+1]; j++) { 1615 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1616 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1617 aa = a->a + j*bs2; 1618 for (k=0; k<bs; k++) { 1619 for (l=0; l<bs; l++) { 1620 if (PetscRealPart(*aa++) >= 0.) continue; 1621 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1622 } 1623 } 1624 } 1625 } 1626 color = PETSC_DRAW_CYAN; 1627 for (i=0,row=0; i<mbs; i++,row+=bs) { 1628 for (j=a->i[i]; j<a->i[i+1]; j++) { 1629 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1630 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1631 aa = a->a + j*bs2; 1632 for (k=0; k<bs; k++) { 1633 for (l=0; l<bs; l++) { 1634 if (PetscRealPart(*aa++) != 0.) continue; 1635 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1636 } 1637 } 1638 } 1639 } 1640 color = PETSC_DRAW_RED; 1641 for (i=0,row=0; i<mbs; i++,row+=bs) { 1642 for (j=a->i[i]; j<a->i[i+1]; j++) { 1643 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1644 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1645 aa = a->a + j*bs2; 1646 for (k=0; k<bs; k++) { 1647 for (l=0; l<bs; l++) { 1648 if (PetscRealPart(*aa++) <= 0.) continue; 1649 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1650 } 1651 } 1652 } 1653 } 1654 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1655 } else { 1656 /* use contour shading to indicate magnitude of values */ 1657 /* first determine max of all nonzero values */ 1658 PetscReal minv = 0.0, maxv = 0.0; 1659 PetscDraw popup; 1660 1661 for (i=0; i<a->nz*a->bs2; i++) { 1662 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1663 } 1664 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1665 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1666 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1667 1668 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1669 for (i=0,row=0; i<mbs; i++,row+=bs) { 1670 for (j=a->i[i]; j<a->i[i+1]; j++) { 1671 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1672 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1673 aa = a->a + j*bs2; 1674 for (k=0; k<bs; k++) { 1675 for (l=0; l<bs; l++) { 1676 MatScalar v = *aa++; 1677 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1678 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1679 } 1680 } 1681 } 1682 } 1683 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1690 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1691 { 1692 PetscErrorCode ierr; 1693 PetscReal xl,yl,xr,yr,w,h; 1694 PetscDraw draw; 1695 PetscBool isnull; 1696 1697 PetscFunctionBegin; 1698 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1699 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1700 if (isnull) PetscFunctionReturn(0); 1701 1702 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1703 xr += w; yr += h; xl = -w; yl = -h; 1704 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1705 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1706 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1707 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1708 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatView_SeqBAIJ" 1714 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1715 { 1716 PetscErrorCode ierr; 1717 PetscBool iascii,isbinary,isdraw; 1718 1719 PetscFunctionBegin; 1720 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1721 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1722 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1723 if (iascii) { 1724 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1725 } else if (isbinary) { 1726 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1727 } else if (isdraw) { 1728 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1729 } else { 1730 Mat B; 1731 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1732 ierr = MatView(B,viewer);CHKERRQ(ierr); 1733 ierr = MatDestroy(&B);CHKERRQ(ierr); 1734 } 1735 PetscFunctionReturn(0); 1736 } 1737 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1741 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1742 { 1743 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1744 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1745 PetscInt *ai = a->i,*ailen = a->ilen; 1746 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1747 MatScalar *ap,*aa = a->a; 1748 1749 PetscFunctionBegin; 1750 for (k=0; k<m; k++) { /* loop over rows */ 1751 row = im[k]; brow = row/bs; 1752 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1753 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1754 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1755 nrow = ailen[brow]; 1756 for (l=0; l<n; l++) { /* loop over columns */ 1757 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1758 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1759 col = in[l]; 1760 bcol = col/bs; 1761 cidx = col%bs; 1762 ridx = row%bs; 1763 high = nrow; 1764 low = 0; /* assume unsorted */ 1765 while (high-low > 5) { 1766 t = (low+high)/2; 1767 if (rp[t] > bcol) high = t; 1768 else low = t; 1769 } 1770 for (i=low; i<high; i++) { 1771 if (rp[i] > bcol) break; 1772 if (rp[i] == bcol) { 1773 *v++ = ap[bs2*i+bs*cidx+ridx]; 1774 goto finished; 1775 } 1776 } 1777 *v++ = 0.0; 1778 finished:; 1779 } 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1786 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1787 { 1788 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1789 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1790 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1791 PetscErrorCode ierr; 1792 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1793 PetscBool roworiented=a->roworiented; 1794 const PetscScalar *value = v; 1795 MatScalar *ap,*aa = a->a,*bap; 1796 1797 PetscFunctionBegin; 1798 if (roworiented) { 1799 stepval = (n-1)*bs; 1800 } else { 1801 stepval = (m-1)*bs; 1802 } 1803 for (k=0; k<m; k++) { /* loop over added rows */ 1804 row = im[k]; 1805 if (row < 0) continue; 1806 #if defined(PETSC_USE_DEBUG) 1807 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1808 #endif 1809 rp = aj + ai[row]; 1810 ap = aa + bs2*ai[row]; 1811 rmax = imax[row]; 1812 nrow = ailen[row]; 1813 low = 0; 1814 high = nrow; 1815 for (l=0; l<n; l++) { /* loop over added columns */ 1816 if (in[l] < 0) continue; 1817 #if defined(PETSC_USE_DEBUG) 1818 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1819 #endif 1820 col = in[l]; 1821 if (roworiented) { 1822 value = v + (k*(stepval+bs) + l)*bs; 1823 } else { 1824 value = v + (l*(stepval+bs) + k)*bs; 1825 } 1826 if (col <= lastcol) low = 0; 1827 else high = nrow; 1828 lastcol = col; 1829 while (high-low > 7) { 1830 t = (low+high)/2; 1831 if (rp[t] > col) high = t; 1832 else low = t; 1833 } 1834 for (i=low; i<high; i++) { 1835 if (rp[i] > col) break; 1836 if (rp[i] == col) { 1837 bap = ap + bs2*i; 1838 if (roworiented) { 1839 if (is == ADD_VALUES) { 1840 for (ii=0; ii<bs; ii++,value+=stepval) { 1841 for (jj=ii; jj<bs2; jj+=bs) { 1842 bap[jj] += *value++; 1843 } 1844 } 1845 } else { 1846 for (ii=0; ii<bs; ii++,value+=stepval) { 1847 for (jj=ii; jj<bs2; jj+=bs) { 1848 bap[jj] = *value++; 1849 } 1850 } 1851 } 1852 } else { 1853 if (is == ADD_VALUES) { 1854 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1855 for (jj=0; jj<bs; jj++) { 1856 bap[jj] += value[jj]; 1857 } 1858 bap += bs; 1859 } 1860 } else { 1861 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1862 for (jj=0; jj<bs; jj++) { 1863 bap[jj] = value[jj]; 1864 } 1865 bap += bs; 1866 } 1867 } 1868 } 1869 goto noinsert2; 1870 } 1871 } 1872 if (nonew == 1) goto noinsert2; 1873 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1874 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1875 N = nrow++ - 1; high++; 1876 /* shift up all the later entries in this row */ 1877 for (ii=N; ii>=i; ii--) { 1878 rp[ii+1] = rp[ii]; 1879 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1880 } 1881 if (N >= i) { 1882 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1883 } 1884 rp[i] = col; 1885 bap = ap + bs2*i; 1886 if (roworiented) { 1887 for (ii=0; ii<bs; ii++,value+=stepval) { 1888 for (jj=ii; jj<bs2; jj+=bs) { 1889 bap[jj] = *value++; 1890 } 1891 } 1892 } else { 1893 for (ii=0; ii<bs; ii++,value+=stepval) { 1894 for (jj=0; jj<bs; jj++) { 1895 *bap++ = *value++; 1896 } 1897 } 1898 } 1899 noinsert2:; 1900 low = i; 1901 } 1902 ailen[row] = nrow; 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1909 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1910 { 1911 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1912 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1913 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1914 PetscErrorCode ierr; 1915 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1916 MatScalar *aa = a->a,*ap; 1917 PetscReal ratio=0.6; 1918 1919 PetscFunctionBegin; 1920 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1921 1922 if (m) rmax = ailen[0]; 1923 for (i=1; i<mbs; i++) { 1924 /* move each row back by the amount of empty slots (fshift) before it*/ 1925 fshift += imax[i-1] - ailen[i-1]; 1926 rmax = PetscMax(rmax,ailen[i]); 1927 if (fshift) { 1928 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1929 N = ailen[i]; 1930 for (j=0; j<N; j++) { 1931 ip[j-fshift] = ip[j]; 1932 1933 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1934 } 1935 } 1936 ai[i] = ai[i-1] + ailen[i-1]; 1937 } 1938 if (mbs) { 1939 fshift += imax[mbs-1] - ailen[mbs-1]; 1940 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1941 } 1942 1943 /* reset ilen and imax for each row */ 1944 a->nonzerorowcnt = 0; 1945 for (i=0; i<mbs; i++) { 1946 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1947 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1948 } 1949 a->nz = ai[mbs]; 1950 1951 /* diagonals may have moved, so kill the diagonal pointers */ 1952 a->idiagvalid = PETSC_FALSE; 1953 if (fshift && a->diag) { 1954 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1955 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1956 a->diag = 0; 1957 } 1958 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1959 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1960 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1961 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1962 1963 A->info.mallocs += a->reallocs; 1964 a->reallocs = 0; 1965 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1966 a->rmax = rmax; 1967 1968 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 /* 1973 This function returns an array of flags which indicate the locations of contiguous 1974 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1975 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1976 Assume: sizes should be long enough to hold all the values. 1977 */ 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1980 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1981 { 1982 PetscInt i,j,k,row; 1983 PetscBool flg; 1984 1985 PetscFunctionBegin; 1986 for (i=0,j=0; i<n; j++) { 1987 row = idx[i]; 1988 if (row%bs!=0) { /* Not the begining of a block */ 1989 sizes[j] = 1; 1990 i++; 1991 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1992 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1993 i++; 1994 } else { /* Begining of the block, so check if the complete block exists */ 1995 flg = PETSC_TRUE; 1996 for (k=1; k<bs; k++) { 1997 if (row+k != idx[i+k]) { /* break in the block */ 1998 flg = PETSC_FALSE; 1999 break; 2000 } 2001 } 2002 if (flg) { /* No break in the bs */ 2003 sizes[j] = bs; 2004 i += bs; 2005 } else { 2006 sizes[j] = 1; 2007 i++; 2008 } 2009 } 2010 } 2011 *bs_max = j; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2017 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2018 { 2019 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2020 PetscErrorCode ierr; 2021 PetscInt i,j,k,count,*rows; 2022 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2023 PetscScalar zero = 0.0; 2024 MatScalar *aa; 2025 const PetscScalar *xx; 2026 PetscScalar *bb; 2027 2028 PetscFunctionBegin; 2029 /* fix right hand side if needed */ 2030 if (x && b) { 2031 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2032 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2033 for (i=0; i<is_n; i++) { 2034 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2035 } 2036 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2037 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2038 } 2039 2040 /* Make a copy of the IS and sort it */ 2041 /* allocate memory for rows,sizes */ 2042 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2043 2044 /* copy IS values to rows, and sort them */ 2045 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2046 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2047 2048 if (baij->keepnonzeropattern) { 2049 for (i=0; i<is_n; i++) sizes[i] = 1; 2050 bs_max = is_n; 2051 } else { 2052 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2053 A->nonzerostate++; 2054 } 2055 2056 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2057 row = rows[j]; 2058 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2059 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2060 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2061 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2062 if (diag != (PetscScalar)0.0) { 2063 if (baij->ilen[row/bs] > 0) { 2064 baij->ilen[row/bs] = 1; 2065 baij->j[baij->i[row/bs]] = row/bs; 2066 2067 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2068 } 2069 /* Now insert all the diagonal values for this bs */ 2070 for (k=0; k<bs; k++) { 2071 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2072 } 2073 } else { /* (diag == 0.0) */ 2074 baij->ilen[row/bs] = 0; 2075 } /* end (diag == 0.0) */ 2076 } else { /* (sizes[i] != bs) */ 2077 #if defined(PETSC_USE_DEBUG) 2078 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2079 #endif 2080 for (k=0; k<count; k++) { 2081 aa[0] = zero; 2082 aa += bs; 2083 } 2084 if (diag != (PetscScalar)0.0) { 2085 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2086 } 2087 } 2088 } 2089 2090 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2091 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2097 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2098 { 2099 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2100 PetscErrorCode ierr; 2101 PetscInt i,j,k,count; 2102 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2103 PetscScalar zero = 0.0; 2104 MatScalar *aa; 2105 const PetscScalar *xx; 2106 PetscScalar *bb; 2107 PetscBool *zeroed,vecs = PETSC_FALSE; 2108 2109 PetscFunctionBegin; 2110 /* fix right hand side if needed */ 2111 if (x && b) { 2112 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2113 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2114 vecs = PETSC_TRUE; 2115 } 2116 2117 /* zero the columns */ 2118 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2119 for (i=0; i<is_n; i++) { 2120 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2121 zeroed[is_idx[i]] = PETSC_TRUE; 2122 } 2123 for (i=0; i<A->rmap->N; i++) { 2124 if (!zeroed[i]) { 2125 row = i/bs; 2126 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2127 for (k=0; k<bs; k++) { 2128 col = bs*baij->j[j] + k; 2129 if (zeroed[col]) { 2130 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2131 if (vecs) bb[i] -= aa[0]*xx[col]; 2132 aa[0] = 0.0; 2133 } 2134 } 2135 } 2136 } else if (vecs) bb[i] = diag*xx[i]; 2137 } 2138 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2139 if (vecs) { 2140 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2141 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2142 } 2143 2144 /* zero the rows */ 2145 for (i=0; i<is_n; i++) { 2146 row = is_idx[i]; 2147 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2148 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2149 for (k=0; k<count; k++) { 2150 aa[0] = zero; 2151 aa += bs; 2152 } 2153 if (diag != (PetscScalar)0.0) { 2154 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2155 } 2156 } 2157 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2163 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2164 { 2165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2166 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2167 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2168 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2169 PetscErrorCode ierr; 2170 PetscInt ridx,cidx,bs2=a->bs2; 2171 PetscBool roworiented=a->roworiented; 2172 MatScalar *ap,value,*aa=a->a,*bap; 2173 2174 PetscFunctionBegin; 2175 for (k=0; k<m; k++) { /* loop over added rows */ 2176 row = im[k]; 2177 brow = row/bs; 2178 if (row < 0) continue; 2179 #if defined(PETSC_USE_DEBUG) 2180 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2181 #endif 2182 rp = aj + ai[brow]; 2183 ap = aa + bs2*ai[brow]; 2184 rmax = imax[brow]; 2185 nrow = ailen[brow]; 2186 low = 0; 2187 high = nrow; 2188 for (l=0; l<n; l++) { /* loop over added columns */ 2189 if (in[l] < 0) continue; 2190 #if defined(PETSC_USE_DEBUG) 2191 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2192 #endif 2193 col = in[l]; bcol = col/bs; 2194 ridx = row % bs; cidx = col % bs; 2195 if (roworiented) { 2196 value = v[l + k*n]; 2197 } else { 2198 value = v[k + l*m]; 2199 } 2200 if (col <= lastcol) low = 0; else high = nrow; 2201 lastcol = col; 2202 while (high-low > 7) { 2203 t = (low+high)/2; 2204 if (rp[t] > bcol) high = t; 2205 else low = t; 2206 } 2207 for (i=low; i<high; i++) { 2208 if (rp[i] > bcol) break; 2209 if (rp[i] == bcol) { 2210 bap = ap + bs2*i + bs*cidx + ridx; 2211 if (is == ADD_VALUES) *bap += value; 2212 else *bap = value; 2213 goto noinsert1; 2214 } 2215 } 2216 if (nonew == 1) goto noinsert1; 2217 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2218 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2219 N = nrow++ - 1; high++; 2220 /* shift up all the later entries in this row */ 2221 for (ii=N; ii>=i; ii--) { 2222 rp[ii+1] = rp[ii]; 2223 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2224 } 2225 if (N>=i) { 2226 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2227 } 2228 rp[i] = bcol; 2229 ap[bs2*i + bs*cidx + ridx] = value; 2230 a->nz++; 2231 A->nonzerostate++; 2232 noinsert1:; 2233 low = i; 2234 } 2235 ailen[brow] = nrow; 2236 } 2237 PetscFunctionReturn(0); 2238 } 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2242 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2243 { 2244 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2245 Mat outA; 2246 PetscErrorCode ierr; 2247 PetscBool row_identity,col_identity; 2248 2249 PetscFunctionBegin; 2250 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2251 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2252 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2253 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2254 2255 outA = inA; 2256 inA->factortype = MAT_FACTOR_LU; 2257 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2258 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2259 2260 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2261 2262 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2263 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2264 a->row = row; 2265 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2266 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2267 a->col = col; 2268 2269 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2270 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2271 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2272 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2273 2274 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2275 if (!a->solve_work) { 2276 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2277 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2278 } 2279 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2285 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2286 { 2287 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2288 PetscInt i,nz,mbs; 2289 2290 PetscFunctionBegin; 2291 nz = baij->maxnz; 2292 mbs = baij->mbs; 2293 for (i=0; i<nz; i++) { 2294 baij->j[i] = indices[i]; 2295 } 2296 baij->nz = nz; 2297 for (i=0; i<mbs; i++) { 2298 baij->ilen[i] = baij->imax[i]; 2299 } 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2305 /*@ 2306 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2307 in the matrix. 2308 2309 Input Parameters: 2310 + mat - the SeqBAIJ matrix 2311 - indices - the column indices 2312 2313 Level: advanced 2314 2315 Notes: 2316 This can be called if you have precomputed the nonzero structure of the 2317 matrix and want to provide it to the matrix object to improve the performance 2318 of the MatSetValues() operation. 2319 2320 You MUST have set the correct numbers of nonzeros per row in the call to 2321 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2322 2323 MUST be called before any calls to MatSetValues(); 2324 2325 @*/ 2326 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2327 { 2328 PetscErrorCode ierr; 2329 2330 PetscFunctionBegin; 2331 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2332 PetscValidPointer(indices,2); 2333 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2339 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2340 { 2341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2342 PetscErrorCode ierr; 2343 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2344 PetscReal atmp; 2345 PetscScalar *x,zero = 0.0; 2346 MatScalar *aa; 2347 PetscInt ncols,brow,krow,kcol; 2348 2349 PetscFunctionBegin; 2350 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2351 bs = A->rmap->bs; 2352 aa = a->a; 2353 ai = a->i; 2354 aj = a->j; 2355 mbs = a->mbs; 2356 2357 ierr = VecSet(v,zero);CHKERRQ(ierr); 2358 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2359 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2360 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2361 for (i=0; i<mbs; i++) { 2362 ncols = ai[1] - ai[0]; ai++; 2363 brow = bs*i; 2364 for (j=0; j<ncols; j++) { 2365 for (kcol=0; kcol<bs; kcol++) { 2366 for (krow=0; krow<bs; krow++) { 2367 atmp = PetscAbsScalar(*aa);aa++; 2368 row = brow + krow; /* row index */ 2369 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2370 } 2371 } 2372 aj++; 2373 } 2374 } 2375 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatCopy_SeqBAIJ" 2381 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2382 { 2383 PetscErrorCode ierr; 2384 2385 PetscFunctionBegin; 2386 /* If the two matrices have the same copy implementation, use fast copy. */ 2387 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2388 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2389 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2390 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2391 2392 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2393 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2394 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2395 } else { 2396 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2397 } 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2403 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2404 { 2405 PetscErrorCode ierr; 2406 2407 PetscFunctionBegin; 2408 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2414 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2415 { 2416 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2417 2418 PetscFunctionBegin; 2419 *array = a->a; 2420 PetscFunctionReturn(0); 2421 } 2422 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2425 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2426 { 2427 PetscFunctionBegin; 2428 PetscFunctionReturn(0); 2429 } 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2433 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2434 { 2435 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2436 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2437 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2438 PetscErrorCode ierr; 2439 2440 PetscFunctionBegin; 2441 /* Set the number of nonzeros in the new matrix */ 2442 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2448 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2449 { 2450 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2451 PetscErrorCode ierr; 2452 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2453 PetscBLASInt one=1; 2454 2455 PetscFunctionBegin; 2456 if (str == SAME_NONZERO_PATTERN) { 2457 PetscScalar alpha = a; 2458 PetscBLASInt bnz; 2459 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2460 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2461 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2462 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2463 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2464 } else { 2465 Mat B; 2466 PetscInt *nnz; 2467 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2468 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2469 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2470 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2471 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2472 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2473 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2474 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2475 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2476 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2477 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2478 ierr = PetscFree(nnz);CHKERRQ(ierr); 2479 } 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2485 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2486 { 2487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2488 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2489 MatScalar *aa = a->a; 2490 2491 PetscFunctionBegin; 2492 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2498 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2499 { 2500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2501 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2502 MatScalar *aa = a->a; 2503 2504 PetscFunctionBegin; 2505 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2511 /* 2512 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2513 */ 2514 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2515 { 2516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2517 PetscErrorCode ierr; 2518 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2519 PetscInt nz = a->i[m],row,*jj,mr,col; 2520 2521 PetscFunctionBegin; 2522 *nn = n; 2523 if (!ia) PetscFunctionReturn(0); 2524 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2525 else { 2526 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2527 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2528 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2529 jj = a->j; 2530 for (i=0; i<nz; i++) { 2531 collengths[jj[i]]++; 2532 } 2533 cia[0] = oshift; 2534 for (i=0; i<n; i++) { 2535 cia[i+1] = cia[i] + collengths[i]; 2536 } 2537 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2538 jj = a->j; 2539 for (row=0; row<m; row++) { 2540 mr = a->i[row+1] - a->i[row]; 2541 for (i=0; i<mr; i++) { 2542 col = *jj++; 2543 2544 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2545 } 2546 } 2547 ierr = PetscFree(collengths);CHKERRQ(ierr); 2548 *ia = cia; *ja = cja; 2549 } 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2555 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2556 { 2557 PetscErrorCode ierr; 2558 2559 PetscFunctionBegin; 2560 if (!ia) PetscFunctionReturn(0); 2561 ierr = PetscFree(*ia);CHKERRQ(ierr); 2562 ierr = PetscFree(*ja);CHKERRQ(ierr); 2563 PetscFunctionReturn(0); 2564 } 2565 2566 /* 2567 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2568 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2569 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2570 */ 2571 #undef __FUNCT__ 2572 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2573 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2574 { 2575 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2576 PetscErrorCode ierr; 2577 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2578 PetscInt nz = a->i[m],row,*jj,mr,col; 2579 PetscInt *cspidx; 2580 2581 PetscFunctionBegin; 2582 *nn = n; 2583 if (!ia) PetscFunctionReturn(0); 2584 2585 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2586 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2587 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2588 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2589 jj = a->j; 2590 for (i=0; i<nz; i++) { 2591 collengths[jj[i]]++; 2592 } 2593 cia[0] = oshift; 2594 for (i=0; i<n; i++) { 2595 cia[i+1] = cia[i] + collengths[i]; 2596 } 2597 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2598 jj = a->j; 2599 for (row=0; row<m; row++) { 2600 mr = a->i[row+1] - a->i[row]; 2601 for (i=0; i<mr; i++) { 2602 col = *jj++; 2603 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2604 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2605 } 2606 } 2607 ierr = PetscFree(collengths);CHKERRQ(ierr); 2608 *ia = cia; *ja = cja; 2609 *spidx = cspidx; 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2615 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2616 { 2617 PetscErrorCode ierr; 2618 2619 PetscFunctionBegin; 2620 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2621 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2622 PetscFunctionReturn(0); 2623 } 2624 2625 #undef __FUNCT__ 2626 #define __FUNCT__ "MatShift_SeqBAIJ" 2627 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2628 { 2629 PetscErrorCode ierr; 2630 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2631 2632 PetscFunctionBegin; 2633 if (!Y->preallocated || !aij->nz) { 2634 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2635 } 2636 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 /* -------------------------------------------------------------------*/ 2641 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2642 MatGetRow_SeqBAIJ, 2643 MatRestoreRow_SeqBAIJ, 2644 MatMult_SeqBAIJ_N, 2645 /* 4*/ MatMultAdd_SeqBAIJ_N, 2646 MatMultTranspose_SeqBAIJ, 2647 MatMultTransposeAdd_SeqBAIJ, 2648 0, 2649 0, 2650 0, 2651 /* 10*/ 0, 2652 MatLUFactor_SeqBAIJ, 2653 0, 2654 0, 2655 MatTranspose_SeqBAIJ, 2656 /* 15*/ MatGetInfo_SeqBAIJ, 2657 MatEqual_SeqBAIJ, 2658 MatGetDiagonal_SeqBAIJ, 2659 MatDiagonalScale_SeqBAIJ, 2660 MatNorm_SeqBAIJ, 2661 /* 20*/ 0, 2662 MatAssemblyEnd_SeqBAIJ, 2663 MatSetOption_SeqBAIJ, 2664 MatZeroEntries_SeqBAIJ, 2665 /* 24*/ MatZeroRows_SeqBAIJ, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /* 29*/ MatSetUp_SeqBAIJ, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /* 34*/ MatDuplicate_SeqBAIJ, 2676 0, 2677 0, 2678 MatILUFactor_SeqBAIJ, 2679 0, 2680 /* 39*/ MatAXPY_SeqBAIJ, 2681 MatGetSubMatrices_SeqBAIJ, 2682 MatIncreaseOverlap_SeqBAIJ, 2683 MatGetValues_SeqBAIJ, 2684 MatCopy_SeqBAIJ, 2685 /* 44*/ 0, 2686 MatScale_SeqBAIJ, 2687 MatShift_SeqBAIJ, 2688 0, 2689 MatZeroRowsColumns_SeqBAIJ, 2690 /* 49*/ 0, 2691 MatGetRowIJ_SeqBAIJ, 2692 MatRestoreRowIJ_SeqBAIJ, 2693 MatGetColumnIJ_SeqBAIJ, 2694 MatRestoreColumnIJ_SeqBAIJ, 2695 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2696 0, 2697 0, 2698 0, 2699 MatSetValuesBlocked_SeqBAIJ, 2700 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2701 MatDestroy_SeqBAIJ, 2702 MatView_SeqBAIJ, 2703 0, 2704 0, 2705 /* 64*/ 0, 2706 0, 2707 0, 2708 0, 2709 0, 2710 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2711 0, 2712 MatConvert_Basic, 2713 0, 2714 0, 2715 /* 74*/ 0, 2716 MatFDColoringApply_BAIJ, 2717 0, 2718 0, 2719 0, 2720 /* 79*/ 0, 2721 0, 2722 0, 2723 0, 2724 MatLoad_SeqBAIJ, 2725 /* 84*/ 0, 2726 0, 2727 0, 2728 0, 2729 0, 2730 /* 89*/ 0, 2731 0, 2732 0, 2733 0, 2734 0, 2735 /* 94*/ 0, 2736 0, 2737 0, 2738 0, 2739 0, 2740 /* 99*/ 0, 2741 0, 2742 0, 2743 0, 2744 0, 2745 /*104*/ 0, 2746 MatRealPart_SeqBAIJ, 2747 MatImaginaryPart_SeqBAIJ, 2748 0, 2749 0, 2750 /*109*/ 0, 2751 0, 2752 0, 2753 0, 2754 MatMissingDiagonal_SeqBAIJ, 2755 /*114*/ 0, 2756 0, 2757 0, 2758 0, 2759 0, 2760 /*119*/ 0, 2761 0, 2762 MatMultHermitianTranspose_SeqBAIJ, 2763 MatMultHermitianTransposeAdd_SeqBAIJ, 2764 0, 2765 /*124*/ 0, 2766 0, 2767 MatInvertBlockDiagonal_SeqBAIJ, 2768 0, 2769 0, 2770 /*129*/ 0, 2771 0, 2772 0, 2773 0, 2774 0, 2775 /*134*/ 0, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*139*/ 0, 2781 0, 2782 0, 2783 MatFDColoringSetUp_SeqXAIJ, 2784 0, 2785 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2786 }; 2787 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2790 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2791 { 2792 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2793 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2798 2799 /* allocate space for values if not already there */ 2800 if (!aij->saved_values) { 2801 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2802 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2803 } 2804 2805 /* copy values over */ 2806 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2807 PetscFunctionReturn(0); 2808 } 2809 2810 #undef __FUNCT__ 2811 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2812 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2813 { 2814 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2815 PetscErrorCode ierr; 2816 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2817 2818 PetscFunctionBegin; 2819 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2820 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2821 2822 /* copy values over */ 2823 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2824 PetscFunctionReturn(0); 2825 } 2826 2827 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2828 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2832 static PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2833 { 2834 Mat_SeqBAIJ *b; 2835 PetscErrorCode ierr; 2836 PetscInt i,mbs,nbs,bs2; 2837 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2838 2839 PetscFunctionBegin; 2840 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2841 if (nz == MAT_SKIP_ALLOCATION) { 2842 skipallocation = PETSC_TRUE; 2843 nz = 0; 2844 } 2845 2846 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2847 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2848 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2849 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2850 2851 B->preallocated = PETSC_TRUE; 2852 2853 mbs = B->rmap->n/bs; 2854 nbs = B->cmap->n/bs; 2855 bs2 = bs*bs; 2856 2857 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2858 2859 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2860 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2861 if (nnz) { 2862 for (i=0; i<mbs; i++) { 2863 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2864 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2865 } 2866 } 2867 2868 b = (Mat_SeqBAIJ*)B->data; 2869 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2870 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2871 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2872 2873 if (!flg) { 2874 switch (bs) { 2875 case 1: 2876 B->ops->mult = MatMult_SeqBAIJ_1; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2878 break; 2879 case 2: 2880 B->ops->mult = MatMult_SeqBAIJ_2; 2881 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2882 break; 2883 case 3: 2884 B->ops->mult = MatMult_SeqBAIJ_3; 2885 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2886 break; 2887 case 4: 2888 B->ops->mult = MatMult_SeqBAIJ_4; 2889 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2890 break; 2891 case 5: 2892 B->ops->mult = MatMult_SeqBAIJ_5; 2893 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2894 break; 2895 case 6: 2896 B->ops->mult = MatMult_SeqBAIJ_6; 2897 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2898 break; 2899 case 7: 2900 B->ops->mult = MatMult_SeqBAIJ_7; 2901 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2902 break; 2903 case 15: 2904 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2905 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2906 break; 2907 default: 2908 B->ops->mult = MatMult_SeqBAIJ_N; 2909 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2910 break; 2911 } 2912 } 2913 B->ops->sor = MatSOR_SeqBAIJ; 2914 b->mbs = mbs; 2915 b->nbs = nbs; 2916 if (!skipallocation) { 2917 if (!b->imax) { 2918 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2919 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2920 2921 b->free_imax_ilen = PETSC_TRUE; 2922 } 2923 /* b->ilen will count nonzeros in each block row so far. */ 2924 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2925 if (!nnz) { 2926 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2927 else if (nz < 0) nz = 1; 2928 for (i=0; i<mbs; i++) b->imax[i] = nz; 2929 nz = nz*mbs; 2930 } else { 2931 nz = 0; 2932 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2933 } 2934 2935 /* allocate the matrix space */ 2936 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2937 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2938 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2939 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2940 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2941 2942 b->singlemalloc = PETSC_TRUE; 2943 b->i[0] = 0; 2944 for (i=1; i<mbs+1; i++) { 2945 b->i[i] = b->i[i-1] + b->imax[i-1]; 2946 } 2947 b->free_a = PETSC_TRUE; 2948 b->free_ij = PETSC_TRUE; 2949 } else { 2950 b->free_a = PETSC_FALSE; 2951 b->free_ij = PETSC_FALSE; 2952 } 2953 2954 b->bs2 = bs2; 2955 b->mbs = mbs; 2956 b->nz = 0; 2957 b->maxnz = nz; 2958 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2959 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2960 PetscFunctionReturn(0); 2961 } 2962 2963 #undef __FUNCT__ 2964 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2965 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2966 { 2967 PetscInt i,m,nz,nz_max=0,*nnz; 2968 PetscScalar *values=0; 2969 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2970 PetscErrorCode ierr; 2971 2972 PetscFunctionBegin; 2973 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2974 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2975 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2976 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2977 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2978 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2979 m = B->rmap->n/bs; 2980 2981 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2982 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2983 for (i=0; i<m; i++) { 2984 nz = ii[i+1]- ii[i]; 2985 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2986 nz_max = PetscMax(nz_max, nz); 2987 nnz[i] = nz; 2988 } 2989 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2990 ierr = PetscFree(nnz);CHKERRQ(ierr); 2991 2992 values = (PetscScalar*)V; 2993 if (!values) { 2994 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2995 } 2996 for (i=0; i<m; i++) { 2997 PetscInt ncols = ii[i+1] - ii[i]; 2998 const PetscInt *icols = jj + ii[i]; 2999 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3000 if (!roworiented) { 3001 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3002 } else { 3003 PetscInt j; 3004 for (j=0; j<ncols; j++) { 3005 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3006 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3007 } 3008 } 3009 } 3010 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3011 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3012 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3013 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3014 PetscFunctionReturn(0); 3015 } 3016 3017 /*MC 3018 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3019 block sparse compressed row format. 3020 3021 Options Database Keys: 3022 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3023 3024 Level: beginner 3025 3026 .seealso: MatCreateSeqBAIJ() 3027 M*/ 3028 3029 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "MatCreate_SeqBAIJ" 3033 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3034 { 3035 PetscErrorCode ierr; 3036 PetscMPIInt size; 3037 Mat_SeqBAIJ *b; 3038 3039 PetscFunctionBegin; 3040 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3041 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3042 3043 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3044 B->data = (void*)b; 3045 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3046 3047 b->row = 0; 3048 b->col = 0; 3049 b->icol = 0; 3050 b->reallocs = 0; 3051 b->saved_values = 0; 3052 3053 b->roworiented = PETSC_TRUE; 3054 b->nonew = 0; 3055 b->diag = 0; 3056 B->spptr = 0; 3057 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3058 b->keepnonzeropattern = PETSC_FALSE; 3059 3060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3068 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3069 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3075 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3076 { 3077 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3078 PetscErrorCode ierr; 3079 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3080 3081 PetscFunctionBegin; 3082 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3083 3084 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3085 c->imax = a->imax; 3086 c->ilen = a->ilen; 3087 c->free_imax_ilen = PETSC_FALSE; 3088 } else { 3089 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3090 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3091 for (i=0; i<mbs; i++) { 3092 c->imax[i] = a->imax[i]; 3093 c->ilen[i] = a->ilen[i]; 3094 } 3095 c->free_imax_ilen = PETSC_TRUE; 3096 } 3097 3098 /* allocate the matrix space */ 3099 if (mallocmatspace) { 3100 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3101 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3102 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3103 3104 c->i = a->i; 3105 c->j = a->j; 3106 c->singlemalloc = PETSC_FALSE; 3107 c->free_a = PETSC_TRUE; 3108 c->free_ij = PETSC_FALSE; 3109 c->parent = A; 3110 C->preallocated = PETSC_TRUE; 3111 C->assembled = PETSC_TRUE; 3112 3113 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3114 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3115 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3116 } else { 3117 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3118 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3119 3120 c->singlemalloc = PETSC_TRUE; 3121 c->free_a = PETSC_TRUE; 3122 c->free_ij = PETSC_TRUE; 3123 3124 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3125 if (mbs > 0) { 3126 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3127 if (cpvalues == MAT_COPY_VALUES) { 3128 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3129 } else { 3130 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3131 } 3132 } 3133 C->preallocated = PETSC_TRUE; 3134 C->assembled = PETSC_TRUE; 3135 } 3136 } 3137 3138 c->roworiented = a->roworiented; 3139 c->nonew = a->nonew; 3140 3141 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3142 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3143 3144 c->bs2 = a->bs2; 3145 c->mbs = a->mbs; 3146 c->nbs = a->nbs; 3147 3148 if (a->diag) { 3149 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3150 c->diag = a->diag; 3151 c->free_diag = PETSC_FALSE; 3152 } else { 3153 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3154 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3155 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3156 c->free_diag = PETSC_TRUE; 3157 } 3158 } else c->diag = 0; 3159 3160 c->nz = a->nz; 3161 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3162 c->solve_work = NULL; 3163 c->mult_work = NULL; 3164 c->sor_workt = NULL; 3165 c->sor_work = NULL; 3166 3167 c->compressedrow.use = a->compressedrow.use; 3168 c->compressedrow.nrows = a->compressedrow.nrows; 3169 if (a->compressedrow.use) { 3170 i = a->compressedrow.nrows; 3171 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3172 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3173 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3174 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3175 } else { 3176 c->compressedrow.use = PETSC_FALSE; 3177 c->compressedrow.i = NULL; 3178 c->compressedrow.rindex = NULL; 3179 } 3180 C->nonzerostate = A->nonzerostate; 3181 3182 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3183 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3184 PetscFunctionReturn(0); 3185 } 3186 3187 #undef __FUNCT__ 3188 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3189 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3190 { 3191 PetscErrorCode ierr; 3192 3193 PetscFunctionBegin; 3194 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3195 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3196 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3197 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3198 PetscFunctionReturn(0); 3199 } 3200 3201 #undef __FUNCT__ 3202 #define __FUNCT__ "MatLoad_SeqBAIJ" 3203 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3204 { 3205 Mat_SeqBAIJ *a; 3206 PetscErrorCode ierr; 3207 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3208 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3209 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3210 PetscInt *masked,nmask,tmp,bs2,ishift; 3211 PetscMPIInt size; 3212 int fd; 3213 PetscScalar *aa; 3214 MPI_Comm comm; 3215 3216 PetscFunctionBegin; 3217 /* force binary viewer to load .info file if it has not yet done so */ 3218 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3219 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3220 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3221 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3222 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3223 if (bs < 0) bs = 1; 3224 bs2 = bs*bs; 3225 3226 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3227 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3228 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3229 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3230 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3231 M = header[1]; N = header[2]; nz = header[3]; 3232 3233 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3234 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3235 3236 /* 3237 This code adds extra rows to make sure the number of rows is 3238 divisible by the blocksize 3239 */ 3240 mbs = M/bs; 3241 extra_rows = bs - M + bs*(mbs); 3242 if (extra_rows == bs) extra_rows = 0; 3243 else mbs++; 3244 if (extra_rows) { 3245 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3246 } 3247 3248 /* Set global sizes if not already set */ 3249 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3250 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3251 } else { /* Check if the matrix global sizes are correct */ 3252 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3253 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3254 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3255 } 3256 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3257 } 3258 3259 /* read in row lengths */ 3260 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3261 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3262 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3263 3264 /* read in column indices */ 3265 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3266 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3267 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3268 3269 /* loop over row lengths determining block row lengths */ 3270 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3271 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3272 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3273 rowcount = 0; 3274 nzcount = 0; 3275 for (i=0; i<mbs; i++) { 3276 nmask = 0; 3277 for (j=0; j<bs; j++) { 3278 kmax = rowlengths[rowcount]; 3279 for (k=0; k<kmax; k++) { 3280 tmp = jj[nzcount++]/bs; 3281 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3282 } 3283 rowcount++; 3284 } 3285 browlengths[i] += nmask; 3286 /* zero out the mask elements we set */ 3287 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3288 } 3289 3290 /* Do preallocation */ 3291 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3292 a = (Mat_SeqBAIJ*)newmat->data; 3293 3294 /* set matrix "i" values */ 3295 a->i[0] = 0; 3296 for (i=1; i<= mbs; i++) { 3297 a->i[i] = a->i[i-1] + browlengths[i-1]; 3298 a->ilen[i-1] = browlengths[i-1]; 3299 } 3300 a->nz = 0; 3301 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3302 3303 /* read in nonzero values */ 3304 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3305 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3306 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3307 3308 /* set "a" and "j" values into matrix */ 3309 nzcount = 0; jcount = 0; 3310 for (i=0; i<mbs; i++) { 3311 nzcountb = nzcount; 3312 nmask = 0; 3313 for (j=0; j<bs; j++) { 3314 kmax = rowlengths[i*bs+j]; 3315 for (k=0; k<kmax; k++) { 3316 tmp = jj[nzcount++]/bs; 3317 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3318 } 3319 } 3320 /* sort the masked values */ 3321 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3322 3323 /* set "j" values into matrix */ 3324 maskcount = 1; 3325 for (j=0; j<nmask; j++) { 3326 a->j[jcount++] = masked[j]; 3327 mask[masked[j]] = maskcount++; 3328 } 3329 /* set "a" values into matrix */ 3330 ishift = bs2*a->i[i]; 3331 for (j=0; j<bs; j++) { 3332 kmax = rowlengths[i*bs+j]; 3333 for (k=0; k<kmax; k++) { 3334 tmp = jj[nzcountb]/bs; 3335 block = mask[tmp] - 1; 3336 point = jj[nzcountb] - bs*tmp; 3337 idx = ishift + bs2*block + j + bs*point; 3338 a->a[idx] = (MatScalar)aa[nzcountb++]; 3339 } 3340 } 3341 /* zero out the mask elements we set */ 3342 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3343 } 3344 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3345 3346 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3347 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3348 ierr = PetscFree(aa);CHKERRQ(ierr); 3349 ierr = PetscFree(jj);CHKERRQ(ierr); 3350 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3351 3352 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3353 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3354 PetscFunctionReturn(0); 3355 } 3356 3357 #undef __FUNCT__ 3358 #define __FUNCT__ "MatCreateSeqBAIJ" 3359 /*@C 3360 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3361 compressed row) format. For good matrix assembly performance the 3362 user should preallocate the matrix storage by setting the parameter nz 3363 (or the array nnz). By setting these parameters accurately, performance 3364 during matrix assembly can be increased by more than a factor of 50. 3365 3366 Collective on MPI_Comm 3367 3368 Input Parameters: 3369 + comm - MPI communicator, set to PETSC_COMM_SELF 3370 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3371 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3372 . m - number of rows 3373 . n - number of columns 3374 . nz - number of nonzero blocks per block row (same for all rows) 3375 - nnz - array containing the number of nonzero blocks in the various block rows 3376 (possibly different for each block row) or NULL 3377 3378 Output Parameter: 3379 . A - the matrix 3380 3381 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3382 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3383 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3384 3385 Options Database Keys: 3386 . -mat_no_unroll - uses code that does not unroll the loops in the 3387 block calculations (much slower) 3388 . -mat_block_size - size of the blocks to use 3389 3390 Level: intermediate 3391 3392 Notes: 3393 The number of rows and columns must be divisible by blocksize. 3394 3395 If the nnz parameter is given then the nz parameter is ignored 3396 3397 A nonzero block is any block that as 1 or more nonzeros in it 3398 3399 The block AIJ format is fully compatible with standard Fortran 77 3400 storage. That is, the stored row and column indices can begin at 3401 either one (as in Fortran) or zero. See the users' manual for details. 3402 3403 Specify the preallocated storage with either nz or nnz (not both). 3404 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3405 allocation. See Users-Manual: ch_mat for details. 3406 matrices. 3407 3408 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3409 @*/ 3410 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3411 { 3412 PetscErrorCode ierr; 3413 3414 PetscFunctionBegin; 3415 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3416 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3417 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3418 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3419 PetscFunctionReturn(0); 3420 } 3421 3422 #undef __FUNCT__ 3423 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3424 /*@C 3425 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3426 per row in the matrix. For good matrix assembly performance the 3427 user should preallocate the matrix storage by setting the parameter nz 3428 (or the array nnz). By setting these parameters accurately, performance 3429 during matrix assembly can be increased by more than a factor of 50. 3430 3431 Collective on MPI_Comm 3432 3433 Input Parameters: 3434 + B - the matrix 3435 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3436 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3437 . nz - number of block nonzeros per block row (same for all rows) 3438 - nnz - array containing the number of block nonzeros in the various block rows 3439 (possibly different for each block row) or NULL 3440 3441 Options Database Keys: 3442 . -mat_no_unroll - uses code that does not unroll the loops in the 3443 block calculations (much slower) 3444 . -mat_block_size - size of the blocks to use 3445 3446 Level: intermediate 3447 3448 Notes: 3449 If the nnz parameter is given then the nz parameter is ignored 3450 3451 You can call MatGetInfo() to get information on how effective the preallocation was; 3452 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3453 You can also run with the option -info and look for messages with the string 3454 malloc in them to see if additional memory allocation was needed. 3455 3456 The block AIJ format is fully compatible with standard Fortran 77 3457 storage. That is, the stored row and column indices can begin at 3458 either one (as in Fortran) or zero. See the users' manual for details. 3459 3460 Specify the preallocated storage with either nz or nnz (not both). 3461 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3462 allocation. See Users-Manual: ch_mat for details. 3463 3464 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3465 @*/ 3466 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3467 { 3468 PetscErrorCode ierr; 3469 3470 PetscFunctionBegin; 3471 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3472 PetscValidType(B,1); 3473 PetscValidLogicalCollectiveInt(B,bs,2); 3474 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3475 PetscFunctionReturn(0); 3476 } 3477 3478 #undef __FUNCT__ 3479 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3480 /*@C 3481 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3482 (the default sequential PETSc format). 3483 3484 Collective on MPI_Comm 3485 3486 Input Parameters: 3487 + B - the matrix 3488 . i - the indices into j for the start of each local row (starts with zero) 3489 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3490 - v - optional values in the matrix 3491 3492 Level: developer 3493 3494 Notes: 3495 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3496 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3497 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3498 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3499 block column and the second index is over columns within a block. 3500 3501 .keywords: matrix, aij, compressed row, sparse 3502 3503 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3504 @*/ 3505 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3506 { 3507 PetscErrorCode ierr; 3508 3509 PetscFunctionBegin; 3510 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3511 PetscValidType(B,1); 3512 PetscValidLogicalCollectiveInt(B,bs,2); 3513 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3514 PetscFunctionReturn(0); 3515 } 3516 3517 3518 #undef __FUNCT__ 3519 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3520 /*@ 3521 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3522 3523 Collective on MPI_Comm 3524 3525 Input Parameters: 3526 + comm - must be an MPI communicator of size 1 3527 . bs - size of block 3528 . m - number of rows 3529 . n - number of columns 3530 . i - row indices 3531 . j - column indices 3532 - a - matrix values 3533 3534 Output Parameter: 3535 . mat - the matrix 3536 3537 Level: advanced 3538 3539 Notes: 3540 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3541 once the matrix is destroyed 3542 3543 You cannot set new nonzero locations into this matrix, that will generate an error. 3544 3545 The i and j indices are 0 based 3546 3547 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3548 3549 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3550 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3551 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3552 with column-major ordering within blocks. 3553 3554 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3555 3556 @*/ 3557 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3558 { 3559 PetscErrorCode ierr; 3560 PetscInt ii; 3561 Mat_SeqBAIJ *baij; 3562 3563 PetscFunctionBegin; 3564 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3565 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3566 3567 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3568 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3569 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3570 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3571 baij = (Mat_SeqBAIJ*)(*mat)->data; 3572 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3573 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3574 3575 baij->i = i; 3576 baij->j = j; 3577 baij->a = a; 3578 3579 baij->singlemalloc = PETSC_FALSE; 3580 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3581 baij->free_a = PETSC_FALSE; 3582 baij->free_ij = PETSC_FALSE; 3583 3584 for (ii=0; ii<m; ii++) { 3585 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3586 #if defined(PETSC_USE_DEBUG) 3587 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3588 #endif 3589 } 3590 #if defined(PETSC_USE_DEBUG) 3591 for (ii=0; ii<baij->i[m]; ii++) { 3592 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3593 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3594 } 3595 #endif 3596 3597 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3598 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3599 PetscFunctionReturn(0); 3600 } 3601 3602 #undef __FUNCT__ 3603 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3604 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3605 { 3606 PetscErrorCode ierr; 3607 3608 PetscFunctionBegin; 3609 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3610 PetscFunctionReturn(0); 3611 } 3612