1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 allowzeropivot = PetscNot(A->erroriffailure); 24 25 if (a->idiagvalid) { 26 if (values) *values = a->idiag; 27 PetscFunctionReturn(0); 28 } 29 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 30 diag_offset = a->diag; 31 if (!a->idiag) { 32 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 33 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 34 } 35 diag = a->idiag; 36 mdiag = a->idiag+bs2*mbs; 37 if (values) *values = a->idiag; 38 /* factor and invert each block */ 39 switch (bs) { 40 case 1: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 1*diag_offset[i]; 43 diag[0] = odiag[0]; 44 mdiag[0] = odiag[0]; 45 46 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 47 if (allowzeropivot) { 48 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 49 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 50 A->factorerror_zeropivot_row = i; 51 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 52 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 53 } 54 55 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 56 diag += 1; 57 mdiag += 1; 58 } 59 break; 60 case 2: 61 for (i=0; i<mbs; i++) { 62 odiag = v + 4*diag_offset[i]; 63 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 64 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 65 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 66 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 67 diag += 4; 68 mdiag += 4; 69 } 70 break; 71 case 3: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 9*diag_offset[i]; 74 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 75 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 76 diag[8] = odiag[8]; 77 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 78 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 79 mdiag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 mdiag += 9; 84 } 85 break; 86 case 4: 87 for (i=0; i<mbs; i++) { 88 odiag = v + 16*diag_offset[i]; 89 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 90 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 91 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 92 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 93 diag += 16; 94 mdiag += 16; 95 } 96 break; 97 case 5: 98 for (i=0; i<mbs; i++) { 99 odiag = v + 25*diag_offset[i]; 100 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 101 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 103 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 case 6: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 36*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 114 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 115 diag += 36; 116 mdiag += 36; 117 } 118 break; 119 case 7: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 49*diag_offset[i]; 122 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 125 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 126 diag += 49; 127 mdiag += 49; 128 } 129 break; 130 default: 131 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 132 for (i=0; i<mbs; i++) { 133 odiag = v + bs2*diag_offset[i]; 134 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 135 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 136 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 137 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 138 diag += bs2; 139 mdiag += bs2; 140 } 141 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 142 } 143 a->idiagvalid = PETSC_TRUE; 144 PetscFunctionReturn(0); 145 } 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "MatSOR_SeqBAIJ" 149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 150 { 151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 152 PetscScalar *x,*work,*w,*workt,*t; 153 const MatScalar *v,*aa = a->a, *idiag; 154 const PetscScalar *b,*xb; 155 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 162 its = its*lits; 163 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 164 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 169 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 170 171 if (!m) PetscFunctionReturn(0); 172 diag = a->diag; 173 idiag = a->idiag; 174 k = PetscMax(A->rmap->n,A->cmap->n); 175 if (!a->mult_work) { 176 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 177 } 178 if (!a->sor_workt) { 179 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 180 } 181 if (!a->sor_work) { 182 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 183 } 184 work = a->mult_work; 185 t = a->sor_workt; 186 w = a->sor_work; 187 188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 189 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 190 191 if (flag & SOR_ZERO_INITIAL_GUESS) { 192 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 193 switch (bs) { 194 case 1: 195 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 196 t[0] = b[0]; 197 i2 = 1; 198 idiag += 1; 199 for (i=1; i<m; i++) { 200 v = aa + ai[i]; 201 vi = aj + ai[i]; 202 nz = diag[i] - ai[i]; 203 s[0] = b[i2]; 204 for (j=0; j<nz; j++) { 205 xw[0] = x[vi[j]]; 206 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 207 } 208 t[i2] = s[0]; 209 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 210 x[i2] = xw[0]; 211 idiag += 1; 212 i2 += 1; 213 } 214 break; 215 case 2: 216 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 217 t[0] = b[0]; t[1] = b[1]; 218 i2 = 2; 219 idiag += 4; 220 for (i=1; i<m; i++) { 221 v = aa + 4*ai[i]; 222 vi = aj + ai[i]; 223 nz = diag[i] - ai[i]; 224 s[0] = b[i2]; s[1] = b[i2+1]; 225 for (j=0; j<nz; j++) { 226 idx = 2*vi[j]; 227 it = 4*j; 228 xw[0] = x[idx]; xw[1] = x[1+idx]; 229 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 230 } 231 t[i2] = s[0]; t[i2+1] = s[1]; 232 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 233 x[i2] = xw[0]; x[i2+1] = xw[1]; 234 idiag += 4; 235 i2 += 2; 236 } 237 break; 238 case 3: 239 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 240 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 241 i2 = 3; 242 idiag += 9; 243 for (i=1; i<m; i++) { 244 v = aa + 9*ai[i]; 245 vi = aj + ai[i]; 246 nz = diag[i] - ai[i]; 247 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 248 while (nz--) { 249 idx = 3*(*vi++); 250 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 251 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 252 v += 9; 253 } 254 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 255 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 256 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 257 idiag += 9; 258 i2 += 3; 259 } 260 break; 261 case 4: 262 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 263 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 264 i2 = 4; 265 idiag += 16; 266 for (i=1; i<m; i++) { 267 v = aa + 16*ai[i]; 268 vi = aj + ai[i]; 269 nz = diag[i] - ai[i]; 270 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 271 while (nz--) { 272 idx = 4*(*vi++); 273 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 274 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 275 v += 16; 276 } 277 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 278 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 279 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 280 idiag += 16; 281 i2 += 4; 282 } 283 break; 284 case 5: 285 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 286 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 287 i2 = 5; 288 idiag += 25; 289 for (i=1; i<m; i++) { 290 v = aa + 25*ai[i]; 291 vi = aj + ai[i]; 292 nz = diag[i] - ai[i]; 293 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 294 while (nz--) { 295 idx = 5*(*vi++); 296 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 298 v += 25; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 301 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 302 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 303 idiag += 25; 304 i2 += 5; 305 } 306 break; 307 case 6: 308 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 309 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 310 i2 = 6; 311 idiag += 36; 312 for (i=1; i<m; i++) { 313 v = aa + 36*ai[i]; 314 vi = aj + ai[i]; 315 nz = diag[i] - ai[i]; 316 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 317 while (nz--) { 318 idx = 6*(*vi++); 319 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 320 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 321 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 322 v += 36; 323 } 324 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 325 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 326 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 328 idiag += 36; 329 i2 += 6; 330 } 331 break; 332 case 7: 333 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 335 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 336 i2 = 7; 337 idiag += 49; 338 for (i=1; i<m; i++) { 339 v = aa + 49*ai[i]; 340 vi = aj + ai[i]; 341 nz = diag[i] - ai[i]; 342 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 343 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 344 while (nz--) { 345 idx = 7*(*vi++); 346 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 347 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 348 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 349 v += 49; 350 } 351 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 352 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 353 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 354 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 355 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 356 idiag += 49; 357 i2 += 7; 358 } 359 break; 360 default: 361 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 362 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 363 i2 = bs; 364 idiag += bs2; 365 for (i=1; i<m; i++) { 366 v = aa + bs2*ai[i]; 367 vi = aj + ai[i]; 368 nz = diag[i] - ai[i]; 369 370 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 371 /* copy all rows of x that are needed into contiguous space */ 372 workt = work; 373 for (j=0; j<nz; j++) { 374 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 workt += bs; 376 } 377 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 378 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 380 381 idiag += bs2; 382 i2 += bs; 383 } 384 break; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 388 xb = t; 389 } 390 else xb = b; 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 392 idiag = a->idiag+bs2*(a->mbs-1); 393 i2 = bs * (m-1); 394 switch (bs) { 395 case 1: 396 s[0] = xb[i2]; 397 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 398 x[i2] = xw[0]; 399 i2 -= 1; 400 for (i=m-2; i>=0; i--) { 401 v = aa + (diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s[0] = xb[i2]; 405 for (j=0; j<nz; j++) { 406 xw[0] = x[vi[j]]; 407 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 408 } 409 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 410 x[i2] = xw[0]; 411 idiag -= 1; 412 i2 -= 1; 413 } 414 break; 415 case 2: 416 s[0] = xb[i2]; s[1] = xb[i2+1]; 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 i2 -= 2; 420 idiag -= 4; 421 for (i=m-2; i>=0; i--) { 422 v = aa + 4*(diag[i] + 1); 423 vi = aj + diag[i] + 1; 424 nz = ai[i+1] - diag[i] - 1; 425 s[0] = xb[i2]; s[1] = xb[i2+1]; 426 for (j=0; j<nz; j++) { 427 idx = 2*vi[j]; 428 it = 4*j; 429 xw[0] = x[idx]; xw[1] = x[1+idx]; 430 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 431 } 432 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 433 x[i2] = xw[0]; x[i2+1] = xw[1]; 434 idiag -= 4; 435 i2 -= 2; 436 } 437 break; 438 case 3: 439 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 i2 -= 3; 443 idiag -= 9; 444 for (i=m-2; i>=0; i--) { 445 v = aa + 9*(diag[i]+1); 446 vi = aj + diag[i] + 1; 447 nz = ai[i+1] - diag[i] - 1; 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 449 while (nz--) { 450 idx = 3*(*vi++); 451 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 452 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 453 v += 9; 454 } 455 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 456 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 457 idiag -= 9; 458 i2 -= 3; 459 } 460 break; 461 case 4: 462 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 i2 -= 4; 466 idiag -= 16; 467 for (i=m-2; i>=0; i--) { 468 v = aa + 16*(diag[i]+1); 469 vi = aj + diag[i] + 1; 470 nz = ai[i+1] - diag[i] - 1; 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 472 while (nz--) { 473 idx = 4*(*vi++); 474 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 475 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 476 v += 16; 477 } 478 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 479 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 480 idiag -= 16; 481 i2 -= 4; 482 } 483 break; 484 case 5: 485 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 i2 -= 5; 489 idiag -= 25; 490 for (i=m-2; i>=0; i--) { 491 v = aa + 25*(diag[i]+1); 492 vi = aj + diag[i] + 1; 493 nz = ai[i+1] - diag[i] - 1; 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 495 while (nz--) { 496 idx = 5*(*vi++); 497 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 499 v += 25; 500 } 501 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 503 idiag -= 25; 504 i2 -= 5; 505 } 506 break; 507 case 6: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 509 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 511 i2 -= 6; 512 idiag -= 36; 513 for (i=m-2; i>=0; i--) { 514 v = aa + 36*(diag[i]+1); 515 vi = aj + diag[i] + 1; 516 nz = ai[i+1] - diag[i] - 1; 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 518 while (nz--) { 519 idx = 6*(*vi++); 520 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 521 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 522 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 523 v += 36; 524 } 525 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 526 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 527 idiag -= 36; 528 i2 -= 6; 529 } 530 break; 531 case 7: 532 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 533 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 534 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 536 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 537 i2 -= 7; 538 idiag -= 49; 539 for (i=m-2; i>=0; i--) { 540 v = aa + 49*(diag[i]+1); 541 vi = aj + diag[i] + 1; 542 nz = ai[i+1] - diag[i] - 1; 543 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 544 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 545 while (nz--) { 546 idx = 7*(*vi++); 547 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 548 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 549 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 550 v += 49; 551 } 552 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 553 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 554 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 555 idiag -= 49; 556 i2 -= 7; 557 } 558 break; 559 default: 560 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 561 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 562 i2 -= bs; 563 idiag -= bs2; 564 for (i=m-2; i>=0; i--) { 565 v = aa + bs2*(diag[i]+1); 566 vi = aj + diag[i] + 1; 567 nz = ai[i+1] - diag[i] - 1; 568 569 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 570 /* copy all rows of x that are needed into contiguous space */ 571 workt = work; 572 for (j=0; j<nz; j++) { 573 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 workt += bs; 575 } 576 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 577 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 578 579 idiag -= bs2; 580 i2 -= bs; 581 } 582 break; 583 } 584 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 585 } 586 its--; 587 } 588 while (its--) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 590 idiag = a->idiag; 591 i2 = 0; 592 switch (bs) { 593 case 1: 594 for (i=0; i<m; i++) { 595 v = aa + ai[i]; 596 vi = aj + ai[i]; 597 nz = ai[i+1] - ai[i]; 598 s[0] = b[i2]; 599 for (j=0; j<nz; j++) { 600 xw[0] = x[vi[j]]; 601 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 602 } 603 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 604 x[i2] += xw[0]; 605 idiag += 1; 606 i2 += 1; 607 } 608 break; 609 case 2: 610 for (i=0; i<m; i++) { 611 v = aa + 4*ai[i]; 612 vi = aj + ai[i]; 613 nz = ai[i+1] - ai[i]; 614 s[0] = b[i2]; s[1] = b[i2+1]; 615 for (j=0; j<nz; j++) { 616 idx = 2*vi[j]; 617 it = 4*j; 618 xw[0] = x[idx]; xw[1] = x[1+idx]; 619 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 620 } 621 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 622 x[i2] += xw[0]; x[i2+1] += xw[1]; 623 idiag += 4; 624 i2 += 2; 625 } 626 break; 627 case 3: 628 for (i=0; i<m; i++) { 629 v = aa + 9*ai[i]; 630 vi = aj + ai[i]; 631 nz = ai[i+1] - ai[i]; 632 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 633 while (nz--) { 634 idx = 3*(*vi++); 635 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 636 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 637 v += 9; 638 } 639 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 640 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 641 idiag += 9; 642 i2 += 3; 643 } 644 break; 645 case 4: 646 for (i=0; i<m; i++) { 647 v = aa + 16*ai[i]; 648 vi = aj + ai[i]; 649 nz = ai[i+1] - ai[i]; 650 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 651 while (nz--) { 652 idx = 4*(*vi++); 653 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 654 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 655 v += 16; 656 } 657 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 658 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 659 idiag += 16; 660 i2 += 4; 661 } 662 break; 663 case 5: 664 for (i=0; i<m; i++) { 665 v = aa + 25*ai[i]; 666 vi = aj + ai[i]; 667 nz = ai[i+1] - ai[i]; 668 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 672 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 673 v += 25; 674 } 675 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 676 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 677 idiag += 25; 678 i2 += 5; 679 } 680 break; 681 case 6: 682 for (i=0; i<m; i++) { 683 v = aa + 36*ai[i]; 684 vi = aj + ai[i]; 685 nz = ai[i+1] - ai[i]; 686 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 687 while (nz--) { 688 idx = 6*(*vi++); 689 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 690 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 691 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 692 v += 36; 693 } 694 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 695 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 696 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 697 idiag += 36; 698 i2 += 6; 699 } 700 break; 701 case 7: 702 for (i=0; i<m; i++) { 703 v = aa + 49*ai[i]; 704 vi = aj + ai[i]; 705 nz = ai[i+1] - ai[i]; 706 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 707 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 708 while (nz--) { 709 idx = 7*(*vi++); 710 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 711 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 712 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 713 v += 49; 714 } 715 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 716 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 717 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 718 idiag += 49; 719 i2 += 7; 720 } 721 break; 722 default: 723 for (i=0; i<m; i++) { 724 v = aa + bs2*ai[i]; 725 vi = aj + ai[i]; 726 nz = ai[i+1] - ai[i]; 727 728 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 729 /* copy all rows of x that are needed into contiguous space */ 730 workt = work; 731 for (j=0; j<nz; j++) { 732 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 workt += bs; 734 } 735 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 736 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 737 738 idiag += bs2; 739 i2 += bs; 740 } 741 break; 742 } 743 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 744 } 745 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 746 idiag = a->idiag+bs2*(a->mbs-1); 747 i2 = bs * (m-1); 748 switch (bs) { 749 case 1: 750 for (i=m-1; i>=0; i--) { 751 v = aa + ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; 755 for (j=0; j<nz; j++) { 756 xw[0] = x[vi[j]]; 757 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 758 } 759 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 760 x[i2] += xw[0]; 761 idiag -= 1; 762 i2 -= 1; 763 } 764 break; 765 case 2: 766 for (i=m-1; i>=0; i--) { 767 v = aa + 4*ai[i]; 768 vi = aj + ai[i]; 769 nz = ai[i+1] - ai[i]; 770 s[0] = b[i2]; s[1] = b[i2+1]; 771 for (j=0; j<nz; j++) { 772 idx = 2*vi[j]; 773 it = 4*j; 774 xw[0] = x[idx]; xw[1] = x[1+idx]; 775 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 776 } 777 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 778 x[i2] += xw[0]; x[i2+1] += xw[1]; 779 idiag -= 4; 780 i2 -= 2; 781 } 782 break; 783 case 3: 784 for (i=m-1; i>=0; i--) { 785 v = aa + 9*ai[i]; 786 vi = aj + ai[i]; 787 nz = ai[i+1] - ai[i]; 788 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 789 while (nz--) { 790 idx = 3*(*vi++); 791 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 792 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 793 v += 9; 794 } 795 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 796 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 797 idiag -= 9; 798 i2 -= 3; 799 } 800 break; 801 case 4: 802 for (i=m-1; i>=0; i--) { 803 v = aa + 16*ai[i]; 804 vi = aj + ai[i]; 805 nz = ai[i+1] - ai[i]; 806 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 807 while (nz--) { 808 idx = 4*(*vi++); 809 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 810 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 811 v += 16; 812 } 813 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 814 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 815 idiag -= 16; 816 i2 -= 4; 817 } 818 break; 819 case 5: 820 for (i=m-1; i>=0; i--) { 821 v = aa + 25*ai[i]; 822 vi = aj + ai[i]; 823 nz = ai[i+1] - ai[i]; 824 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 825 while (nz--) { 826 idx = 5*(*vi++); 827 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 828 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 829 v += 25; 830 } 831 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 832 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 833 idiag -= 25; 834 i2 -= 5; 835 } 836 break; 837 case 6: 838 for (i=m-1; i>=0; i--) { 839 v = aa + 36*ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i+1] - ai[i]; 842 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 843 while (nz--) { 844 idx = 6*(*vi++); 845 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 846 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 847 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 848 v += 36; 849 } 850 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 851 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 852 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 853 idiag -= 36; 854 i2 -= 6; 855 } 856 break; 857 case 7: 858 for (i=m-1; i>=0; i--) { 859 v = aa + 49*ai[i]; 860 vi = aj + ai[i]; 861 nz = ai[i+1] - ai[i]; 862 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 863 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 864 while (nz--) { 865 idx = 7*(*vi++); 866 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 867 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 868 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 869 v += 49; 870 } 871 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 872 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 873 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 874 idiag -= 49; 875 i2 -= 7; 876 } 877 break; 878 default: 879 for (i=m-1; i>=0; i--) { 880 v = aa + bs2*ai[i]; 881 vi = aj + ai[i]; 882 nz = ai[i+1] - ai[i]; 883 884 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 /* copy all rows of x that are needed into contiguous space */ 886 workt = work; 887 for (j=0; j<nz; j++) { 888 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 workt += bs; 890 } 891 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 892 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 893 894 idiag -= bs2; 895 i2 -= bs; 896 } 897 break; 898 } 899 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 900 } 901 } 902 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 903 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 908 /* 909 Special version for direct calls from Fortran (Used in PETSc-fun3d) 910 */ 911 #if defined(PETSC_HAVE_FORTRAN_CAPS) 912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 914 #define matsetvaluesblocked4_ matsetvaluesblocked4 915 #endif 916 917 #undef __FUNCT__ 918 #define __FUNCT__ "matsetvaluesblocked4_" 919 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 920 { 921 Mat A = *AA; 922 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 923 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 924 PetscInt *ai =a->i,*ailen=a->ilen; 925 PetscInt *aj =a->j,stepval,lastcol = -1; 926 const PetscScalar *value = v; 927 MatScalar *ap,*aa = a->a,*bap; 928 929 PetscFunctionBegin; 930 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 931 stepval = (n-1)*4; 932 for (k=0; k<m; k++) { /* loop over added rows */ 933 row = im[k]; 934 rp = aj + ai[row]; 935 ap = aa + 16*ai[row]; 936 nrow = ailen[row]; 937 low = 0; 938 high = nrow; 939 for (l=0; l<n; l++) { /* loop over added columns */ 940 col = in[l]; 941 if (col <= lastcol) low = 0; 942 else high = nrow; 943 lastcol = col; 944 value = v + k*(stepval+4 + l)*4; 945 while (high-low > 7) { 946 t = (low+high)/2; 947 if (rp[t] > col) high = t; 948 else low = t; 949 } 950 for (i=low; i<high; i++) { 951 if (rp[i] > col) break; 952 if (rp[i] == col) { 953 bap = ap + 16*i; 954 for (ii=0; ii<4; ii++,value+=stepval) { 955 for (jj=ii; jj<16; jj+=4) { 956 bap[jj] += *value++; 957 } 958 } 959 goto noinsert2; 960 } 961 } 962 N = nrow++ - 1; 963 high++; /* added new column index thus must search to one higher than before */ 964 /* shift up all the later entries in this row */ 965 for (ii=N; ii>=i; ii--) { 966 rp[ii+1] = rp[ii]; 967 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 968 } 969 if (N >= i) { 970 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 971 } 972 rp[i] = col; 973 bap = ap + 16*i; 974 for (ii=0; ii<4; ii++,value+=stepval) { 975 for (jj=ii; jj<16; jj+=4) { 976 bap[jj] = *value++; 977 } 978 } 979 noinsert2:; 980 low = i; 981 } 982 ailen[row] = nrow; 983 } 984 PetscFunctionReturnVoid(); 985 } 986 987 #if defined(PETSC_HAVE_FORTRAN_CAPS) 988 #define matsetvalues4_ MATSETVALUES4 989 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 990 #define matsetvalues4_ matsetvalues4 991 #endif 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatSetValues4_" 995 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 996 { 997 Mat A = *AA; 998 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 999 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1000 PetscInt *ai=a->i,*ailen=a->ilen; 1001 PetscInt *aj=a->j,brow,bcol; 1002 PetscInt ridx,cidx,lastcol = -1; 1003 MatScalar *ap,value,*aa=a->a,*bap; 1004 1005 PetscFunctionBegin; 1006 for (k=0; k<m; k++) { /* loop over added rows */ 1007 row = im[k]; brow = row/4; 1008 rp = aj + ai[brow]; 1009 ap = aa + 16*ai[brow]; 1010 nrow = ailen[brow]; 1011 low = 0; 1012 high = nrow; 1013 for (l=0; l<n; l++) { /* loop over added columns */ 1014 col = in[l]; bcol = col/4; 1015 ridx = row % 4; cidx = col % 4; 1016 value = v[l + k*n]; 1017 if (col <= lastcol) low = 0; 1018 else high = nrow; 1019 lastcol = col; 1020 while (high-low > 7) { 1021 t = (low+high)/2; 1022 if (rp[t] > bcol) high = t; 1023 else low = t; 1024 } 1025 for (i=low; i<high; i++) { 1026 if (rp[i] > bcol) break; 1027 if (rp[i] == bcol) { 1028 bap = ap + 16*i + 4*cidx + ridx; 1029 *bap += value; 1030 goto noinsert1; 1031 } 1032 } 1033 N = nrow++ - 1; 1034 high++; /* added new column thus must search to one higher than before */ 1035 /* shift up all the later entries in this row */ 1036 for (ii=N; ii>=i; ii--) { 1037 rp[ii+1] = rp[ii]; 1038 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1039 } 1040 if (N>=i) { 1041 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1042 } 1043 rp[i] = bcol; 1044 ap[16*i + 4*cidx + ridx] = value; 1045 noinsert1:; 1046 low = i; 1047 } 1048 ailen[brow] = nrow; 1049 } 1050 PetscFunctionReturnVoid(); 1051 } 1052 1053 /* 1054 Checks for missing diagonals 1055 */ 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1058 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1059 { 1060 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1061 PetscErrorCode ierr; 1062 PetscInt *diag,*ii = a->i,i; 1063 1064 PetscFunctionBegin; 1065 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1066 *missing = PETSC_FALSE; 1067 if (A->rmap->n > 0 && !ii) { 1068 *missing = PETSC_TRUE; 1069 if (d) *d = 0; 1070 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1071 } else { 1072 diag = a->diag; 1073 for (i=0; i<a->mbs; i++) { 1074 if (diag[i] >= ii[i+1]) { 1075 *missing = PETSC_TRUE; 1076 if (d) *d = i; 1077 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1078 break; 1079 } 1080 } 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1087 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1088 { 1089 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt i,j,m = a->mbs; 1092 1093 PetscFunctionBegin; 1094 if (!a->diag) { 1095 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1096 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1097 a->free_diag = PETSC_TRUE; 1098 } 1099 for (i=0; i<m; i++) { 1100 a->diag[i] = a->i[i+1]; 1101 for (j=a->i[i]; j<a->i[i+1]; j++) { 1102 if (a->j[j] == i) { 1103 a->diag[i] = j; 1104 break; 1105 } 1106 } 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1114 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1117 PetscErrorCode ierr; 1118 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1119 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1120 1121 PetscFunctionBegin; 1122 *nn = n; 1123 if (!ia) PetscFunctionReturn(0); 1124 if (symmetric) { 1125 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1126 nz = tia[n]; 1127 } else { 1128 tia = a->i; tja = a->j; 1129 } 1130 1131 if (!blockcompressed && bs > 1) { 1132 (*nn) *= bs; 1133 /* malloc & create the natural set of indices */ 1134 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1135 if (n) { 1136 (*ia)[0] = oshift; 1137 for (j=1; j<bs; j++) { 1138 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1139 } 1140 } 1141 1142 for (i=1; i<n; i++) { 1143 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1144 for (j=1; j<bs; j++) { 1145 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1146 } 1147 } 1148 if (n) { 1149 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1150 } 1151 1152 if (inja) { 1153 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1154 cnt = 0; 1155 for (i=0; i<n; i++) { 1156 for (j=0; j<bs; j++) { 1157 for (k=tia[i]; k<tia[i+1]; k++) { 1158 for (l=0; l<bs; l++) { 1159 (*ja)[cnt++] = bs*tja[k] + l; 1160 } 1161 } 1162 } 1163 } 1164 } 1165 1166 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1167 ierr = PetscFree(tia);CHKERRQ(ierr); 1168 ierr = PetscFree(tja);CHKERRQ(ierr); 1169 } 1170 } else if (oshift == 1) { 1171 if (symmetric) { 1172 nz = tia[A->rmap->n/bs]; 1173 /* add 1 to i and j indices */ 1174 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1175 *ia = tia; 1176 if (ja) { 1177 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1178 *ja = tja; 1179 } 1180 } else { 1181 nz = a->i[A->rmap->n/bs]; 1182 /* malloc space and add 1 to i and j indices */ 1183 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1184 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1185 if (ja) { 1186 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1187 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1188 } 1189 } 1190 } else { 1191 *ia = tia; 1192 if (ja) *ja = tja; 1193 } 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1199 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1200 { 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 if (!ia) PetscFunctionReturn(0); 1205 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1206 ierr = PetscFree(*ia);CHKERRQ(ierr); 1207 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1214 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 1219 PetscFunctionBegin; 1220 #if defined(PETSC_USE_LOG) 1221 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1222 #endif 1223 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1224 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1225 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1226 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1227 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1228 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1229 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1230 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1231 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1232 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1233 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1234 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1235 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1236 1237 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1238 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1239 ierr = PetscFree(A->data);CHKERRQ(ierr); 1240 1241 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1250 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1257 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1258 { 1259 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 switch (op) { 1264 case MAT_ROW_ORIENTED: 1265 a->roworiented = flg; 1266 break; 1267 case MAT_KEEP_NONZERO_PATTERN: 1268 a->keepnonzeropattern = flg; 1269 break; 1270 case MAT_NEW_NONZERO_LOCATIONS: 1271 a->nonew = (flg ? 0 : 1); 1272 break; 1273 case MAT_NEW_NONZERO_LOCATION_ERR: 1274 a->nonew = (flg ? -1 : 0); 1275 break; 1276 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1277 a->nonew = (flg ? -2 : 0); 1278 break; 1279 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1280 a->nounused = (flg ? -1 : 0); 1281 break; 1282 case MAT_NEW_DIAGONALS: 1283 case MAT_IGNORE_OFF_PROC_ENTRIES: 1284 case MAT_USE_HASH_TABLE: 1285 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1286 break; 1287 case MAT_SPD: 1288 case MAT_SYMMETRIC: 1289 case MAT_STRUCTURALLY_SYMMETRIC: 1290 case MAT_HERMITIAN: 1291 case MAT_SYMMETRY_ETERNAL: 1292 case MAT_SUBMAT_SINGLEIS: 1293 /* These options are handled directly by MatSetOption() */ 1294 break; 1295 default: 1296 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1304 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1305 { 1306 PetscErrorCode ierr; 1307 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1308 MatScalar *aa_i; 1309 PetscScalar *v_i; 1310 1311 PetscFunctionBegin; 1312 bs = A->rmap->bs; 1313 bs2 = bs*bs; 1314 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1315 1316 bn = row/bs; /* Block number */ 1317 bp = row % bs; /* Block Position */ 1318 M = ai[bn+1] - ai[bn]; 1319 *nz = bs*M; 1320 1321 if (v) { 1322 *v = 0; 1323 if (*nz) { 1324 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1325 for (i=0; i<M; i++) { /* for each block in the block row */ 1326 v_i = *v + i*bs; 1327 aa_i = aa + bs2*(ai[bn] + i); 1328 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1329 } 1330 } 1331 } 1332 1333 if (idx) { 1334 *idx = 0; 1335 if (*nz) { 1336 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1337 for (i=0; i<M; i++) { /* for each block in the block row */ 1338 idx_i = *idx + i*bs; 1339 itmp = bs*aj[ai[bn] + i]; 1340 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1341 } 1342 } 1343 } 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNCT__ 1348 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1349 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1350 { 1351 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1352 PetscErrorCode ierr; 1353 1354 PetscFunctionBegin; 1355 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1361 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1362 { 1363 PetscErrorCode ierr; 1364 1365 PetscFunctionBegin; 1366 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1367 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1368 PetscFunctionReturn(0); 1369 } 1370 1371 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1375 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1376 { 1377 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1378 Mat C; 1379 PetscErrorCode ierr; 1380 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1381 PetscInt *rows,*cols,bs2=a->bs2; 1382 MatScalar *array; 1383 1384 PetscFunctionBegin; 1385 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1386 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1387 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1388 1389 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1390 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1391 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1392 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1393 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1394 ierr = PetscFree(col);CHKERRQ(ierr); 1395 } else { 1396 C = *B; 1397 } 1398 1399 array = a->a; 1400 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1401 for (i=0; i<mbs; i++) { 1402 cols[0] = i*bs; 1403 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1404 len = ai[i+1] - ai[i]; 1405 for (j=0; j<len; j++) { 1406 rows[0] = (*aj++)*bs; 1407 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1408 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1409 array += bs2; 1410 } 1411 } 1412 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1413 1414 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1415 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1416 1417 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1418 *B = C; 1419 } else { 1420 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1427 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1428 { 1429 PetscErrorCode ierr; 1430 Mat Btrans; 1431 1432 PetscFunctionBegin; 1433 *f = PETSC_FALSE; 1434 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1435 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1436 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1442 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1443 { 1444 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1445 PetscErrorCode ierr; 1446 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1447 int fd; 1448 PetscScalar *aa; 1449 FILE *file; 1450 1451 PetscFunctionBegin; 1452 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1453 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1454 col_lens[0] = MAT_FILE_CLASSID; 1455 1456 col_lens[1] = A->rmap->N; 1457 col_lens[2] = A->cmap->n; 1458 col_lens[3] = a->nz*bs2; 1459 1460 /* store lengths of each row and write (including header) to file */ 1461 count = 0; 1462 for (i=0; i<a->mbs; i++) { 1463 for (j=0; j<bs; j++) { 1464 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1465 } 1466 } 1467 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1468 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1469 1470 /* store column indices (zero start index) */ 1471 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1472 count = 0; 1473 for (i=0; i<a->mbs; i++) { 1474 for (j=0; j<bs; j++) { 1475 for (k=a->i[i]; k<a->i[i+1]; k++) { 1476 for (l=0; l<bs; l++) { 1477 jj[count++] = bs*a->j[k] + l; 1478 } 1479 } 1480 } 1481 } 1482 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1483 ierr = PetscFree(jj);CHKERRQ(ierr); 1484 1485 /* store nonzero values */ 1486 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1487 count = 0; 1488 for (i=0; i<a->mbs; i++) { 1489 for (j=0; j<bs; j++) { 1490 for (k=a->i[i]; k<a->i[i+1]; k++) { 1491 for (l=0; l<bs; l++) { 1492 aa[count++] = a->a[bs2*k + l*bs + j]; 1493 } 1494 } 1495 } 1496 } 1497 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1498 ierr = PetscFree(aa);CHKERRQ(ierr); 1499 1500 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1501 if (file) { 1502 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1503 } 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1509 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1510 { 1511 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1512 PetscErrorCode ierr; 1513 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1514 PetscViewerFormat format; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1518 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1519 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1520 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1521 const char *matname; 1522 Mat aij; 1523 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1524 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1525 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1526 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1527 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1528 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1529 PetscFunctionReturn(0); 1530 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1531 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1532 for (i=0; i<a->mbs; i++) { 1533 for (j=0; j<bs; j++) { 1534 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1535 for (k=a->i[i]; k<a->i[i+1]; k++) { 1536 for (l=0; l<bs; l++) { 1537 #if defined(PETSC_USE_COMPLEX) 1538 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1540 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1542 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1543 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1544 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1546 } 1547 #else 1548 if (a->a[bs2*k + l*bs + j] != 0.0) { 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1550 } 1551 #endif 1552 } 1553 } 1554 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1555 } 1556 } 1557 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1558 } else { 1559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1560 for (i=0; i<a->mbs; i++) { 1561 for (j=0; j<bs; j++) { 1562 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1563 for (k=a->i[i]; k<a->i[i+1]; k++) { 1564 for (l=0; l<bs; l++) { 1565 #if defined(PETSC_USE_COMPLEX) 1566 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1568 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1571 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1572 } else { 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } 1575 #else 1576 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1577 #endif 1578 } 1579 } 1580 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1581 } 1582 } 1583 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1584 } 1585 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #include <petscdraw.h> 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1592 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1593 { 1594 Mat A = (Mat) Aa; 1595 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1596 PetscErrorCode ierr; 1597 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1598 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1599 MatScalar *aa; 1600 PetscViewer viewer; 1601 PetscViewerFormat format; 1602 1603 PetscFunctionBegin; 1604 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1605 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1606 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1607 1608 /* loop over matrix elements drawing boxes */ 1609 1610 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1611 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1612 /* Blue for negative, Cyan for zero and Red for positive */ 1613 color = PETSC_DRAW_BLUE; 1614 for (i=0,row=0; i<mbs; i++,row+=bs) { 1615 for (j=a->i[i]; j<a->i[i+1]; j++) { 1616 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1617 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1618 aa = a->a + j*bs2; 1619 for (k=0; k<bs; k++) { 1620 for (l=0; l<bs; l++) { 1621 if (PetscRealPart(*aa++) >= 0.) continue; 1622 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1623 } 1624 } 1625 } 1626 } 1627 color = PETSC_DRAW_CYAN; 1628 for (i=0,row=0; i<mbs; i++,row+=bs) { 1629 for (j=a->i[i]; j<a->i[i+1]; j++) { 1630 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1631 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1632 aa = a->a + j*bs2; 1633 for (k=0; k<bs; k++) { 1634 for (l=0; l<bs; l++) { 1635 if (PetscRealPart(*aa++) != 0.) continue; 1636 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1637 } 1638 } 1639 } 1640 } 1641 color = PETSC_DRAW_RED; 1642 for (i=0,row=0; i<mbs; i++,row+=bs) { 1643 for (j=a->i[i]; j<a->i[i+1]; j++) { 1644 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1645 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1646 aa = a->a + j*bs2; 1647 for (k=0; k<bs; k++) { 1648 for (l=0; l<bs; l++) { 1649 if (PetscRealPart(*aa++) <= 0.) continue; 1650 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1651 } 1652 } 1653 } 1654 } 1655 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1656 } else { 1657 /* use contour shading to indicate magnitude of values */ 1658 /* first determine max of all nonzero values */ 1659 PetscReal minv = 0.0, maxv = 0.0; 1660 PetscDraw popup; 1661 1662 for (i=0; i<a->nz*a->bs2; i++) { 1663 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1664 } 1665 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1666 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1667 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1668 1669 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1670 for (i=0,row=0; i<mbs; i++,row+=bs) { 1671 for (j=a->i[i]; j<a->i[i+1]; j++) { 1672 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1673 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1674 aa = a->a + j*bs2; 1675 for (k=0; k<bs; k++) { 1676 for (l=0; l<bs; l++) { 1677 MatScalar v = *aa++; 1678 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1679 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1680 } 1681 } 1682 } 1683 } 1684 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1691 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1692 { 1693 PetscErrorCode ierr; 1694 PetscReal xl,yl,xr,yr,w,h; 1695 PetscDraw draw; 1696 PetscBool isnull; 1697 1698 PetscFunctionBegin; 1699 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1700 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1701 if (isnull) PetscFunctionReturn(0); 1702 1703 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1704 xr += w; yr += h; xl = -w; yl = -h; 1705 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1706 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1707 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1708 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1709 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatView_SeqBAIJ" 1715 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1716 { 1717 PetscErrorCode ierr; 1718 PetscBool iascii,isbinary,isdraw; 1719 1720 PetscFunctionBegin; 1721 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1722 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1723 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1724 if (iascii) { 1725 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1726 } else if (isbinary) { 1727 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1728 } else if (isdraw) { 1729 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1730 } else { 1731 Mat B; 1732 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1733 ierr = MatView(B,viewer);CHKERRQ(ierr); 1734 ierr = MatDestroy(&B);CHKERRQ(ierr); 1735 } 1736 PetscFunctionReturn(0); 1737 } 1738 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1742 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1743 { 1744 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1745 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1746 PetscInt *ai = a->i,*ailen = a->ilen; 1747 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1748 MatScalar *ap,*aa = a->a; 1749 1750 PetscFunctionBegin; 1751 for (k=0; k<m; k++) { /* loop over rows */ 1752 row = im[k]; brow = row/bs; 1753 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1754 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1755 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1756 nrow = ailen[brow]; 1757 for (l=0; l<n; l++) { /* loop over columns */ 1758 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1759 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1760 col = in[l]; 1761 bcol = col/bs; 1762 cidx = col%bs; 1763 ridx = row%bs; 1764 high = nrow; 1765 low = 0; /* assume unsorted */ 1766 while (high-low > 5) { 1767 t = (low+high)/2; 1768 if (rp[t] > bcol) high = t; 1769 else low = t; 1770 } 1771 for (i=low; i<high; i++) { 1772 if (rp[i] > bcol) break; 1773 if (rp[i] == bcol) { 1774 *v++ = ap[bs2*i+bs*cidx+ridx]; 1775 goto finished; 1776 } 1777 } 1778 *v++ = 0.0; 1779 finished:; 1780 } 1781 } 1782 PetscFunctionReturn(0); 1783 } 1784 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1787 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1788 { 1789 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1790 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1791 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1792 PetscErrorCode ierr; 1793 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1794 PetscBool roworiented=a->roworiented; 1795 const PetscScalar *value = v; 1796 MatScalar *ap,*aa = a->a,*bap; 1797 1798 PetscFunctionBegin; 1799 if (roworiented) { 1800 stepval = (n-1)*bs; 1801 } else { 1802 stepval = (m-1)*bs; 1803 } 1804 for (k=0; k<m; k++) { /* loop over added rows */ 1805 row = im[k]; 1806 if (row < 0) continue; 1807 #if defined(PETSC_USE_DEBUG) 1808 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1809 #endif 1810 rp = aj + ai[row]; 1811 ap = aa + bs2*ai[row]; 1812 rmax = imax[row]; 1813 nrow = ailen[row]; 1814 low = 0; 1815 high = nrow; 1816 for (l=0; l<n; l++) { /* loop over added columns */ 1817 if (in[l] < 0) continue; 1818 #if defined(PETSC_USE_DEBUG) 1819 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1820 #endif 1821 col = in[l]; 1822 if (roworiented) { 1823 value = v + (k*(stepval+bs) + l)*bs; 1824 } else { 1825 value = v + (l*(stepval+bs) + k)*bs; 1826 } 1827 if (col <= lastcol) low = 0; 1828 else high = nrow; 1829 lastcol = col; 1830 while (high-low > 7) { 1831 t = (low+high)/2; 1832 if (rp[t] > col) high = t; 1833 else low = t; 1834 } 1835 for (i=low; i<high; i++) { 1836 if (rp[i] > col) break; 1837 if (rp[i] == col) { 1838 bap = ap + bs2*i; 1839 if (roworiented) { 1840 if (is == ADD_VALUES) { 1841 for (ii=0; ii<bs; ii++,value+=stepval) { 1842 for (jj=ii; jj<bs2; jj+=bs) { 1843 bap[jj] += *value++; 1844 } 1845 } 1846 } else { 1847 for (ii=0; ii<bs; ii++,value+=stepval) { 1848 for (jj=ii; jj<bs2; jj+=bs) { 1849 bap[jj] = *value++; 1850 } 1851 } 1852 } 1853 } else { 1854 if (is == ADD_VALUES) { 1855 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1856 for (jj=0; jj<bs; jj++) { 1857 bap[jj] += value[jj]; 1858 } 1859 bap += bs; 1860 } 1861 } else { 1862 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1863 for (jj=0; jj<bs; jj++) { 1864 bap[jj] = value[jj]; 1865 } 1866 bap += bs; 1867 } 1868 } 1869 } 1870 goto noinsert2; 1871 } 1872 } 1873 if (nonew == 1) goto noinsert2; 1874 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1875 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1876 N = nrow++ - 1; high++; 1877 /* shift up all the later entries in this row */ 1878 for (ii=N; ii>=i; ii--) { 1879 rp[ii+1] = rp[ii]; 1880 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1881 } 1882 if (N >= i) { 1883 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1884 } 1885 rp[i] = col; 1886 bap = ap + bs2*i; 1887 if (roworiented) { 1888 for (ii=0; ii<bs; ii++,value+=stepval) { 1889 for (jj=ii; jj<bs2; jj+=bs) { 1890 bap[jj] = *value++; 1891 } 1892 } 1893 } else { 1894 for (ii=0; ii<bs; ii++,value+=stepval) { 1895 for (jj=0; jj<bs; jj++) { 1896 *bap++ = *value++; 1897 } 1898 } 1899 } 1900 noinsert2:; 1901 low = i; 1902 } 1903 ailen[row] = nrow; 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1910 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1911 { 1912 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1913 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1914 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1915 PetscErrorCode ierr; 1916 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1917 MatScalar *aa = a->a,*ap; 1918 PetscReal ratio=0.6; 1919 1920 PetscFunctionBegin; 1921 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1922 1923 if (m) rmax = ailen[0]; 1924 for (i=1; i<mbs; i++) { 1925 /* move each row back by the amount of empty slots (fshift) before it*/ 1926 fshift += imax[i-1] - ailen[i-1]; 1927 rmax = PetscMax(rmax,ailen[i]); 1928 if (fshift) { 1929 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1930 N = ailen[i]; 1931 for (j=0; j<N; j++) { 1932 ip[j-fshift] = ip[j]; 1933 1934 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1935 } 1936 } 1937 ai[i] = ai[i-1] + ailen[i-1]; 1938 } 1939 if (mbs) { 1940 fshift += imax[mbs-1] - ailen[mbs-1]; 1941 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1942 } 1943 1944 /* reset ilen and imax for each row */ 1945 a->nonzerorowcnt = 0; 1946 for (i=0; i<mbs; i++) { 1947 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1948 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1949 } 1950 a->nz = ai[mbs]; 1951 1952 /* diagonals may have moved, so kill the diagonal pointers */ 1953 a->idiagvalid = PETSC_FALSE; 1954 if (fshift && a->diag) { 1955 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1956 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1957 a->diag = 0; 1958 } 1959 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1960 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1961 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1962 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1963 1964 A->info.mallocs += a->reallocs; 1965 a->reallocs = 0; 1966 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1967 a->rmax = rmax; 1968 1969 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1970 PetscFunctionReturn(0); 1971 } 1972 1973 /* 1974 This function returns an array of flags which indicate the locations of contiguous 1975 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1976 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1977 Assume: sizes should be long enough to hold all the values. 1978 */ 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1981 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1982 { 1983 PetscInt i,j,k,row; 1984 PetscBool flg; 1985 1986 PetscFunctionBegin; 1987 for (i=0,j=0; i<n; j++) { 1988 row = idx[i]; 1989 if (row%bs!=0) { /* Not the begining of a block */ 1990 sizes[j] = 1; 1991 i++; 1992 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1993 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1994 i++; 1995 } else { /* Begining of the block, so check if the complete block exists */ 1996 flg = PETSC_TRUE; 1997 for (k=1; k<bs; k++) { 1998 if (row+k != idx[i+k]) { /* break in the block */ 1999 flg = PETSC_FALSE; 2000 break; 2001 } 2002 } 2003 if (flg) { /* No break in the bs */ 2004 sizes[j] = bs; 2005 i += bs; 2006 } else { 2007 sizes[j] = 1; 2008 i++; 2009 } 2010 } 2011 } 2012 *bs_max = j; 2013 PetscFunctionReturn(0); 2014 } 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2018 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2019 { 2020 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2021 PetscErrorCode ierr; 2022 PetscInt i,j,k,count,*rows; 2023 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2024 PetscScalar zero = 0.0; 2025 MatScalar *aa; 2026 const PetscScalar *xx; 2027 PetscScalar *bb; 2028 2029 PetscFunctionBegin; 2030 /* fix right hand side if needed */ 2031 if (x && b) { 2032 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2033 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2034 for (i=0; i<is_n; i++) { 2035 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2036 } 2037 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2038 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2039 } 2040 2041 /* Make a copy of the IS and sort it */ 2042 /* allocate memory for rows,sizes */ 2043 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2044 2045 /* copy IS values to rows, and sort them */ 2046 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2047 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2048 2049 if (baij->keepnonzeropattern) { 2050 for (i=0; i<is_n; i++) sizes[i] = 1; 2051 bs_max = is_n; 2052 } else { 2053 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2054 A->nonzerostate++; 2055 } 2056 2057 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2058 row = rows[j]; 2059 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2060 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2061 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2062 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2063 if (diag != (PetscScalar)0.0) { 2064 if (baij->ilen[row/bs] > 0) { 2065 baij->ilen[row/bs] = 1; 2066 baij->j[baij->i[row/bs]] = row/bs; 2067 2068 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2069 } 2070 /* Now insert all the diagonal values for this bs */ 2071 for (k=0; k<bs; k++) { 2072 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2073 } 2074 } else { /* (diag == 0.0) */ 2075 baij->ilen[row/bs] = 0; 2076 } /* end (diag == 0.0) */ 2077 } else { /* (sizes[i] != bs) */ 2078 #if defined(PETSC_USE_DEBUG) 2079 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2080 #endif 2081 for (k=0; k<count; k++) { 2082 aa[0] = zero; 2083 aa += bs; 2084 } 2085 if (diag != (PetscScalar)0.0) { 2086 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2087 } 2088 } 2089 } 2090 2091 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2092 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2093 PetscFunctionReturn(0); 2094 } 2095 2096 #undef __FUNCT__ 2097 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2098 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2099 { 2100 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2101 PetscErrorCode ierr; 2102 PetscInt i,j,k,count; 2103 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2104 PetscScalar zero = 0.0; 2105 MatScalar *aa; 2106 const PetscScalar *xx; 2107 PetscScalar *bb; 2108 PetscBool *zeroed,vecs = PETSC_FALSE; 2109 2110 PetscFunctionBegin; 2111 /* fix right hand side if needed */ 2112 if (x && b) { 2113 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2114 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2115 vecs = PETSC_TRUE; 2116 } 2117 2118 /* zero the columns */ 2119 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2120 for (i=0; i<is_n; i++) { 2121 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2122 zeroed[is_idx[i]] = PETSC_TRUE; 2123 } 2124 for (i=0; i<A->rmap->N; i++) { 2125 if (!zeroed[i]) { 2126 row = i/bs; 2127 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2128 for (k=0; k<bs; k++) { 2129 col = bs*baij->j[j] + k; 2130 if (zeroed[col]) { 2131 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2132 if (vecs) bb[i] -= aa[0]*xx[col]; 2133 aa[0] = 0.0; 2134 } 2135 } 2136 } 2137 } else if (vecs) bb[i] = diag*xx[i]; 2138 } 2139 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2140 if (vecs) { 2141 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2142 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2143 } 2144 2145 /* zero the rows */ 2146 for (i=0; i<is_n; i++) { 2147 row = is_idx[i]; 2148 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2149 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2150 for (k=0; k<count; k++) { 2151 aa[0] = zero; 2152 aa += bs; 2153 } 2154 if (diag != (PetscScalar)0.0) { 2155 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2156 } 2157 } 2158 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 #undef __FUNCT__ 2163 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2164 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2165 { 2166 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2167 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2168 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2169 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2170 PetscErrorCode ierr; 2171 PetscInt ridx,cidx,bs2=a->bs2; 2172 PetscBool roworiented=a->roworiented; 2173 MatScalar *ap,value,*aa=a->a,*bap; 2174 2175 PetscFunctionBegin; 2176 for (k=0; k<m; k++) { /* loop over added rows */ 2177 row = im[k]; 2178 brow = row/bs; 2179 if (row < 0) continue; 2180 #if defined(PETSC_USE_DEBUG) 2181 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2182 #endif 2183 rp = aj + ai[brow]; 2184 ap = aa + bs2*ai[brow]; 2185 rmax = imax[brow]; 2186 nrow = ailen[brow]; 2187 low = 0; 2188 high = nrow; 2189 for (l=0; l<n; l++) { /* loop over added columns */ 2190 if (in[l] < 0) continue; 2191 #if defined(PETSC_USE_DEBUG) 2192 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2193 #endif 2194 col = in[l]; bcol = col/bs; 2195 ridx = row % bs; cidx = col % bs; 2196 if (roworiented) { 2197 value = v[l + k*n]; 2198 } else { 2199 value = v[k + l*m]; 2200 } 2201 if (col <= lastcol) low = 0; else high = nrow; 2202 lastcol = col; 2203 while (high-low > 7) { 2204 t = (low+high)/2; 2205 if (rp[t] > bcol) high = t; 2206 else low = t; 2207 } 2208 for (i=low; i<high; i++) { 2209 if (rp[i] > bcol) break; 2210 if (rp[i] == bcol) { 2211 bap = ap + bs2*i + bs*cidx + ridx; 2212 if (is == ADD_VALUES) *bap += value; 2213 else *bap = value; 2214 goto noinsert1; 2215 } 2216 } 2217 if (nonew == 1) goto noinsert1; 2218 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2219 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2220 N = nrow++ - 1; high++; 2221 /* shift up all the later entries in this row */ 2222 for (ii=N; ii>=i; ii--) { 2223 rp[ii+1] = rp[ii]; 2224 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2225 } 2226 if (N>=i) { 2227 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2228 } 2229 rp[i] = bcol; 2230 ap[bs2*i + bs*cidx + ridx] = value; 2231 a->nz++; 2232 A->nonzerostate++; 2233 noinsert1:; 2234 low = i; 2235 } 2236 ailen[brow] = nrow; 2237 } 2238 PetscFunctionReturn(0); 2239 } 2240 2241 #undef __FUNCT__ 2242 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2243 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2244 { 2245 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2246 Mat outA; 2247 PetscErrorCode ierr; 2248 PetscBool row_identity,col_identity; 2249 2250 PetscFunctionBegin; 2251 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2252 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2253 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2254 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2255 2256 outA = inA; 2257 inA->factortype = MAT_FACTOR_LU; 2258 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2259 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2260 2261 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2262 2263 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2264 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2265 a->row = row; 2266 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2267 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2268 a->col = col; 2269 2270 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2271 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2272 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2273 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2274 2275 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2276 if (!a->solve_work) { 2277 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2278 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2279 } 2280 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2281 PetscFunctionReturn(0); 2282 } 2283 2284 #undef __FUNCT__ 2285 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2286 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2287 { 2288 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2289 PetscInt i,nz,mbs; 2290 2291 PetscFunctionBegin; 2292 nz = baij->maxnz; 2293 mbs = baij->mbs; 2294 for (i=0; i<nz; i++) { 2295 baij->j[i] = indices[i]; 2296 } 2297 baij->nz = nz; 2298 for (i=0; i<mbs; i++) { 2299 baij->ilen[i] = baij->imax[i]; 2300 } 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2306 /*@ 2307 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2308 in the matrix. 2309 2310 Input Parameters: 2311 + mat - the SeqBAIJ matrix 2312 - indices - the column indices 2313 2314 Level: advanced 2315 2316 Notes: 2317 This can be called if you have precomputed the nonzero structure of the 2318 matrix and want to provide it to the matrix object to improve the performance 2319 of the MatSetValues() operation. 2320 2321 You MUST have set the correct numbers of nonzeros per row in the call to 2322 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2323 2324 MUST be called before any calls to MatSetValues(); 2325 2326 @*/ 2327 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2328 { 2329 PetscErrorCode ierr; 2330 2331 PetscFunctionBegin; 2332 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2333 PetscValidPointer(indices,2); 2334 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2340 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2341 { 2342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2343 PetscErrorCode ierr; 2344 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2345 PetscReal atmp; 2346 PetscScalar *x,zero = 0.0; 2347 MatScalar *aa; 2348 PetscInt ncols,brow,krow,kcol; 2349 2350 PetscFunctionBegin; 2351 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2352 bs = A->rmap->bs; 2353 aa = a->a; 2354 ai = a->i; 2355 aj = a->j; 2356 mbs = a->mbs; 2357 2358 ierr = VecSet(v,zero);CHKERRQ(ierr); 2359 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2360 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2361 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2362 for (i=0; i<mbs; i++) { 2363 ncols = ai[1] - ai[0]; ai++; 2364 brow = bs*i; 2365 for (j=0; j<ncols; j++) { 2366 for (kcol=0; kcol<bs; kcol++) { 2367 for (krow=0; krow<bs; krow++) { 2368 atmp = PetscAbsScalar(*aa);aa++; 2369 row = brow + krow; /* row index */ 2370 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2371 } 2372 } 2373 aj++; 2374 } 2375 } 2376 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 #undef __FUNCT__ 2381 #define __FUNCT__ "MatCopy_SeqBAIJ" 2382 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2383 { 2384 PetscErrorCode ierr; 2385 2386 PetscFunctionBegin; 2387 /* If the two matrices have the same copy implementation, use fast copy. */ 2388 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2390 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2391 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2392 2393 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2394 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2395 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2396 } else { 2397 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2398 } 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2404 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2405 { 2406 PetscErrorCode ierr; 2407 2408 PetscFunctionBegin; 2409 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2410 PetscFunctionReturn(0); 2411 } 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2415 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2416 { 2417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2418 2419 PetscFunctionBegin; 2420 *array = a->a; 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2426 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2427 { 2428 PetscFunctionBegin; 2429 PetscFunctionReturn(0); 2430 } 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2434 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2435 { 2436 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2437 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2438 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2439 PetscErrorCode ierr; 2440 2441 PetscFunctionBegin; 2442 /* Set the number of nonzeros in the new matrix */ 2443 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2449 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2450 { 2451 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2452 PetscErrorCode ierr; 2453 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2454 PetscBLASInt one=1; 2455 2456 PetscFunctionBegin; 2457 if (str == SAME_NONZERO_PATTERN) { 2458 PetscScalar alpha = a; 2459 PetscBLASInt bnz; 2460 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2461 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2462 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2463 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2464 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2465 } else { 2466 Mat B; 2467 PetscInt *nnz; 2468 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2469 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2470 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2471 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2472 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2473 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2474 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2475 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2476 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2477 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2478 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2479 ierr = PetscFree(nnz);CHKERRQ(ierr); 2480 } 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2486 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2487 { 2488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2489 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2490 MatScalar *aa = a->a; 2491 2492 PetscFunctionBegin; 2493 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2494 PetscFunctionReturn(0); 2495 } 2496 2497 #undef __FUNCT__ 2498 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2499 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2500 { 2501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2502 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2503 MatScalar *aa = a->a; 2504 2505 PetscFunctionBegin; 2506 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2512 /* 2513 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2514 */ 2515 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2516 { 2517 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2518 PetscErrorCode ierr; 2519 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2520 PetscInt nz = a->i[m],row,*jj,mr,col; 2521 2522 PetscFunctionBegin; 2523 *nn = n; 2524 if (!ia) PetscFunctionReturn(0); 2525 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2526 else { 2527 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2528 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2529 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2530 jj = a->j; 2531 for (i=0; i<nz; i++) { 2532 collengths[jj[i]]++; 2533 } 2534 cia[0] = oshift; 2535 for (i=0; i<n; i++) { 2536 cia[i+1] = cia[i] + collengths[i]; 2537 } 2538 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2539 jj = a->j; 2540 for (row=0; row<m; row++) { 2541 mr = a->i[row+1] - a->i[row]; 2542 for (i=0; i<mr; i++) { 2543 col = *jj++; 2544 2545 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2546 } 2547 } 2548 ierr = PetscFree(collengths);CHKERRQ(ierr); 2549 *ia = cia; *ja = cja; 2550 } 2551 PetscFunctionReturn(0); 2552 } 2553 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2556 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2557 { 2558 PetscErrorCode ierr; 2559 2560 PetscFunctionBegin; 2561 if (!ia) PetscFunctionReturn(0); 2562 ierr = PetscFree(*ia);CHKERRQ(ierr); 2563 ierr = PetscFree(*ja);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 /* 2568 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2569 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2570 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2571 */ 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2574 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2575 { 2576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2577 PetscErrorCode ierr; 2578 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2579 PetscInt nz = a->i[m],row,*jj,mr,col; 2580 PetscInt *cspidx; 2581 2582 PetscFunctionBegin; 2583 *nn = n; 2584 if (!ia) PetscFunctionReturn(0); 2585 2586 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2587 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2588 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2589 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2590 jj = a->j; 2591 for (i=0; i<nz; i++) { 2592 collengths[jj[i]]++; 2593 } 2594 cia[0] = oshift; 2595 for (i=0; i<n; i++) { 2596 cia[i+1] = cia[i] + collengths[i]; 2597 } 2598 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2599 jj = a->j; 2600 for (row=0; row<m; row++) { 2601 mr = a->i[row+1] - a->i[row]; 2602 for (i=0; i<mr; i++) { 2603 col = *jj++; 2604 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2605 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2606 } 2607 } 2608 ierr = PetscFree(collengths);CHKERRQ(ierr); 2609 *ia = cia; *ja = cja; 2610 *spidx = cspidx; 2611 PetscFunctionReturn(0); 2612 } 2613 2614 #undef __FUNCT__ 2615 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2616 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2617 { 2618 PetscErrorCode ierr; 2619 2620 PetscFunctionBegin; 2621 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2622 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2623 PetscFunctionReturn(0); 2624 } 2625 2626 #undef __FUNCT__ 2627 #define __FUNCT__ "MatShift_SeqBAIJ" 2628 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2629 { 2630 PetscErrorCode ierr; 2631 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2632 2633 PetscFunctionBegin; 2634 if (!Y->preallocated || !aij->nz) { 2635 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2636 } 2637 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2638 PetscFunctionReturn(0); 2639 } 2640 2641 /* -------------------------------------------------------------------*/ 2642 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2643 MatGetRow_SeqBAIJ, 2644 MatRestoreRow_SeqBAIJ, 2645 MatMult_SeqBAIJ_N, 2646 /* 4*/ MatMultAdd_SeqBAIJ_N, 2647 MatMultTranspose_SeqBAIJ, 2648 MatMultTransposeAdd_SeqBAIJ, 2649 0, 2650 0, 2651 0, 2652 /* 10*/ 0, 2653 MatLUFactor_SeqBAIJ, 2654 0, 2655 0, 2656 MatTranspose_SeqBAIJ, 2657 /* 15*/ MatGetInfo_SeqBAIJ, 2658 MatEqual_SeqBAIJ, 2659 MatGetDiagonal_SeqBAIJ, 2660 MatDiagonalScale_SeqBAIJ, 2661 MatNorm_SeqBAIJ, 2662 /* 20*/ 0, 2663 MatAssemblyEnd_SeqBAIJ, 2664 MatSetOption_SeqBAIJ, 2665 MatZeroEntries_SeqBAIJ, 2666 /* 24*/ MatZeroRows_SeqBAIJ, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /* 29*/ MatSetUp_SeqBAIJ, 2672 0, 2673 0, 2674 0, 2675 0, 2676 /* 34*/ MatDuplicate_SeqBAIJ, 2677 0, 2678 0, 2679 MatILUFactor_SeqBAIJ, 2680 0, 2681 /* 39*/ MatAXPY_SeqBAIJ, 2682 MatGetSubMatrices_SeqBAIJ, 2683 MatIncreaseOverlap_SeqBAIJ, 2684 MatGetValues_SeqBAIJ, 2685 MatCopy_SeqBAIJ, 2686 /* 44*/ 0, 2687 MatScale_SeqBAIJ, 2688 MatShift_SeqBAIJ, 2689 0, 2690 MatZeroRowsColumns_SeqBAIJ, 2691 /* 49*/ 0, 2692 MatGetRowIJ_SeqBAIJ, 2693 MatRestoreRowIJ_SeqBAIJ, 2694 MatGetColumnIJ_SeqBAIJ, 2695 MatRestoreColumnIJ_SeqBAIJ, 2696 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2697 0, 2698 0, 2699 0, 2700 MatSetValuesBlocked_SeqBAIJ, 2701 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2702 MatDestroy_SeqBAIJ, 2703 MatView_SeqBAIJ, 2704 0, 2705 0, 2706 /* 64*/ 0, 2707 0, 2708 0, 2709 0, 2710 0, 2711 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2712 0, 2713 MatConvert_Basic, 2714 0, 2715 0, 2716 /* 74*/ 0, 2717 MatFDColoringApply_BAIJ, 2718 0, 2719 0, 2720 0, 2721 /* 79*/ 0, 2722 0, 2723 0, 2724 0, 2725 MatLoad_SeqBAIJ, 2726 /* 84*/ 0, 2727 0, 2728 0, 2729 0, 2730 0, 2731 /* 89*/ 0, 2732 0, 2733 0, 2734 0, 2735 0, 2736 /* 94*/ 0, 2737 0, 2738 0, 2739 0, 2740 0, 2741 /* 99*/ 0, 2742 0, 2743 0, 2744 0, 2745 0, 2746 /*104*/ 0, 2747 MatRealPart_SeqBAIJ, 2748 MatImaginaryPart_SeqBAIJ, 2749 0, 2750 0, 2751 /*109*/ 0, 2752 0, 2753 0, 2754 0, 2755 MatMissingDiagonal_SeqBAIJ, 2756 /*114*/ 0, 2757 0, 2758 0, 2759 0, 2760 0, 2761 /*119*/ 0, 2762 0, 2763 MatMultHermitianTranspose_SeqBAIJ, 2764 MatMultHermitianTransposeAdd_SeqBAIJ, 2765 0, 2766 /*124*/ 0, 2767 0, 2768 MatInvertBlockDiagonal_SeqBAIJ, 2769 0, 2770 0, 2771 /*129*/ 0, 2772 0, 2773 0, 2774 0, 2775 0, 2776 /*134*/ 0, 2777 0, 2778 0, 2779 0, 2780 0, 2781 /*139*/ 0, 2782 0, 2783 0, 2784 MatFDColoringSetUp_SeqXAIJ, 2785 0, 2786 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2787 }; 2788 2789 #undef __FUNCT__ 2790 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2791 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2792 { 2793 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2794 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2795 PetscErrorCode ierr; 2796 2797 PetscFunctionBegin; 2798 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2799 2800 /* allocate space for values if not already there */ 2801 if (!aij->saved_values) { 2802 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2803 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2804 } 2805 2806 /* copy values over */ 2807 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2808 PetscFunctionReturn(0); 2809 } 2810 2811 #undef __FUNCT__ 2812 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2813 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2814 { 2815 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2816 PetscErrorCode ierr; 2817 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2818 2819 PetscFunctionBegin; 2820 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2821 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2822 2823 /* copy values over */ 2824 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2829 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2830 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2833 static PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2834 { 2835 Mat_SeqBAIJ *b; 2836 PetscErrorCode ierr; 2837 PetscInt i,mbs,nbs,bs2; 2838 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2839 2840 PetscFunctionBegin; 2841 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2842 if (nz == MAT_SKIP_ALLOCATION) { 2843 skipallocation = PETSC_TRUE; 2844 nz = 0; 2845 } 2846 2847 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2848 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2849 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2850 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2851 2852 B->preallocated = PETSC_TRUE; 2853 2854 mbs = B->rmap->n/bs; 2855 nbs = B->cmap->n/bs; 2856 bs2 = bs*bs; 2857 2858 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2859 2860 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2861 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2862 if (nnz) { 2863 for (i=0; i<mbs; i++) { 2864 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2865 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2866 } 2867 } 2868 2869 b = (Mat_SeqBAIJ*)B->data; 2870 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2871 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2872 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2873 2874 if (!flg) { 2875 switch (bs) { 2876 case 1: 2877 B->ops->mult = MatMult_SeqBAIJ_1; 2878 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2879 break; 2880 case 2: 2881 B->ops->mult = MatMult_SeqBAIJ_2; 2882 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2883 break; 2884 case 3: 2885 B->ops->mult = MatMult_SeqBAIJ_3; 2886 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2887 break; 2888 case 4: 2889 B->ops->mult = MatMult_SeqBAIJ_4; 2890 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2891 break; 2892 case 5: 2893 B->ops->mult = MatMult_SeqBAIJ_5; 2894 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2895 break; 2896 case 6: 2897 B->ops->mult = MatMult_SeqBAIJ_6; 2898 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2899 break; 2900 case 7: 2901 B->ops->mult = MatMult_SeqBAIJ_7; 2902 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2903 break; 2904 case 15: 2905 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2906 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2907 break; 2908 default: 2909 B->ops->mult = MatMult_SeqBAIJ_N; 2910 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2911 break; 2912 } 2913 } 2914 B->ops->sor = MatSOR_SeqBAIJ; 2915 b->mbs = mbs; 2916 b->nbs = nbs; 2917 if (!skipallocation) { 2918 if (!b->imax) { 2919 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2920 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2921 2922 b->free_imax_ilen = PETSC_TRUE; 2923 } 2924 /* b->ilen will count nonzeros in each block row so far. */ 2925 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2926 if (!nnz) { 2927 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2928 else if (nz < 0) nz = 1; 2929 for (i=0; i<mbs; i++) b->imax[i] = nz; 2930 nz = nz*mbs; 2931 } else { 2932 nz = 0; 2933 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2934 } 2935 2936 /* allocate the matrix space */ 2937 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2938 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2939 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2940 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2941 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2942 2943 b->singlemalloc = PETSC_TRUE; 2944 b->i[0] = 0; 2945 for (i=1; i<mbs+1; i++) { 2946 b->i[i] = b->i[i-1] + b->imax[i-1]; 2947 } 2948 b->free_a = PETSC_TRUE; 2949 b->free_ij = PETSC_TRUE; 2950 } else { 2951 b->free_a = PETSC_FALSE; 2952 b->free_ij = PETSC_FALSE; 2953 } 2954 2955 b->bs2 = bs2; 2956 b->mbs = mbs; 2957 b->nz = 0; 2958 b->maxnz = nz; 2959 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2960 B->was_assembled = PETSC_FALSE; 2961 B->assembled = PETSC_FALSE; 2962 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2963 PetscFunctionReturn(0); 2964 } 2965 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2968 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2969 { 2970 PetscInt i,m,nz,nz_max=0,*nnz; 2971 PetscScalar *values=0; 2972 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2977 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2978 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2979 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2980 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2981 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2982 m = B->rmap->n/bs; 2983 2984 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2985 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2986 for (i=0; i<m; i++) { 2987 nz = ii[i+1]- ii[i]; 2988 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2989 nz_max = PetscMax(nz_max, nz); 2990 nnz[i] = nz; 2991 } 2992 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2993 ierr = PetscFree(nnz);CHKERRQ(ierr); 2994 2995 values = (PetscScalar*)V; 2996 if (!values) { 2997 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2998 } 2999 for (i=0; i<m; i++) { 3000 PetscInt ncols = ii[i+1] - ii[i]; 3001 const PetscInt *icols = jj + ii[i]; 3002 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3003 if (!roworiented) { 3004 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3005 } else { 3006 PetscInt j; 3007 for (j=0; j<ncols; j++) { 3008 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3009 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3010 } 3011 } 3012 } 3013 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3014 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3015 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3016 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3017 PetscFunctionReturn(0); 3018 } 3019 3020 /*MC 3021 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3022 block sparse compressed row format. 3023 3024 Options Database Keys: 3025 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3026 3027 Level: beginner 3028 3029 .seealso: MatCreateSeqBAIJ() 3030 M*/ 3031 3032 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3033 3034 #undef __FUNCT__ 3035 #define __FUNCT__ "MatCreate_SeqBAIJ" 3036 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3037 { 3038 PetscErrorCode ierr; 3039 PetscMPIInt size; 3040 Mat_SeqBAIJ *b; 3041 3042 PetscFunctionBegin; 3043 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3044 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3045 3046 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3047 B->data = (void*)b; 3048 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3049 3050 b->row = 0; 3051 b->col = 0; 3052 b->icol = 0; 3053 b->reallocs = 0; 3054 b->saved_values = 0; 3055 3056 b->roworiented = PETSC_TRUE; 3057 b->nonew = 0; 3058 b->diag = 0; 3059 B->spptr = 0; 3060 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3061 b->keepnonzeropattern = PETSC_FALSE; 3062 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3068 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3069 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3070 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3071 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3072 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3073 PetscFunctionReturn(0); 3074 } 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3078 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3079 { 3080 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3081 PetscErrorCode ierr; 3082 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3083 3084 PetscFunctionBegin; 3085 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3086 3087 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3088 c->imax = a->imax; 3089 c->ilen = a->ilen; 3090 c->free_imax_ilen = PETSC_FALSE; 3091 } else { 3092 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3093 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3094 for (i=0; i<mbs; i++) { 3095 c->imax[i] = a->imax[i]; 3096 c->ilen[i] = a->ilen[i]; 3097 } 3098 c->free_imax_ilen = PETSC_TRUE; 3099 } 3100 3101 /* allocate the matrix space */ 3102 if (mallocmatspace) { 3103 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3104 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3105 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3106 3107 c->i = a->i; 3108 c->j = a->j; 3109 c->singlemalloc = PETSC_FALSE; 3110 c->free_a = PETSC_TRUE; 3111 c->free_ij = PETSC_FALSE; 3112 c->parent = A; 3113 C->preallocated = PETSC_TRUE; 3114 C->assembled = PETSC_TRUE; 3115 3116 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3117 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3118 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3119 } else { 3120 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3121 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3122 3123 c->singlemalloc = PETSC_TRUE; 3124 c->free_a = PETSC_TRUE; 3125 c->free_ij = PETSC_TRUE; 3126 3127 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3128 if (mbs > 0) { 3129 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3130 if (cpvalues == MAT_COPY_VALUES) { 3131 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3132 } else { 3133 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3134 } 3135 } 3136 C->preallocated = PETSC_TRUE; 3137 C->assembled = PETSC_TRUE; 3138 } 3139 } 3140 3141 c->roworiented = a->roworiented; 3142 c->nonew = a->nonew; 3143 3144 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3145 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3146 3147 c->bs2 = a->bs2; 3148 c->mbs = a->mbs; 3149 c->nbs = a->nbs; 3150 3151 if (a->diag) { 3152 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3153 c->diag = a->diag; 3154 c->free_diag = PETSC_FALSE; 3155 } else { 3156 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3157 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3158 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3159 c->free_diag = PETSC_TRUE; 3160 } 3161 } else c->diag = 0; 3162 3163 c->nz = a->nz; 3164 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3165 c->solve_work = NULL; 3166 c->mult_work = NULL; 3167 c->sor_workt = NULL; 3168 c->sor_work = NULL; 3169 3170 c->compressedrow.use = a->compressedrow.use; 3171 c->compressedrow.nrows = a->compressedrow.nrows; 3172 if (a->compressedrow.use) { 3173 i = a->compressedrow.nrows; 3174 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3175 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3176 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3177 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3178 } else { 3179 c->compressedrow.use = PETSC_FALSE; 3180 c->compressedrow.i = NULL; 3181 c->compressedrow.rindex = NULL; 3182 } 3183 C->nonzerostate = A->nonzerostate; 3184 3185 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3186 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3187 PetscFunctionReturn(0); 3188 } 3189 3190 #undef __FUNCT__ 3191 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3192 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3193 { 3194 PetscErrorCode ierr; 3195 3196 PetscFunctionBegin; 3197 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3198 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3199 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3200 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3201 PetscFunctionReturn(0); 3202 } 3203 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatLoad_SeqBAIJ" 3206 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3207 { 3208 Mat_SeqBAIJ *a; 3209 PetscErrorCode ierr; 3210 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3211 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3212 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3213 PetscInt *masked,nmask,tmp,bs2,ishift; 3214 PetscMPIInt size; 3215 int fd; 3216 PetscScalar *aa; 3217 MPI_Comm comm; 3218 3219 PetscFunctionBegin; 3220 /* force binary viewer to load .info file if it has not yet done so */ 3221 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3222 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3223 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3224 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3225 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3226 if (bs < 0) bs = 1; 3227 bs2 = bs*bs; 3228 3229 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3230 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3231 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3232 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3233 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3234 M = header[1]; N = header[2]; nz = header[3]; 3235 3236 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3237 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3238 3239 /* 3240 This code adds extra rows to make sure the number of rows is 3241 divisible by the blocksize 3242 */ 3243 mbs = M/bs; 3244 extra_rows = bs - M + bs*(mbs); 3245 if (extra_rows == bs) extra_rows = 0; 3246 else mbs++; 3247 if (extra_rows) { 3248 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3249 } 3250 3251 /* Set global sizes if not already set */ 3252 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3253 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3254 } else { /* Check if the matrix global sizes are correct */ 3255 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3256 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3257 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3258 } 3259 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3260 } 3261 3262 /* read in row lengths */ 3263 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3264 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3265 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3266 3267 /* read in column indices */ 3268 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3269 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3270 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3271 3272 /* loop over row lengths determining block row lengths */ 3273 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3274 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3275 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3276 rowcount = 0; 3277 nzcount = 0; 3278 for (i=0; i<mbs; i++) { 3279 nmask = 0; 3280 for (j=0; j<bs; j++) { 3281 kmax = rowlengths[rowcount]; 3282 for (k=0; k<kmax; k++) { 3283 tmp = jj[nzcount++]/bs; 3284 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3285 } 3286 rowcount++; 3287 } 3288 browlengths[i] += nmask; 3289 /* zero out the mask elements we set */ 3290 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3291 } 3292 3293 /* Do preallocation */ 3294 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3295 a = (Mat_SeqBAIJ*)newmat->data; 3296 3297 /* set matrix "i" values */ 3298 a->i[0] = 0; 3299 for (i=1; i<= mbs; i++) { 3300 a->i[i] = a->i[i-1] + browlengths[i-1]; 3301 a->ilen[i-1] = browlengths[i-1]; 3302 } 3303 a->nz = 0; 3304 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3305 3306 /* read in nonzero values */ 3307 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3308 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3309 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3310 3311 /* set "a" and "j" values into matrix */ 3312 nzcount = 0; jcount = 0; 3313 for (i=0; i<mbs; i++) { 3314 nzcountb = nzcount; 3315 nmask = 0; 3316 for (j=0; j<bs; j++) { 3317 kmax = rowlengths[i*bs+j]; 3318 for (k=0; k<kmax; k++) { 3319 tmp = jj[nzcount++]/bs; 3320 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3321 } 3322 } 3323 /* sort the masked values */ 3324 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3325 3326 /* set "j" values into matrix */ 3327 maskcount = 1; 3328 for (j=0; j<nmask; j++) { 3329 a->j[jcount++] = masked[j]; 3330 mask[masked[j]] = maskcount++; 3331 } 3332 /* set "a" values into matrix */ 3333 ishift = bs2*a->i[i]; 3334 for (j=0; j<bs; j++) { 3335 kmax = rowlengths[i*bs+j]; 3336 for (k=0; k<kmax; k++) { 3337 tmp = jj[nzcountb]/bs; 3338 block = mask[tmp] - 1; 3339 point = jj[nzcountb] - bs*tmp; 3340 idx = ishift + bs2*block + j + bs*point; 3341 a->a[idx] = (MatScalar)aa[nzcountb++]; 3342 } 3343 } 3344 /* zero out the mask elements we set */ 3345 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3346 } 3347 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3348 3349 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3350 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3351 ierr = PetscFree(aa);CHKERRQ(ierr); 3352 ierr = PetscFree(jj);CHKERRQ(ierr); 3353 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3354 3355 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3356 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3357 PetscFunctionReturn(0); 3358 } 3359 3360 #undef __FUNCT__ 3361 #define __FUNCT__ "MatCreateSeqBAIJ" 3362 /*@C 3363 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3364 compressed row) format. For good matrix assembly performance the 3365 user should preallocate the matrix storage by setting the parameter nz 3366 (or the array nnz). By setting these parameters accurately, performance 3367 during matrix assembly can be increased by more than a factor of 50. 3368 3369 Collective on MPI_Comm 3370 3371 Input Parameters: 3372 + comm - MPI communicator, set to PETSC_COMM_SELF 3373 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3374 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3375 . m - number of rows 3376 . n - number of columns 3377 . nz - number of nonzero blocks per block row (same for all rows) 3378 - nnz - array containing the number of nonzero blocks in the various block rows 3379 (possibly different for each block row) or NULL 3380 3381 Output Parameter: 3382 . A - the matrix 3383 3384 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3385 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3386 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3387 3388 Options Database Keys: 3389 . -mat_no_unroll - uses code that does not unroll the loops in the 3390 block calculations (much slower) 3391 . -mat_block_size - size of the blocks to use 3392 3393 Level: intermediate 3394 3395 Notes: 3396 The number of rows and columns must be divisible by blocksize. 3397 3398 If the nnz parameter is given then the nz parameter is ignored 3399 3400 A nonzero block is any block that as 1 or more nonzeros in it 3401 3402 The block AIJ format is fully compatible with standard Fortran 77 3403 storage. That is, the stored row and column indices can begin at 3404 either one (as in Fortran) or zero. See the users' manual for details. 3405 3406 Specify the preallocated storage with either nz or nnz (not both). 3407 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3408 allocation. See Users-Manual: ch_mat for details. 3409 matrices. 3410 3411 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3412 @*/ 3413 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3414 { 3415 PetscErrorCode ierr; 3416 3417 PetscFunctionBegin; 3418 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3419 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3420 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3421 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3422 PetscFunctionReturn(0); 3423 } 3424 3425 #undef __FUNCT__ 3426 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3427 /*@C 3428 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3429 per row in the matrix. For good matrix assembly performance the 3430 user should preallocate the matrix storage by setting the parameter nz 3431 (or the array nnz). By setting these parameters accurately, performance 3432 during matrix assembly can be increased by more than a factor of 50. 3433 3434 Collective on MPI_Comm 3435 3436 Input Parameters: 3437 + B - the matrix 3438 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3439 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3440 . nz - number of block nonzeros per block row (same for all rows) 3441 - nnz - array containing the number of block nonzeros in the various block rows 3442 (possibly different for each block row) or NULL 3443 3444 Options Database Keys: 3445 . -mat_no_unroll - uses code that does not unroll the loops in the 3446 block calculations (much slower) 3447 . -mat_block_size - size of the blocks to use 3448 3449 Level: intermediate 3450 3451 Notes: 3452 If the nnz parameter is given then the nz parameter is ignored 3453 3454 You can call MatGetInfo() to get information on how effective the preallocation was; 3455 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3456 You can also run with the option -info and look for messages with the string 3457 malloc in them to see if additional memory allocation was needed. 3458 3459 The block AIJ format is fully compatible with standard Fortran 77 3460 storage. That is, the stored row and column indices can begin at 3461 either one (as in Fortran) or zero. See the users' manual for details. 3462 3463 Specify the preallocated storage with either nz or nnz (not both). 3464 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3465 allocation. See Users-Manual: ch_mat for details. 3466 3467 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3468 @*/ 3469 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3470 { 3471 PetscErrorCode ierr; 3472 3473 PetscFunctionBegin; 3474 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3475 PetscValidType(B,1); 3476 PetscValidLogicalCollectiveInt(B,bs,2); 3477 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3478 PetscFunctionReturn(0); 3479 } 3480 3481 #undef __FUNCT__ 3482 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3483 /*@C 3484 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3485 (the default sequential PETSc format). 3486 3487 Collective on MPI_Comm 3488 3489 Input Parameters: 3490 + B - the matrix 3491 . i - the indices into j for the start of each local row (starts with zero) 3492 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3493 - v - optional values in the matrix 3494 3495 Level: developer 3496 3497 Notes: 3498 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3499 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3500 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3501 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3502 block column and the second index is over columns within a block. 3503 3504 .keywords: matrix, aij, compressed row, sparse 3505 3506 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3507 @*/ 3508 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3509 { 3510 PetscErrorCode ierr; 3511 3512 PetscFunctionBegin; 3513 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3514 PetscValidType(B,1); 3515 PetscValidLogicalCollectiveInt(B,bs,2); 3516 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3517 PetscFunctionReturn(0); 3518 } 3519 3520 3521 #undef __FUNCT__ 3522 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3523 /*@ 3524 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3525 3526 Collective on MPI_Comm 3527 3528 Input Parameters: 3529 + comm - must be an MPI communicator of size 1 3530 . bs - size of block 3531 . m - number of rows 3532 . n - number of columns 3533 . i - row indices 3534 . j - column indices 3535 - a - matrix values 3536 3537 Output Parameter: 3538 . mat - the matrix 3539 3540 Level: advanced 3541 3542 Notes: 3543 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3544 once the matrix is destroyed 3545 3546 You cannot set new nonzero locations into this matrix, that will generate an error. 3547 3548 The i and j indices are 0 based 3549 3550 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3551 3552 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3553 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3554 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3555 with column-major ordering within blocks. 3556 3557 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3558 3559 @*/ 3560 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3561 { 3562 PetscErrorCode ierr; 3563 PetscInt ii; 3564 Mat_SeqBAIJ *baij; 3565 3566 PetscFunctionBegin; 3567 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3568 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3569 3570 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3571 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3572 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3573 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3574 baij = (Mat_SeqBAIJ*)(*mat)->data; 3575 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3576 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3577 3578 baij->i = i; 3579 baij->j = j; 3580 baij->a = a; 3581 3582 baij->singlemalloc = PETSC_FALSE; 3583 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3584 baij->free_a = PETSC_FALSE; 3585 baij->free_ij = PETSC_FALSE; 3586 3587 for (ii=0; ii<m; ii++) { 3588 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3589 #if defined(PETSC_USE_DEBUG) 3590 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3591 #endif 3592 } 3593 #if defined(PETSC_USE_DEBUG) 3594 for (ii=0; ii<baij->i[m]; ii++) { 3595 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3596 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3597 } 3598 #endif 3599 3600 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3601 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3602 PetscFunctionReturn(0); 3603 } 3604 3605 #undef __FUNCT__ 3606 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3607 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3608 { 3609 PetscErrorCode ierr; 3610 3611 PetscFunctionBegin; 3612 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3613 PetscFunctionReturn(0); 3614 } 3615