1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #define PetscKernel_v_gets_A_times_w_1_exp(v,A,w,exp) \ 128 do { \ 129 v[0] exp A[0]*w[0]; \ 130 } while (0) 131 132 #define PetscKernel_v_gets_A_times_w_2_exp(v,A,w,exp) \ 133 do { \ 134 v[0] exp A[0]*w[0] + A[2]*w[1]; \ 135 v[1] exp A[1]*w[0] + A[3]*w[1]; \ 136 } while (0) 137 138 #define PetscKernel_v_gets_A_times_w_3_exp(v,A,w,exp) \ 139 do { \ 140 v[0] exp A[0]*w[0] + A[3]*w[1] + A[6]*w[2]; \ 141 v[1] exp A[1]*w[0] + A[4]*w[1] + A[7]*w[2]; \ 142 v[2] exp A[2]*w[0] + A[5]*w[1] + A[8]*w[2]; \ 143 } while (0) 144 145 #define PetscKernel_v_gets_A_times_w_4_exp(v,A,w,exp) \ 146 do { \ 147 v[0] exp A[0]*w[0] + A[4]*w[1] + A[8] *w[2] + A[12]*w[3]; \ 148 v[1] exp A[1]*w[0] + A[5]*w[1] + A[9] *w[2] + A[13]*w[3]; \ 149 v[2] exp A[2]*w[0] + A[6]*w[1] + A[10]*w[2] + A[14]*w[3]; \ 150 v[3] exp A[3]*w[0] + A[7]*w[1] + A[11]*w[2] + A[15]*w[3]; \ 151 } while (0) 152 153 #define PetscKernel_v_gets_A_times_w_4_exp(v,A,w,exp) \ 154 do { \ 155 v[0] exp A[0]*w[0] + A[4]*w[1] + A[8] *w[2] + A[12]*w[3]; \ 156 v[1] exp A[1]*w[0] + A[5]*w[1] + A[9] *w[2] + A[13]*w[3]; \ 157 v[2] exp A[2]*w[0] + A[6]*w[1] + A[10]*w[2] + A[14]*w[3]; \ 158 v[3] exp A[3]*w[0] + A[7]*w[1] + A[11]*w[2] + A[15]*w[3]; \ 159 } while (0) 160 161 #define PetscKernel_v_gets_A_times_w_5_exp(v,A,w,exp) \ 162 do { \ 163 v[0] exp A[0]*w[0] + A[5]*w[1] + A[10]*w[2] + A[15]*w[3] + A[20]*w[4]; \ 164 v[1] exp A[1]*w[0] + A[6]*w[1] + A[11]*w[2] + A[16]*w[3] + A[21]*w[4]; \ 165 v[2] exp A[2]*w[0] + A[7]*w[1] + A[12]*w[2] + A[17]*w[3] + A[22]*w[4]; \ 166 v[3] exp A[3]*w[0] + A[8]*w[1] + A[13]*w[2] + A[18]*w[3] + A[23]*w[4]; \ 167 v[4] exp A[4]*w[0] + A[9]*w[1] + A[14]*w[2] + A[19]*w[3] + A[24]*w[4]; \ 168 } while (0) 169 170 #define PetscKernel_v_gets_A_times_w_6_exp(v,A,w,exp) \ 171 do { \ 172 v[0] exp A[0]*w[0] + A[6] *w[1] + A[12]*w[2] + A[18]*w[3] + A[24]*w[4] + A[30]*w[5]; \ 173 v[1] exp A[1]*w[0] + A[7] *w[1] + A[13]*w[2] + A[19]*w[3] + A[25]*w[4] + A[31]*w[5]; \ 174 v[2] exp A[2]*w[0] + A[8] *w[1] + A[14]*w[2] + A[20]*w[3] + A[26]*w[4] + A[32]*w[5]; \ 175 v[3] exp A[3]*w[0] + A[9] *w[1] + A[15]*w[2] + A[21]*w[3] + A[27]*w[4] + A[33]*w[5]; \ 176 v[4] exp A[4]*w[0] + A[10]*w[1] + A[16]*w[2] + A[22]*w[3] + A[28]*w[4] + A[34]*w[5]; \ 177 v[5] exp A[5]*w[0] + A[11]*w[1] + A[17]*w[2] + A[23]*w[3] + A[29]*w[4] + A[35]*w[5]; \ 178 } while (0) 179 180 #define PetscKernel_v_gets_A_times_w_7_exp(v,A,w,exp) \ 181 do { \ 182 v[0] exp A[0]*w[0] + A[7] *w[1] + A[14]*w[2] + A[21]*w[3] + A[28]*w[4] + A[35]*w[5] + A[42]*w[6]; \ 183 v[1] exp A[1]*w[0] + A[8] *w[1] + A[15]*w[2] + A[22]*w[3] + A[29]*w[4] + A[36]*w[5] + A[43]*w[6]; \ 184 v[2] exp A[2]*w[0] + A[9] *w[1] + A[16]*w[2] + A[23]*w[3] + A[30]*w[4] + A[37]*w[5] + A[44]*w[6]; \ 185 v[3] exp A[3]*w[0] + A[10]*w[1] + A[17]*w[2] + A[24]*w[3] + A[31]*w[4] + A[38]*w[5] + A[45]*w[6]; \ 186 v[4] exp A[4]*w[0] + A[11]*w[1] + A[18]*w[2] + A[25]*w[3] + A[32]*w[4] + A[39]*w[5] + A[46]*w[6]; \ 187 v[5] exp A[5]*w[0] + A[12]*w[1] + A[19]*w[2] + A[26]*w[3] + A[33]*w[4] + A[40]*w[5] + A[47]*w[6]; \ 188 v[6] exp A[6]*w[0] + A[13]*w[1] + A[20]*w[2] + A[27]*w[3] + A[34]*w[4] + A[41]*w[5] + A[48]*w[6]; \ 189 } while (0) 190 191 #define PetscKernel_v_gets_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,=) 192 #define PetscKernel_v_gets_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,=) 193 #define PetscKernel_v_gets_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,=) 194 #define PetscKernel_v_gets_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,=) 195 #define PetscKernel_v_gets_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,=) 196 #define PetscKernel_v_gets_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,=) 197 #define PetscKernel_v_gets_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,=) 198 #define PetscKernel_v_gets_v_plus_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,+=) 199 #define PetscKernel_v_gets_v_plus_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,+=) 200 #define PetscKernel_v_gets_v_plus_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,+=) 201 #define PetscKernel_v_gets_v_plus_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,+=) 202 #define PetscKernel_v_gets_v_plus_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,+=) 203 #define PetscKernel_v_gets_v_plus_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,+=) 204 #define PetscKernel_v_gets_v_plus_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,+=) 205 #define PetscKernel_v_gets_v_minus_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,-=) 206 #define PetscKernel_v_gets_v_minus_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,-=) 207 #define PetscKernel_v_gets_v_minus_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,-=) 208 #define PetscKernel_v_gets_v_minus_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,-=) 209 #define PetscKernel_v_gets_v_minus_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,-=) 210 #define PetscKernel_v_gets_v_minus_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,-=) 211 #define PetscKernel_v_gets_v_minus_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,-=) 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatSOR_SeqBAIJ" 215 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 PetscScalar *x,*work,*w,*workt,*t; 219 const MatScalar *v,*aa = a->a, *idiag; 220 const PetscScalar *b,*xb; 221 PetscScalar s[7], xw[7]; 222 PetscErrorCode ierr; 223 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 224 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 225 226 PetscFunctionBegin; 227 its = its*lits; 228 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 229 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 233 234 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 235 236 if (!m) PetscFunctionReturn(0); 237 diag = a->diag; 238 idiag = a->idiag; 239 k = PetscMax(A->rmap->n,A->cmap->n); 240 if (!a->mult_work) { 241 ierr = PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 242 } 243 work = a->mult_work; 244 t = work + k+1; 245 if (!a->sor_work) { 246 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 247 } 248 w = a->sor_work; 249 250 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 251 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 252 253 if (flag & SOR_ZERO_INITIAL_GUESS) { 254 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 255 switch (bs) { 256 case 1: 257 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 258 t[0] = b[0]; 259 i2 = 1; 260 idiag += 1; 261 for (i=1; i<m; i++) { 262 v = aa + ai[i]; 263 vi = aj + ai[i]; 264 nz = diag[i] - ai[i]; 265 s[0] = b[i2]; 266 for (j=0; j<nz; j++) { 267 xw[0] = x[vi[j]]; 268 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 269 } 270 t[i2] = s[0]; 271 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 272 x[i2] = xw[0]; 273 idiag += 1; 274 i2 += 1; 275 } 276 break; 277 case 2: 278 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 279 t[0] = b[0]; t[1] = b[1]; 280 i2 = 2; 281 idiag += 4; 282 for (i=1; i<m; i++) { 283 v = aa + 4*ai[i]; 284 vi = aj + ai[i]; 285 nz = diag[i] - ai[i]; 286 s[0] = b[i2]; s[1] = b[i2+1]; 287 for (j=0; j<nz; j++) { 288 idx = 2*vi[j]; 289 it = 4*j; 290 xw[0] = x[idx]; xw[1] = x[1+idx]; 291 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 292 } 293 t[i2] = s[0]; t[i2+1] = s[1]; 294 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 295 x[i2] = xw[0]; x[i2+1] = xw[1]; 296 idiag += 4; 297 i2 += 2; 298 } 299 break; 300 case 3: 301 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 302 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 303 i2 = 3; 304 idiag += 9; 305 for (i=1; i<m; i++) { 306 v = aa + 9*ai[i]; 307 vi = aj + ai[i]; 308 nz = diag[i] - ai[i]; 309 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 310 while (nz--) { 311 idx = 3*(*vi++); 312 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 313 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 314 v += 9; 315 } 316 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 317 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 318 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 319 idiag += 9; 320 i2 += 3; 321 } 322 break; 323 case 4: 324 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 325 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 326 i2 = 4; 327 idiag += 16; 328 for (i=1; i<m; i++) { 329 v = aa + 16*ai[i]; 330 vi = aj + ai[i]; 331 nz = diag[i] - ai[i]; 332 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 333 while (nz--) { 334 idx = 4*(*vi++); 335 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 336 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 337 v += 16; 338 } 339 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 340 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 341 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 342 idiag += 16; 343 i2 += 4; 344 } 345 break; 346 case 5: 347 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 348 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 349 i2 = 5; 350 idiag += 25; 351 for (i=1; i<m; i++) { 352 v = aa + 25*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 356 while (nz--) { 357 idx = 5*(*vi++); 358 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 359 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 360 v += 25; 361 } 362 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 363 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 364 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 365 idiag += 25; 366 i2 += 5; 367 } 368 break; 369 case 6: 370 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 371 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 372 i2 = 6; 373 idiag += 36; 374 for (i=1; i<m; i++) { 375 v = aa + 36*ai[i]; 376 vi = aj + ai[i]; 377 nz = diag[i] - ai[i]; 378 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 379 while (nz--) { 380 idx = 6*(*vi++); 381 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 382 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 383 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 384 v += 36; 385 } 386 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 387 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 388 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 389 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 390 idiag += 36; 391 i2 += 6; 392 } 393 break; 394 case 7: 395 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 396 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 397 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 398 i2 = 7; 399 idiag += 49; 400 for (i=1; i<m; i++) { 401 v = aa + 49*ai[i]; 402 vi = aj + ai[i]; 403 nz = diag[i] - ai[i]; 404 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 405 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 406 while (nz--) { 407 idx = 7*(*vi++); 408 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 409 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 410 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 411 v += 49; 412 } 413 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 414 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 415 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 416 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 417 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 418 idiag += 49; 419 i2 += 7; 420 } 421 break; 422 default: 423 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 424 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 425 i2 = bs; 426 idiag += bs2; 427 for (i=1; i<m; i++) { 428 v = aa + bs2*ai[i]; 429 vi = aj + ai[i]; 430 nz = diag[i] - ai[i]; 431 432 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 433 /* copy all rows of x that are needed into contiguous space */ 434 workt = work; 435 for (j=0; j<nz; j++) { 436 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 437 workt += bs; 438 } 439 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 440 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 441 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 442 443 idiag += bs2; 444 i2 += bs; 445 } 446 break; 447 } 448 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 449 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 450 xb = t; 451 } 452 else xb = b; 453 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 454 idiag = a->idiag+bs2*(a->mbs-1); 455 i2 = bs * (m-1); 456 switch (bs) { 457 case 1: 458 s[0] = xb[i2]; 459 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 460 x[i2] = xw[0]; 461 i2 -= 1; 462 for (i=m-2; i>=0; i--) { 463 v = aa + (diag[i]+1); 464 vi = aj + diag[i] + 1; 465 nz = ai[i+1] - diag[i] - 1; 466 s[0] = xb[i2]; 467 for (j=0; j<nz; j++) { 468 xw[0] = x[vi[j]]; 469 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 470 } 471 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 472 x[i2] = xw[0]; 473 idiag -= 1; 474 i2 -= 1; 475 } 476 break; 477 case 2: 478 s[0] = xb[i2]; s[1] = xb[i2+1]; 479 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 480 x[i2] = xw[0]; x[i2+1] = xw[1]; 481 i2 -= 2; 482 idiag -= 4; 483 for (i=m-2; i>=0; i--) { 484 v = aa + 4*(diag[i] + 1); 485 vi = aj + diag[i] + 1; 486 nz = ai[i+1] - diag[i] - 1; 487 s[0] = xb[i2]; s[1] = xb[i2+1]; 488 for (j=0; j<nz; j++) { 489 idx = 2*vi[j]; 490 it = 4*j; 491 xw[0] = x[idx]; xw[1] = x[1+idx]; 492 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 493 } 494 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; 496 idiag -= 4; 497 i2 -= 2; 498 } 499 break; 500 case 3: 501 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 502 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 503 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 504 i2 -= 3; 505 idiag -= 9; 506 for (i=m-2; i>=0; i--) { 507 v = aa + 9*(diag[i]+1); 508 vi = aj + diag[i] + 1; 509 nz = ai[i+1] - diag[i] - 1; 510 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 511 while (nz--) { 512 idx = 3*(*vi++); 513 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 514 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 515 v += 9; 516 } 517 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 518 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 519 idiag -= 9; 520 i2 -= 3; 521 } 522 break; 523 case 4: 524 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 525 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 526 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 527 i2 -= 4; 528 idiag -= 16; 529 for (i=m-2; i>=0; i--) { 530 v = aa + 16*(diag[i]+1); 531 vi = aj + diag[i] + 1; 532 nz = ai[i+1] - diag[i] - 1; 533 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 534 while (nz--) { 535 idx = 4*(*vi++); 536 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 537 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 538 v += 16; 539 } 540 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 541 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 542 idiag -= 16; 543 i2 -= 4; 544 } 545 break; 546 case 5: 547 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 548 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 549 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 550 i2 -= 5; 551 idiag -= 25; 552 for (i=m-2; i>=0; i--) { 553 v = aa + 25*(diag[i]+1); 554 vi = aj + diag[i] + 1; 555 nz = ai[i+1] - diag[i] - 1; 556 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 557 while (nz--) { 558 idx = 5*(*vi++); 559 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 560 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 561 v += 25; 562 } 563 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 564 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 565 idiag -= 25; 566 i2 -= 5; 567 } 568 break; 569 case 6: 570 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 571 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 572 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 573 i2 -= 6; 574 idiag -= 36; 575 for (i=m-2; i>=0; i--) { 576 v = aa + 36*(diag[i]+1); 577 vi = aj + diag[i] + 1; 578 nz = ai[i+1] - diag[i] - 1; 579 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 580 while (nz--) { 581 idx = 6*(*vi++); 582 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 583 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 584 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 585 v += 36; 586 } 587 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 588 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 589 idiag -= 36; 590 i2 -= 6; 591 } 592 break; 593 case 7: 594 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 595 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 596 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 597 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 598 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 599 i2 -= 7; 600 idiag -= 49; 601 for (i=m-2; i>=0; i--) { 602 v = aa + 49*(diag[i]+1); 603 vi = aj + diag[i] + 1; 604 nz = ai[i+1] - diag[i] - 1; 605 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 606 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 607 while (nz--) { 608 idx = 7*(*vi++); 609 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 610 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 611 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 612 v += 49; 613 } 614 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 615 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 616 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 617 idiag -= 49; 618 i2 -= 7; 619 } 620 break; 621 default: 622 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 623 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 624 i2 -= bs; 625 idiag -= bs2; 626 for (i=m-2; i>=0; i--) { 627 v = aa + bs2*(diag[i]+1); 628 vi = aj + diag[i] + 1; 629 nz = ai[i+1] - diag[i] - 1; 630 631 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 632 /* copy all rows of x that are needed into contiguous space */ 633 workt = work; 634 for (j=0; j<nz; j++) { 635 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 636 workt += bs; 637 } 638 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 639 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 640 641 idiag -= bs2; 642 i2 -= bs; 643 } 644 break; 645 } 646 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 647 } 648 its--; 649 } 650 while (its--) { 651 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 652 idiag = a->idiag; 653 i2 = 0; 654 switch (bs) { 655 case 1: 656 for (i=0; i<m; i++) { 657 v = aa + ai[i]; 658 vi = aj + ai[i]; 659 nz = ai[i+1] - ai[i]; 660 s[0] = b[i2]; 661 for (j=0; j<nz; j++) { 662 xw[0] = x[vi[j]]; 663 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 664 } 665 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 666 x[i2] += xw[0]; 667 idiag += 1; 668 i2 += 1; 669 } 670 break; 671 case 2: 672 for (i=0; i<m; i++) { 673 v = aa + 4*ai[i]; 674 vi = aj + ai[i]; 675 nz = ai[i+1] - ai[i]; 676 s[0] = b[i2]; s[1] = b[i2+1]; 677 for (j=0; j<nz; j++) { 678 idx = 2*vi[j]; 679 it = 4*j; 680 xw[0] = x[idx]; xw[1] = x[1+idx]; 681 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 682 } 683 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 684 x[i2] += xw[0]; x[i2+1] += xw[1]; 685 idiag += 4; 686 i2 += 2; 687 } 688 break; 689 case 3: 690 for (i=0; i<m; i++) { 691 v = aa + 9*ai[i]; 692 vi = aj + ai[i]; 693 nz = ai[i+1] - ai[i]; 694 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 695 while (nz--) { 696 idx = 3*(*vi++); 697 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 699 v += 9; 700 } 701 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 idiag += 9; 704 i2 += 3; 705 } 706 break; 707 case 4: 708 for (i=0; i<m; i++) { 709 v = aa + 16*ai[i]; 710 vi = aj + ai[i]; 711 nz = ai[i+1] - ai[i]; 712 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 713 while (nz--) { 714 idx = 4*(*vi++); 715 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 716 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 717 v += 16; 718 } 719 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 720 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 721 idiag += 16; 722 i2 += 4; 723 } 724 break; 725 case 5: 726 for (i=0; i<m; i++) { 727 v = aa + 25*ai[i]; 728 vi = aj + ai[i]; 729 nz = ai[i+1] - ai[i]; 730 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 731 while (nz--) { 732 idx = 5*(*vi++); 733 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 734 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 735 v += 25; 736 } 737 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 738 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 739 idiag += 25; 740 i2 += 5; 741 } 742 break; 743 case 6: 744 for (i=0; i<m; i++) { 745 v = aa + 36*ai[i]; 746 vi = aj + ai[i]; 747 nz = ai[i+1] - ai[i]; 748 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 749 while (nz--) { 750 idx = 6*(*vi++); 751 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 752 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 753 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 754 v += 36; 755 } 756 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 757 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 758 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 759 idiag += 36; 760 i2 += 6; 761 } 762 break; 763 case 7: 764 for (i=0; i<m; i++) { 765 v = aa + 49*ai[i]; 766 vi = aj + ai[i]; 767 nz = ai[i+1] - ai[i]; 768 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 769 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 770 while (nz--) { 771 idx = 7*(*vi++); 772 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 773 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 774 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 775 v += 49; 776 } 777 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 778 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 779 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 780 idiag += 49; 781 i2 += 7; 782 } 783 break; 784 default: 785 for (i=0; i<m; i++) { 786 v = aa + bs2*ai[i]; 787 vi = aj + ai[i]; 788 nz = ai[i+1] - ai[i]; 789 790 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 791 /* copy all rows of x that are needed into contiguous space */ 792 workt = work; 793 for (j=0; j<nz; j++) { 794 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 795 workt += bs; 796 } 797 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 798 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 799 800 idiag += bs2; 801 i2 += bs; 802 } 803 break; 804 } 805 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 806 } 807 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 808 idiag = a->idiag+bs2*(a->mbs-1); 809 i2 = bs * (m-1); 810 switch (bs) { 811 case 1: 812 for (i=m-1; i>=0; i--) { 813 v = aa + ai[i]; 814 vi = aj + ai[i]; 815 nz = ai[i+1] - ai[i]; 816 s[0] = b[i2]; 817 for (j=0; j<nz; j++) { 818 xw[0] = x[vi[j]]; 819 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 820 } 821 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 822 x[i2] += xw[0]; 823 idiag -= 1; 824 i2 -= 1; 825 } 826 break; 827 case 2: 828 for (i=m-1; i>=0; i--) { 829 v = aa + 4*ai[i]; 830 vi = aj + ai[i]; 831 nz = ai[i+1] - ai[i]; 832 s[0] = b[i2]; s[1] = b[i2+1]; 833 for (j=0; j<nz; j++) { 834 idx = 2*vi[j]; 835 it = 4*j; 836 xw[0] = x[idx]; xw[1] = x[1+idx]; 837 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 838 } 839 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 840 x[i2] += xw[0]; x[i2+1] += xw[1]; 841 idiag -= 4; 842 i2 -= 2; 843 } 844 break; 845 case 3: 846 for (i=m-1; i>=0; i--) { 847 v = aa + 9*ai[i]; 848 vi = aj + ai[i]; 849 nz = ai[i+1] - ai[i]; 850 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 851 while (nz--) { 852 idx = 3*(*vi++); 853 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 855 v += 9; 856 } 857 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 idiag -= 9; 860 i2 -= 3; 861 } 862 break; 863 case 4: 864 for (i=m-1; i>=0; i--) { 865 v = aa + 16*ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i+1] - ai[i]; 868 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 869 while (nz--) { 870 idx = 4*(*vi++); 871 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 872 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 873 v += 16; 874 } 875 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 876 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 877 idiag -= 16; 878 i2 -= 4; 879 } 880 break; 881 case 5: 882 for (i=m-1; i>=0; i--) { 883 v = aa + 25*ai[i]; 884 vi = aj + ai[i]; 885 nz = ai[i+1] - ai[i]; 886 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 887 while (nz--) { 888 idx = 5*(*vi++); 889 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 890 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 891 v += 25; 892 } 893 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 894 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 895 idiag -= 25; 896 i2 -= 5; 897 } 898 break; 899 case 6: 900 for (i=m-1; i>=0; i--) { 901 v = aa + 36*ai[i]; 902 vi = aj + ai[i]; 903 nz = ai[i+1] - ai[i]; 904 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 905 while (nz--) { 906 idx = 6*(*vi++); 907 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 908 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 909 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 910 v += 36; 911 } 912 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 913 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 914 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 915 idiag -= 36; 916 i2 -= 6; 917 } 918 break; 919 case 7: 920 for (i=m-1; i>=0; i--) { 921 v = aa + 49*ai[i]; 922 vi = aj + ai[i]; 923 nz = ai[i+1] - ai[i]; 924 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 925 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 926 while (nz--) { 927 idx = 7*(*vi++); 928 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 929 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 930 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 931 v += 49; 932 } 933 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 934 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 935 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 936 idiag -= 49; 937 i2 -= 7; 938 } 939 break; 940 default: 941 for (i=m-1; i>=0; i--) { 942 v = aa + bs2*ai[i]; 943 vi = aj + ai[i]; 944 nz = ai[i+1] - ai[i]; 945 946 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 947 /* copy all rows of x that are needed into contiguous space */ 948 workt = work; 949 for (j=0; j<nz; j++) { 950 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 951 workt += bs; 952 } 953 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 954 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 955 956 idiag -= bs2; 957 i2 -= bs; 958 } 959 break; 960 } 961 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 962 } 963 } 964 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 965 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 970 /* 971 Special version for direct calls from Fortran (Used in PETSc-fun3d) 972 */ 973 #if defined(PETSC_HAVE_FORTRAN_CAPS) 974 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 975 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 976 #define matsetvaluesblocked4_ matsetvaluesblocked4 977 #endif 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "matsetvaluesblocked4_" 981 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 982 { 983 Mat A = *AA; 984 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 985 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 986 PetscInt *ai =a->i,*ailen=a->ilen; 987 PetscInt *aj =a->j,stepval,lastcol = -1; 988 const PetscScalar *value = v; 989 MatScalar *ap,*aa = a->a,*bap; 990 991 PetscFunctionBegin; 992 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 993 stepval = (n-1)*4; 994 for (k=0; k<m; k++) { /* loop over added rows */ 995 row = im[k]; 996 rp = aj + ai[row]; 997 ap = aa + 16*ai[row]; 998 nrow = ailen[row]; 999 low = 0; 1000 high = nrow; 1001 for (l=0; l<n; l++) { /* loop over added columns */ 1002 col = in[l]; 1003 if (col <= lastcol) low = 0; 1004 else high = nrow; 1005 lastcol = col; 1006 value = v + k*(stepval+4 + l)*4; 1007 while (high-low > 7) { 1008 t = (low+high)/2; 1009 if (rp[t] > col) high = t; 1010 else low = t; 1011 } 1012 for (i=low; i<high; i++) { 1013 if (rp[i] > col) break; 1014 if (rp[i] == col) { 1015 bap = ap + 16*i; 1016 for (ii=0; ii<4; ii++,value+=stepval) { 1017 for (jj=ii; jj<16; jj+=4) { 1018 bap[jj] += *value++; 1019 } 1020 } 1021 goto noinsert2; 1022 } 1023 } 1024 N = nrow++ - 1; 1025 high++; /* added new column index thus must search to one higher than before */ 1026 /* shift up all the later entries in this row */ 1027 for (ii=N; ii>=i; ii--) { 1028 rp[ii+1] = rp[ii]; 1029 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1030 } 1031 if (N >= i) { 1032 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1033 } 1034 rp[i] = col; 1035 bap = ap + 16*i; 1036 for (ii=0; ii<4; ii++,value+=stepval) { 1037 for (jj=ii; jj<16; jj+=4) { 1038 bap[jj] = *value++; 1039 } 1040 } 1041 noinsert2:; 1042 low = i; 1043 } 1044 ailen[row] = nrow; 1045 } 1046 PetscFunctionReturnVoid(); 1047 } 1048 1049 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1050 #define matsetvalues4_ MATSETVALUES4 1051 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1052 #define matsetvalues4_ matsetvalues4 1053 #endif 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatSetValues4_" 1057 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1058 { 1059 Mat A = *AA; 1060 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1061 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1062 PetscInt *ai=a->i,*ailen=a->ilen; 1063 PetscInt *aj=a->j,brow,bcol; 1064 PetscInt ridx,cidx,lastcol = -1; 1065 MatScalar *ap,value,*aa=a->a,*bap; 1066 1067 PetscFunctionBegin; 1068 for (k=0; k<m; k++) { /* loop over added rows */ 1069 row = im[k]; brow = row/4; 1070 rp = aj + ai[brow]; 1071 ap = aa + 16*ai[brow]; 1072 nrow = ailen[brow]; 1073 low = 0; 1074 high = nrow; 1075 for (l=0; l<n; l++) { /* loop over added columns */ 1076 col = in[l]; bcol = col/4; 1077 ridx = row % 4; cidx = col % 4; 1078 value = v[l + k*n]; 1079 if (col <= lastcol) low = 0; 1080 else high = nrow; 1081 lastcol = col; 1082 while (high-low > 7) { 1083 t = (low+high)/2; 1084 if (rp[t] > bcol) high = t; 1085 else low = t; 1086 } 1087 for (i=low; i<high; i++) { 1088 if (rp[i] > bcol) break; 1089 if (rp[i] == bcol) { 1090 bap = ap + 16*i + 4*cidx + ridx; 1091 *bap += value; 1092 goto noinsert1; 1093 } 1094 } 1095 N = nrow++ - 1; 1096 high++; /* added new column thus must search to one higher than before */ 1097 /* shift up all the later entries in this row */ 1098 for (ii=N; ii>=i; ii--) { 1099 rp[ii+1] = rp[ii]; 1100 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1101 } 1102 if (N>=i) { 1103 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1104 } 1105 rp[i] = bcol; 1106 ap[16*i + 4*cidx + ridx] = value; 1107 noinsert1:; 1108 low = i; 1109 } 1110 ailen[brow] = nrow; 1111 } 1112 PetscFunctionReturnVoid(); 1113 } 1114 1115 /* 1116 Checks for missing diagonals 1117 */ 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1120 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1121 { 1122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscInt *diag,*jj = a->j,i; 1125 1126 PetscFunctionBegin; 1127 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1128 *missing = PETSC_FALSE; 1129 if (A->rmap->n > 0 && !jj) { 1130 *missing = PETSC_TRUE; 1131 if (d) *d = 0; 1132 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1133 } else { 1134 diag = a->diag; 1135 for (i=0; i<a->mbs; i++) { 1136 if (jj[diag[i]] != i) { 1137 *missing = PETSC_TRUE; 1138 if (d) *d = i; 1139 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1140 break; 1141 } 1142 } 1143 } 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1149 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1150 { 1151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1152 PetscErrorCode ierr; 1153 PetscInt i,j,m = a->mbs; 1154 1155 PetscFunctionBegin; 1156 if (!a->diag) { 1157 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1158 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1159 a->free_diag = PETSC_TRUE; 1160 } 1161 for (i=0; i<m; i++) { 1162 a->diag[i] = a->i[i+1]; 1163 for (j=a->i[i]; j<a->i[i+1]; j++) { 1164 if (a->j[j] == i) { 1165 a->diag[i] = j; 1166 break; 1167 } 1168 } 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1176 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1177 { 1178 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1179 PetscErrorCode ierr; 1180 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1181 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1182 1183 PetscFunctionBegin; 1184 *nn = n; 1185 if (!ia) PetscFunctionReturn(0); 1186 if (symmetric) { 1187 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1188 nz = tia[n]; 1189 } else { 1190 tia = a->i; tja = a->j; 1191 } 1192 1193 if (!blockcompressed && bs > 1) { 1194 (*nn) *= bs; 1195 /* malloc & create the natural set of indices */ 1196 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1197 if (n) { 1198 (*ia)[0] = 0; 1199 for (j=1; j<bs; j++) { 1200 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1201 } 1202 } 1203 1204 for (i=1; i<n; i++) { 1205 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1206 for (j=1; j<bs; j++) { 1207 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1208 } 1209 } 1210 if (n) { 1211 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1212 } 1213 1214 if (inja) { 1215 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1216 cnt = 0; 1217 for (i=0; i<n; i++) { 1218 for (j=0; j<bs; j++) { 1219 for (k=tia[i]; k<tia[i+1]; k++) { 1220 for (l=0; l<bs; l++) { 1221 (*ja)[cnt++] = bs*tja[k] + l; 1222 } 1223 } 1224 } 1225 } 1226 } 1227 1228 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1229 ierr = PetscFree(tia);CHKERRQ(ierr); 1230 ierr = PetscFree(tja);CHKERRQ(ierr); 1231 } 1232 } else if (oshift == 1) { 1233 if (symmetric) { 1234 nz = tia[A->rmap->n/bs]; 1235 /* add 1 to i and j indices */ 1236 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1237 *ia = tia; 1238 if (ja) { 1239 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1240 *ja = tja; 1241 } 1242 } else { 1243 nz = a->i[A->rmap->n/bs]; 1244 /* malloc space and add 1 to i and j indices */ 1245 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1246 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1247 if (ja) { 1248 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1249 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1250 } 1251 } 1252 } else { 1253 *ia = tia; 1254 if (ja) *ja = tja; 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1261 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1262 { 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 if (!ia) PetscFunctionReturn(0); 1267 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1268 ierr = PetscFree(*ia);CHKERRQ(ierr); 1269 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1270 } 1271 PetscFunctionReturn(0); 1272 } 1273 1274 #undef __FUNCT__ 1275 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1276 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1277 { 1278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1279 PetscErrorCode ierr; 1280 1281 PetscFunctionBegin; 1282 #if defined(PETSC_USE_LOG) 1283 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1284 #endif 1285 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1286 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1287 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1288 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1289 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1290 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1291 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1292 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1293 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1294 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1295 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1296 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1297 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1298 1299 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1300 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1301 ierr = PetscFree(A->data);CHKERRQ(ierr); 1302 1303 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1319 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1320 { 1321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1322 PetscErrorCode ierr; 1323 1324 PetscFunctionBegin; 1325 switch (op) { 1326 case MAT_ROW_ORIENTED: 1327 a->roworiented = flg; 1328 break; 1329 case MAT_KEEP_NONZERO_PATTERN: 1330 a->keepnonzeropattern = flg; 1331 break; 1332 case MAT_NEW_NONZERO_LOCATIONS: 1333 a->nonew = (flg ? 0 : 1); 1334 break; 1335 case MAT_NEW_NONZERO_LOCATION_ERR: 1336 a->nonew = (flg ? -1 : 0); 1337 break; 1338 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1339 a->nonew = (flg ? -2 : 0); 1340 break; 1341 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1342 a->nounused = (flg ? -1 : 0); 1343 break; 1344 case MAT_CHECK_COMPRESSED_ROW: 1345 a->compressedrow.check = flg; 1346 break; 1347 case MAT_NEW_DIAGONALS: 1348 case MAT_IGNORE_OFF_PROC_ENTRIES: 1349 case MAT_USE_HASH_TABLE: 1350 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1351 break; 1352 case MAT_SPD: 1353 case MAT_SYMMETRIC: 1354 case MAT_STRUCTURALLY_SYMMETRIC: 1355 case MAT_HERMITIAN: 1356 case MAT_SYMMETRY_ETERNAL: 1357 /* These options are handled directly by MatSetOption() */ 1358 break; 1359 default: 1360 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1361 } 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1367 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1368 { 1369 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1370 PetscErrorCode ierr; 1371 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1372 MatScalar *aa,*aa_i; 1373 PetscScalar *v_i; 1374 1375 PetscFunctionBegin; 1376 bs = A->rmap->bs; 1377 ai = a->i; 1378 aj = a->j; 1379 aa = a->a; 1380 bs2 = a->bs2; 1381 1382 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1383 1384 bn = row/bs; /* Block number */ 1385 bp = row % bs; /* Block Position */ 1386 M = ai[bn+1] - ai[bn]; 1387 *nz = bs*M; 1388 1389 if (v) { 1390 *v = 0; 1391 if (*nz) { 1392 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1393 for (i=0; i<M; i++) { /* for each block in the block row */ 1394 v_i = *v + i*bs; 1395 aa_i = aa + bs2*(ai[bn] + i); 1396 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1397 } 1398 } 1399 } 1400 1401 if (idx) { 1402 *idx = 0; 1403 if (*nz) { 1404 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1405 for (i=0; i<M; i++) { /* for each block in the block row */ 1406 idx_i = *idx + i*bs; 1407 itmp = bs*aj[ai[bn] + i]; 1408 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1409 } 1410 } 1411 } 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1417 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1418 { 1419 PetscErrorCode ierr; 1420 1421 PetscFunctionBegin; 1422 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1423 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1424 PetscFunctionReturn(0); 1425 } 1426 1427 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1431 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1432 { 1433 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1434 Mat C; 1435 PetscErrorCode ierr; 1436 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1437 PetscInt *rows,*cols,bs2=a->bs2; 1438 MatScalar *array; 1439 1440 PetscFunctionBegin; 1441 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1442 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1443 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1444 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1445 1446 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1447 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1448 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1449 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1450 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1451 ierr = PetscFree(col);CHKERRQ(ierr); 1452 } else { 1453 C = *B; 1454 } 1455 1456 array = a->a; 1457 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1458 for (i=0; i<mbs; i++) { 1459 cols[0] = i*bs; 1460 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1461 len = ai[i+1] - ai[i]; 1462 for (j=0; j<len; j++) { 1463 rows[0] = (*aj++)*bs; 1464 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1465 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1466 array += bs2; 1467 } 1468 } 1469 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1470 1471 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1472 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1473 1474 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1475 *B = C; 1476 } else { 1477 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1484 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1485 { 1486 PetscErrorCode ierr; 1487 Mat Btrans; 1488 1489 PetscFunctionBegin; 1490 *f = PETSC_FALSE; 1491 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1492 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1493 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1494 PetscFunctionReturn(0); 1495 } 1496 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1499 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1500 { 1501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1502 PetscErrorCode ierr; 1503 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1504 int fd; 1505 PetscScalar *aa; 1506 FILE *file; 1507 1508 PetscFunctionBegin; 1509 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1510 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1511 col_lens[0] = MAT_FILE_CLASSID; 1512 1513 col_lens[1] = A->rmap->N; 1514 col_lens[2] = A->cmap->n; 1515 col_lens[3] = a->nz*bs2; 1516 1517 /* store lengths of each row and write (including header) to file */ 1518 count = 0; 1519 for (i=0; i<a->mbs; i++) { 1520 for (j=0; j<bs; j++) { 1521 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1522 } 1523 } 1524 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1525 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1526 1527 /* store column indices (zero start index) */ 1528 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1529 count = 0; 1530 for (i=0; i<a->mbs; i++) { 1531 for (j=0; j<bs; j++) { 1532 for (k=a->i[i]; k<a->i[i+1]; k++) { 1533 for (l=0; l<bs; l++) { 1534 jj[count++] = bs*a->j[k] + l; 1535 } 1536 } 1537 } 1538 } 1539 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1540 ierr = PetscFree(jj);CHKERRQ(ierr); 1541 1542 /* store nonzero values */ 1543 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1544 count = 0; 1545 for (i=0; i<a->mbs; i++) { 1546 for (j=0; j<bs; j++) { 1547 for (k=a->i[i]; k<a->i[i+1]; k++) { 1548 for (l=0; l<bs; l++) { 1549 aa[count++] = a->a[bs2*k + l*bs + j]; 1550 } 1551 } 1552 } 1553 } 1554 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscFree(aa);CHKERRQ(ierr); 1556 1557 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1558 if (file) { 1559 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1560 } 1561 PetscFunctionReturn(0); 1562 } 1563 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1566 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1567 { 1568 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1569 PetscErrorCode ierr; 1570 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1571 PetscViewerFormat format; 1572 1573 PetscFunctionBegin; 1574 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1575 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1576 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1577 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1578 Mat aij; 1579 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1580 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1581 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1582 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1583 PetscFunctionReturn(0); 1584 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1586 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1587 for (i=0; i<a->mbs; i++) { 1588 for (j=0; j<bs; j++) { 1589 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1590 for (k=a->i[i]; k<a->i[i+1]; k++) { 1591 for (l=0; l<bs; l++) { 1592 #if defined(PETSC_USE_COMPLEX) 1593 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1594 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1595 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1596 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1597 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1598 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1599 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1600 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1601 } 1602 #else 1603 if (a->a[bs2*k + l*bs + j] != 0.0) { 1604 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1605 } 1606 #endif 1607 } 1608 } 1609 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1610 } 1611 } 1612 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1613 } else { 1614 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1615 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1616 for (i=0; i<a->mbs; i++) { 1617 for (j=0; j<bs; j++) { 1618 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1619 for (k=a->i[i]; k<a->i[i+1]; k++) { 1620 for (l=0; l<bs; l++) { 1621 #if defined(PETSC_USE_COMPLEX) 1622 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1623 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1624 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1625 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1626 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1627 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1628 } else { 1629 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1630 } 1631 #else 1632 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1633 #endif 1634 } 1635 } 1636 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1637 } 1638 } 1639 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1640 } 1641 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #include <petscdraw.h> 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1648 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1649 { 1650 Mat A = (Mat) Aa; 1651 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1652 PetscErrorCode ierr; 1653 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1654 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1655 MatScalar *aa; 1656 PetscViewer viewer; 1657 PetscViewerFormat format; 1658 1659 PetscFunctionBegin; 1660 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1661 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1662 1663 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1664 1665 /* loop over matrix elements drawing boxes */ 1666 1667 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1668 color = PETSC_DRAW_BLUE; 1669 for (i=0,row=0; i<mbs; i++,row+=bs) { 1670 for (j=a->i[i]; j<a->i[i+1]; j++) { 1671 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1672 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1673 aa = a->a + j*bs2; 1674 for (k=0; k<bs; k++) { 1675 for (l=0; l<bs; l++) { 1676 if (PetscRealPart(*aa++) >= 0.) continue; 1677 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1678 } 1679 } 1680 } 1681 } 1682 color = PETSC_DRAW_CYAN; 1683 for (i=0,row=0; i<mbs; i++,row+=bs) { 1684 for (j=a->i[i]; j<a->i[i+1]; j++) { 1685 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1686 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1687 aa = a->a + j*bs2; 1688 for (k=0; k<bs; k++) { 1689 for (l=0; l<bs; l++) { 1690 if (PetscRealPart(*aa++) != 0.) continue; 1691 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1692 } 1693 } 1694 } 1695 } 1696 color = PETSC_DRAW_RED; 1697 for (i=0,row=0; i<mbs; i++,row+=bs) { 1698 for (j=a->i[i]; j<a->i[i+1]; j++) { 1699 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1700 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1701 aa = a->a + j*bs2; 1702 for (k=0; k<bs; k++) { 1703 for (l=0; l<bs; l++) { 1704 if (PetscRealPart(*aa++) <= 0.) continue; 1705 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1706 } 1707 } 1708 } 1709 } 1710 } else { 1711 /* use contour shading to indicate magnitude of values */ 1712 /* first determine max of all nonzero values */ 1713 PetscDraw popup; 1714 PetscReal scale,maxv = 0.0; 1715 1716 for (i=0; i<a->nz*a->bs2; i++) { 1717 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1718 } 1719 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1720 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1721 if (popup) { 1722 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1723 } 1724 for (i=0,row=0; i<mbs; i++,row+=bs) { 1725 for (j=a->i[i]; j<a->i[i+1]; j++) { 1726 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1727 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1728 aa = a->a + j*bs2; 1729 for (k=0; k<bs; k++) { 1730 for (l=0; l<bs; l++) { 1731 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1732 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1733 } 1734 } 1735 } 1736 } 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1743 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1744 { 1745 PetscErrorCode ierr; 1746 PetscReal xl,yl,xr,yr,w,h; 1747 PetscDraw draw; 1748 PetscBool isnull; 1749 1750 PetscFunctionBegin; 1751 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1752 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1753 1754 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1755 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1756 xr += w; yr += h; xl = -w; yl = -h; 1757 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1758 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1759 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatView_SeqBAIJ" 1765 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1766 { 1767 PetscErrorCode ierr; 1768 PetscBool iascii,isbinary,isdraw; 1769 1770 PetscFunctionBegin; 1771 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1772 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1773 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1774 if (iascii) { 1775 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1776 } else if (isbinary) { 1777 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1778 } else if (isdraw) { 1779 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1780 } else { 1781 Mat B; 1782 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1783 ierr = MatView(B,viewer);CHKERRQ(ierr); 1784 ierr = MatDestroy(&B);CHKERRQ(ierr); 1785 } 1786 PetscFunctionReturn(0); 1787 } 1788 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1792 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1793 { 1794 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1795 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1796 PetscInt *ai = a->i,*ailen = a->ilen; 1797 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1798 MatScalar *ap,*aa = a->a; 1799 1800 PetscFunctionBegin; 1801 for (k=0; k<m; k++) { /* loop over rows */ 1802 row = im[k]; brow = row/bs; 1803 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1804 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1805 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1806 nrow = ailen[brow]; 1807 for (l=0; l<n; l++) { /* loop over columns */ 1808 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1809 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1810 col = in[l]; 1811 bcol = col/bs; 1812 cidx = col%bs; 1813 ridx = row%bs; 1814 high = nrow; 1815 low = 0; /* assume unsorted */ 1816 while (high-low > 5) { 1817 t = (low+high)/2; 1818 if (rp[t] > bcol) high = t; 1819 else low = t; 1820 } 1821 for (i=low; i<high; i++) { 1822 if (rp[i] > bcol) break; 1823 if (rp[i] == bcol) { 1824 *v++ = ap[bs2*i+bs*cidx+ridx]; 1825 goto finished; 1826 } 1827 } 1828 *v++ = 0.0; 1829 finished:; 1830 } 1831 } 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1837 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1838 { 1839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1840 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1841 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1842 PetscErrorCode ierr; 1843 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1844 PetscBool roworiented=a->roworiented; 1845 const PetscScalar *value = v; 1846 MatScalar *ap,*aa = a->a,*bap; 1847 1848 PetscFunctionBegin; 1849 if (roworiented) { 1850 stepval = (n-1)*bs; 1851 } else { 1852 stepval = (m-1)*bs; 1853 } 1854 for (k=0; k<m; k++) { /* loop over added rows */ 1855 row = im[k]; 1856 if (row < 0) continue; 1857 #if defined(PETSC_USE_DEBUG) 1858 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1859 #endif 1860 rp = aj + ai[row]; 1861 ap = aa + bs2*ai[row]; 1862 rmax = imax[row]; 1863 nrow = ailen[row]; 1864 low = 0; 1865 high = nrow; 1866 for (l=0; l<n; l++) { /* loop over added columns */ 1867 if (in[l] < 0) continue; 1868 #if defined(PETSC_USE_DEBUG) 1869 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1870 #endif 1871 col = in[l]; 1872 if (roworiented) { 1873 value = v + (k*(stepval+bs) + l)*bs; 1874 } else { 1875 value = v + (l*(stepval+bs) + k)*bs; 1876 } 1877 if (col <= lastcol) low = 0; 1878 else high = nrow; 1879 lastcol = col; 1880 while (high-low > 7) { 1881 t = (low+high)/2; 1882 if (rp[t] > col) high = t; 1883 else low = t; 1884 } 1885 for (i=low; i<high; i++) { 1886 if (rp[i] > col) break; 1887 if (rp[i] == col) { 1888 bap = ap + bs2*i; 1889 if (roworiented) { 1890 if (is == ADD_VALUES) { 1891 for (ii=0; ii<bs; ii++,value+=stepval) { 1892 for (jj=ii; jj<bs2; jj+=bs) { 1893 bap[jj] += *value++; 1894 } 1895 } 1896 } else { 1897 for (ii=0; ii<bs; ii++,value+=stepval) { 1898 for (jj=ii; jj<bs2; jj+=bs) { 1899 bap[jj] = *value++; 1900 } 1901 } 1902 } 1903 } else { 1904 if (is == ADD_VALUES) { 1905 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1906 for (jj=0; jj<bs; jj++) { 1907 bap[jj] += value[jj]; 1908 } 1909 bap += bs; 1910 } 1911 } else { 1912 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1913 for (jj=0; jj<bs; jj++) { 1914 bap[jj] = value[jj]; 1915 } 1916 bap += bs; 1917 } 1918 } 1919 } 1920 goto noinsert2; 1921 } 1922 } 1923 if (nonew == 1) goto noinsert2; 1924 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1925 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1926 N = nrow++ - 1; high++; 1927 /* shift up all the later entries in this row */ 1928 for (ii=N; ii>=i; ii--) { 1929 rp[ii+1] = rp[ii]; 1930 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1931 } 1932 if (N >= i) { 1933 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1934 } 1935 rp[i] = col; 1936 bap = ap + bs2*i; 1937 if (roworiented) { 1938 for (ii=0; ii<bs; ii++,value+=stepval) { 1939 for (jj=ii; jj<bs2; jj+=bs) { 1940 bap[jj] = *value++; 1941 } 1942 } 1943 } else { 1944 for (ii=0; ii<bs; ii++,value+=stepval) { 1945 for (jj=0; jj<bs; jj++) { 1946 *bap++ = *value++; 1947 } 1948 } 1949 } 1950 noinsert2:; 1951 low = i; 1952 } 1953 ailen[row] = nrow; 1954 } 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1960 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1961 { 1962 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1963 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1964 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1965 PetscErrorCode ierr; 1966 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1967 MatScalar *aa = a->a,*ap; 1968 PetscReal ratio=0.6; 1969 1970 PetscFunctionBegin; 1971 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1972 1973 if (m) rmax = ailen[0]; 1974 for (i=1; i<mbs; i++) { 1975 /* move each row back by the amount of empty slots (fshift) before it*/ 1976 fshift += imax[i-1] - ailen[i-1]; 1977 rmax = PetscMax(rmax,ailen[i]); 1978 if (fshift) { 1979 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1980 N = ailen[i]; 1981 for (j=0; j<N; j++) { 1982 ip[j-fshift] = ip[j]; 1983 1984 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1985 } 1986 } 1987 ai[i] = ai[i-1] + ailen[i-1]; 1988 } 1989 if (mbs) { 1990 fshift += imax[mbs-1] - ailen[mbs-1]; 1991 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1992 } 1993 /* reset ilen and imax for each row */ 1994 for (i=0; i<mbs; i++) { 1995 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1996 } 1997 a->nz = ai[mbs]; 1998 1999 /* diagonals may have moved, so kill the diagonal pointers */ 2000 a->idiagvalid = PETSC_FALSE; 2001 if (fshift && a->diag) { 2002 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2003 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2004 a->diag = 0; 2005 } 2006 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2007 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2008 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2009 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2010 2011 A->info.mallocs += a->reallocs; 2012 a->reallocs = 0; 2013 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2014 2015 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2016 2017 A->same_nonzero = PETSC_TRUE; 2018 PetscFunctionReturn(0); 2019 } 2020 2021 /* 2022 This function returns an array of flags which indicate the locations of contiguous 2023 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2024 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2025 Assume: sizes should be long enough to hold all the values. 2026 */ 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2029 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2030 { 2031 PetscInt i,j,k,row; 2032 PetscBool flg; 2033 2034 PetscFunctionBegin; 2035 for (i=0,j=0; i<n; j++) { 2036 row = idx[i]; 2037 if (row%bs!=0) { /* Not the begining of a block */ 2038 sizes[j] = 1; 2039 i++; 2040 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2041 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2042 i++; 2043 } else { /* Begining of the block, so check if the complete block exists */ 2044 flg = PETSC_TRUE; 2045 for (k=1; k<bs; k++) { 2046 if (row+k != idx[i+k]) { /* break in the block */ 2047 flg = PETSC_FALSE; 2048 break; 2049 } 2050 } 2051 if (flg) { /* No break in the bs */ 2052 sizes[j] = bs; 2053 i += bs; 2054 } else { 2055 sizes[j] = 1; 2056 i++; 2057 } 2058 } 2059 } 2060 *bs_max = j; 2061 PetscFunctionReturn(0); 2062 } 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2066 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2067 { 2068 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2069 PetscErrorCode ierr; 2070 PetscInt i,j,k,count,*rows; 2071 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2072 PetscScalar zero = 0.0; 2073 MatScalar *aa; 2074 const PetscScalar *xx; 2075 PetscScalar *bb; 2076 2077 PetscFunctionBegin; 2078 /* fix right hand side if needed */ 2079 if (x && b) { 2080 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2081 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2082 for (i=0; i<is_n; i++) { 2083 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2084 } 2085 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2086 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2087 } 2088 2089 /* Make a copy of the IS and sort it */ 2090 /* allocate memory for rows,sizes */ 2091 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2092 2093 /* copy IS values to rows, and sort them */ 2094 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2095 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2096 2097 if (baij->keepnonzeropattern) { 2098 for (i=0; i<is_n; i++) sizes[i] = 1; 2099 bs_max = is_n; 2100 A->same_nonzero = PETSC_TRUE; 2101 } else { 2102 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2103 2104 A->same_nonzero = PETSC_FALSE; 2105 } 2106 2107 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2108 row = rows[j]; 2109 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2110 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2111 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2112 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2113 if (diag != (PetscScalar)0.0) { 2114 if (baij->ilen[row/bs] > 0) { 2115 baij->ilen[row/bs] = 1; 2116 baij->j[baij->i[row/bs]] = row/bs; 2117 2118 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2119 } 2120 /* Now insert all the diagonal values for this bs */ 2121 for (k=0; k<bs; k++) { 2122 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2123 } 2124 } else { /* (diag == 0.0) */ 2125 baij->ilen[row/bs] = 0; 2126 } /* end (diag == 0.0) */ 2127 } else { /* (sizes[i] != bs) */ 2128 #if defined(PETSC_USE_DEBUG) 2129 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2130 #endif 2131 for (k=0; k<count; k++) { 2132 aa[0] = zero; 2133 aa += bs; 2134 } 2135 if (diag != (PetscScalar)0.0) { 2136 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2137 } 2138 } 2139 } 2140 2141 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2142 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 #undef __FUNCT__ 2147 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2148 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2149 { 2150 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2151 PetscErrorCode ierr; 2152 PetscInt i,j,k,count; 2153 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2154 PetscScalar zero = 0.0; 2155 MatScalar *aa; 2156 const PetscScalar *xx; 2157 PetscScalar *bb; 2158 PetscBool *zeroed,vecs = PETSC_FALSE; 2159 2160 PetscFunctionBegin; 2161 /* fix right hand side if needed */ 2162 if (x && b) { 2163 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2164 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2165 vecs = PETSC_TRUE; 2166 } 2167 A->same_nonzero = PETSC_TRUE; 2168 2169 /* zero the columns */ 2170 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2171 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2172 for (i=0; i<is_n; i++) { 2173 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2174 zeroed[is_idx[i]] = PETSC_TRUE; 2175 } 2176 for (i=0; i<A->rmap->N; i++) { 2177 if (!zeroed[i]) { 2178 row = i/bs; 2179 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2180 for (k=0; k<bs; k++) { 2181 col = bs*baij->j[j] + k; 2182 if (zeroed[col]) { 2183 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2184 if (vecs) bb[i] -= aa[0]*xx[col]; 2185 aa[0] = 0.0; 2186 } 2187 } 2188 } 2189 } else if (vecs) bb[i] = diag*xx[i]; 2190 } 2191 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2192 if (vecs) { 2193 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2194 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2195 } 2196 2197 /* zero the rows */ 2198 for (i=0; i<is_n; i++) { 2199 row = is_idx[i]; 2200 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2201 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2202 for (k=0; k<count; k++) { 2203 aa[0] = zero; 2204 aa += bs; 2205 } 2206 if (diag != (PetscScalar)0.0) { 2207 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2208 } 2209 } 2210 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2216 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2217 { 2218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2219 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2220 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2221 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2222 PetscErrorCode ierr; 2223 PetscInt ridx,cidx,bs2=a->bs2; 2224 PetscBool roworiented=a->roworiented; 2225 MatScalar *ap,value,*aa=a->a,*bap; 2226 2227 PetscFunctionBegin; 2228 if (v) PetscValidScalarPointer(v,6); 2229 for (k=0; k<m; k++) { /* loop over added rows */ 2230 row = im[k]; 2231 brow = row/bs; 2232 if (row < 0) continue; 2233 #if defined(PETSC_USE_DEBUG) 2234 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2235 #endif 2236 rp = aj + ai[brow]; 2237 ap = aa + bs2*ai[brow]; 2238 rmax = imax[brow]; 2239 nrow = ailen[brow]; 2240 low = 0; 2241 high = nrow; 2242 for (l=0; l<n; l++) { /* loop over added columns */ 2243 if (in[l] < 0) continue; 2244 #if defined(PETSC_USE_DEBUG) 2245 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2246 #endif 2247 col = in[l]; bcol = col/bs; 2248 ridx = row % bs; cidx = col % bs; 2249 if (roworiented) { 2250 value = v[l + k*n]; 2251 } else { 2252 value = v[k + l*m]; 2253 } 2254 if (col <= lastcol) low = 0; else high = nrow; 2255 lastcol = col; 2256 while (high-low > 7) { 2257 t = (low+high)/2; 2258 if (rp[t] > bcol) high = t; 2259 else low = t; 2260 } 2261 for (i=low; i<high; i++) { 2262 if (rp[i] > bcol) break; 2263 if (rp[i] == bcol) { 2264 bap = ap + bs2*i + bs*cidx + ridx; 2265 if (is == ADD_VALUES) *bap += value; 2266 else *bap = value; 2267 goto noinsert1; 2268 } 2269 } 2270 if (nonew == 1) goto noinsert1; 2271 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2272 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2273 N = nrow++ - 1; high++; 2274 /* shift up all the later entries in this row */ 2275 for (ii=N; ii>=i; ii--) { 2276 rp[ii+1] = rp[ii]; 2277 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2278 } 2279 if (N>=i) { 2280 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2281 } 2282 rp[i] = bcol; 2283 ap[bs2*i + bs*cidx + ridx] = value; 2284 a->nz++; 2285 noinsert1:; 2286 low = i; 2287 } 2288 ailen[brow] = nrow; 2289 } 2290 A->same_nonzero = PETSC_FALSE; 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2296 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2297 { 2298 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2299 Mat outA; 2300 PetscErrorCode ierr; 2301 PetscBool row_identity,col_identity; 2302 2303 PetscFunctionBegin; 2304 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2305 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2306 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2307 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2308 2309 outA = inA; 2310 inA->factortype = MAT_FACTOR_LU; 2311 2312 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2313 2314 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2315 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2316 a->row = row; 2317 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2318 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2319 a->col = col; 2320 2321 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2322 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2323 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2324 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2325 2326 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2327 if (!a->solve_work) { 2328 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2329 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2330 } 2331 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2332 PetscFunctionReturn(0); 2333 } 2334 2335 #undef __FUNCT__ 2336 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2337 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2338 { 2339 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2340 PetscInt i,nz,mbs; 2341 2342 PetscFunctionBegin; 2343 nz = baij->maxnz; 2344 mbs = baij->mbs; 2345 for (i=0; i<nz; i++) { 2346 baij->j[i] = indices[i]; 2347 } 2348 baij->nz = nz; 2349 for (i=0; i<mbs; i++) { 2350 baij->ilen[i] = baij->imax[i]; 2351 } 2352 PetscFunctionReturn(0); 2353 } 2354 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2357 /*@ 2358 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2359 in the matrix. 2360 2361 Input Parameters: 2362 + mat - the SeqBAIJ matrix 2363 - indices - the column indices 2364 2365 Level: advanced 2366 2367 Notes: 2368 This can be called if you have precomputed the nonzero structure of the 2369 matrix and want to provide it to the matrix object to improve the performance 2370 of the MatSetValues() operation. 2371 2372 You MUST have set the correct numbers of nonzeros per row in the call to 2373 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2374 2375 MUST be called before any calls to MatSetValues(); 2376 2377 @*/ 2378 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2379 { 2380 PetscErrorCode ierr; 2381 2382 PetscFunctionBegin; 2383 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2384 PetscValidPointer(indices,2); 2385 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 #undef __FUNCT__ 2390 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2391 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2392 { 2393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2394 PetscErrorCode ierr; 2395 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2396 PetscReal atmp; 2397 PetscScalar *x,zero = 0.0; 2398 MatScalar *aa; 2399 PetscInt ncols,brow,krow,kcol; 2400 2401 PetscFunctionBegin; 2402 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2403 bs = A->rmap->bs; 2404 aa = a->a; 2405 ai = a->i; 2406 aj = a->j; 2407 mbs = a->mbs; 2408 2409 ierr = VecSet(v,zero);CHKERRQ(ierr); 2410 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2411 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2412 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2413 for (i=0; i<mbs; i++) { 2414 ncols = ai[1] - ai[0]; ai++; 2415 brow = bs*i; 2416 for (j=0; j<ncols; j++) { 2417 for (kcol=0; kcol<bs; kcol++) { 2418 for (krow=0; krow<bs; krow++) { 2419 atmp = PetscAbsScalar(*aa);aa++; 2420 row = brow + krow; /* row index */ 2421 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2422 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2423 } 2424 } 2425 aj++; 2426 } 2427 } 2428 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatCopy_SeqBAIJ" 2434 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2435 { 2436 PetscErrorCode ierr; 2437 2438 PetscFunctionBegin; 2439 /* If the two matrices have the same copy implementation, use fast copy. */ 2440 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2441 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2442 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2443 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2444 2445 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2446 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2447 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2448 } else { 2449 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2450 } 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2456 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2457 { 2458 PetscErrorCode ierr; 2459 2460 PetscFunctionBegin; 2461 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2467 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2468 { 2469 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2470 2471 PetscFunctionBegin; 2472 *array = a->a; 2473 PetscFunctionReturn(0); 2474 } 2475 2476 #undef __FUNCT__ 2477 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2478 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2479 { 2480 PetscFunctionBegin; 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2486 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2487 { 2488 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2489 PetscErrorCode ierr; 2490 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2491 PetscBLASInt one=1; 2492 2493 PetscFunctionBegin; 2494 if (str == SAME_NONZERO_PATTERN) { 2495 PetscScalar alpha = a; 2496 PetscBLASInt bnz; 2497 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2498 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2499 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2500 if (y->xtoy && y->XtoY != X) { 2501 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2502 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2503 } 2504 if (!y->xtoy) { /* get xtoy */ 2505 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2506 y->XtoY = X; 2507 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2508 } 2509 for (i=0; i<x->nz; i++) { 2510 j = 0; 2511 while (j < bs2) { 2512 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2513 j++; 2514 } 2515 } 2516 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2517 } else { 2518 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2519 } 2520 PetscFunctionReturn(0); 2521 } 2522 2523 #undef __FUNCT__ 2524 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2525 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2526 { 2527 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2528 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2529 MatScalar *aa = a->a; 2530 2531 PetscFunctionBegin; 2532 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2538 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2539 { 2540 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2541 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2542 MatScalar *aa = a->a; 2543 2544 PetscFunctionBegin; 2545 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2550 2551 #undef __FUNCT__ 2552 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2553 /* 2554 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2555 */ 2556 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2557 { 2558 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2559 PetscErrorCode ierr; 2560 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2561 PetscInt nz = a->i[m],row,*jj,mr,col; 2562 2563 PetscFunctionBegin; 2564 *nn = n; 2565 if (!ia) PetscFunctionReturn(0); 2566 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2567 else { 2568 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2569 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2570 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2571 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2572 jj = a->j; 2573 for (i=0; i<nz; i++) { 2574 collengths[jj[i]]++; 2575 } 2576 cia[0] = oshift; 2577 for (i=0; i<n; i++) { 2578 cia[i+1] = cia[i] + collengths[i]; 2579 } 2580 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2581 jj = a->j; 2582 for (row=0; row<m; row++) { 2583 mr = a->i[row+1] - a->i[row]; 2584 for (i=0; i<mr; i++) { 2585 col = *jj++; 2586 2587 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2588 } 2589 } 2590 ierr = PetscFree(collengths);CHKERRQ(ierr); 2591 *ia = cia; *ja = cja; 2592 } 2593 PetscFunctionReturn(0); 2594 } 2595 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2598 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2599 { 2600 PetscErrorCode ierr; 2601 2602 PetscFunctionBegin; 2603 if (!ia) PetscFunctionReturn(0); 2604 ierr = PetscFree(*ia);CHKERRQ(ierr); 2605 ierr = PetscFree(*ja);CHKERRQ(ierr); 2606 PetscFunctionReturn(0); 2607 } 2608 2609 #undef __FUNCT__ 2610 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2611 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2612 { 2613 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 2614 PetscErrorCode ierr; 2615 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2616 PetscScalar dx,*y,*xx,*w3_array; 2617 PetscScalar *vscale_array; 2618 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2619 Vec w1 = coloring->w1,w2=coloring->w2,w3; 2620 void *fctx = coloring->fctx; 2621 PetscBool flg = PETSC_FALSE; 2622 PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 2623 Vec x1_tmp; 2624 2625 PetscFunctionBegin; 2626 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2627 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2628 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2629 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2630 2631 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2632 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2633 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 2634 if (flg) { 2635 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2636 } else { 2637 PetscBool assembled; 2638 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2639 if (assembled) { 2640 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2641 } 2642 } 2643 2644 x1_tmp = x1; 2645 if (!coloring->vscale) { 2646 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2647 } 2648 2649 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2650 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2651 } 2652 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2653 2654 /* Set w1 = F(x1) */ 2655 if (!coloring->fset) { 2656 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2657 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2658 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2659 } else { 2660 coloring->fset = PETSC_FALSE; 2661 } 2662 2663 if (!coloring->w3) { 2664 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2665 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2666 } 2667 w3 = coloring->w3; 2668 2669 /* Compute all the local scale factors, including ghost points */ 2670 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2671 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2672 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2673 if (ctype == IS_COLORING_GHOSTED) { 2674 col_start = 0; col_end = N; 2675 } else if (ctype == IS_COLORING_GLOBAL) { 2676 xx = xx - start; 2677 vscale_array = vscale_array - start; 2678 col_start = start; col_end = N + start; 2679 } 2680 for (col=col_start; col<col_end; col++) { 2681 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2682 if (coloring->htype[0] == 'w') { 2683 dx = 1.0 + unorm; 2684 } else { 2685 dx = xx[col]; 2686 } 2687 if (dx == (PetscScalar)0.0) dx = 1.0; 2688 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2689 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2690 dx *= epsilon; 2691 vscale_array[col] = (PetscScalar)1.0/dx; 2692 } 2693 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2694 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2695 if (ctype == IS_COLORING_GLOBAL) { 2696 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2697 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2698 } 2699 if (coloring->vscaleforrow) { 2700 vscaleforrow = coloring->vscaleforrow; 2701 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2702 2703 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2704 /* 2705 Loop over each color 2706 */ 2707 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2708 for (k=0; k<coloring->ncolors; k++) { 2709 coloring->currentcolor = k; 2710 for (i=0; i<bs; i++) { 2711 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2712 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2713 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2714 /* 2715 Loop over each column associated with color 2716 adding the perturbation to the vector w3. 2717 */ 2718 for (l=0; l<coloring->ncolumns[k]; l++) { 2719 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2720 if (coloring->htype[0] == 'w') { 2721 dx = 1.0 + unorm; 2722 } else { 2723 dx = xx[col]; 2724 } 2725 if (dx == (PetscScalar)0.0) dx = 1.0; 2726 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2727 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2728 dx *= epsilon; 2729 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2730 w3_array[col] += dx; 2731 } 2732 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2733 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2734 2735 /* 2736 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2737 w2 = F(x1 + dx) - F(x1) 2738 */ 2739 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2740 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2741 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2742 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2743 2744 /* 2745 Loop over rows of vector, putting results into Jacobian matrix 2746 */ 2747 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2748 for (l=0; l<coloring->nrows[k]; l++) { 2749 row = bs*coloring->rows[k][l]; /* local row index */ 2750 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2751 for (j=0; j<bs; j++) { 2752 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2753 srows[j] = row + start + j; 2754 } 2755 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2756 } 2757 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2758 } 2759 } /* endof for each color */ 2760 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2761 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2762 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2763 ierr = PetscFree(srows);CHKERRQ(ierr); 2764 2765 coloring->currentcolor = -1; 2766 2767 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2769 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 2773 /* -------------------------------------------------------------------*/ 2774 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2775 MatGetRow_SeqBAIJ, 2776 MatRestoreRow_SeqBAIJ, 2777 MatMult_SeqBAIJ_N, 2778 /* 4*/ MatMultAdd_SeqBAIJ_N, 2779 MatMultTranspose_SeqBAIJ, 2780 MatMultTransposeAdd_SeqBAIJ, 2781 0, 2782 0, 2783 0, 2784 /* 10*/ 0, 2785 MatLUFactor_SeqBAIJ, 2786 0, 2787 0, 2788 MatTranspose_SeqBAIJ, 2789 /* 15*/ MatGetInfo_SeqBAIJ, 2790 MatEqual_SeqBAIJ, 2791 MatGetDiagonal_SeqBAIJ, 2792 MatDiagonalScale_SeqBAIJ, 2793 MatNorm_SeqBAIJ, 2794 /* 20*/ 0, 2795 MatAssemblyEnd_SeqBAIJ, 2796 MatSetOption_SeqBAIJ, 2797 MatZeroEntries_SeqBAIJ, 2798 /* 24*/ MatZeroRows_SeqBAIJ, 2799 0, 2800 0, 2801 0, 2802 0, 2803 /* 29*/ MatSetUp_SeqBAIJ, 2804 0, 2805 0, 2806 0, 2807 0, 2808 /* 34*/ MatDuplicate_SeqBAIJ, 2809 0, 2810 0, 2811 MatILUFactor_SeqBAIJ, 2812 0, 2813 /* 39*/ MatAXPY_SeqBAIJ, 2814 MatGetSubMatrices_SeqBAIJ, 2815 MatIncreaseOverlap_SeqBAIJ, 2816 MatGetValues_SeqBAIJ, 2817 MatCopy_SeqBAIJ, 2818 /* 44*/ 0, 2819 MatScale_SeqBAIJ, 2820 0, 2821 0, 2822 MatZeroRowsColumns_SeqBAIJ, 2823 /* 49*/ 0, 2824 MatGetRowIJ_SeqBAIJ, 2825 MatRestoreRowIJ_SeqBAIJ, 2826 MatGetColumnIJ_SeqBAIJ, 2827 MatRestoreColumnIJ_SeqBAIJ, 2828 /* 54*/ MatFDColoringCreate_SeqAIJ, 2829 0, 2830 0, 2831 0, 2832 MatSetValuesBlocked_SeqBAIJ, 2833 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2834 MatDestroy_SeqBAIJ, 2835 MatView_SeqBAIJ, 2836 0, 2837 0, 2838 /* 64*/ 0, 2839 0, 2840 0, 2841 0, 2842 0, 2843 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2844 0, 2845 MatConvert_Basic, 2846 0, 2847 0, 2848 /* 74*/ 0, 2849 MatFDColoringApply_BAIJ, 2850 0, 2851 0, 2852 0, 2853 /* 79*/ 0, 2854 0, 2855 0, 2856 0, 2857 MatLoad_SeqBAIJ, 2858 /* 84*/ 0, 2859 0, 2860 0, 2861 0, 2862 0, 2863 /* 89*/ 0, 2864 0, 2865 0, 2866 0, 2867 0, 2868 /* 94*/ 0, 2869 0, 2870 0, 2871 0, 2872 0, 2873 /* 99*/ 0, 2874 0, 2875 0, 2876 0, 2877 0, 2878 /*104*/ 0, 2879 MatRealPart_SeqBAIJ, 2880 MatImaginaryPart_SeqBAIJ, 2881 0, 2882 0, 2883 /*109*/ 0, 2884 0, 2885 0, 2886 0, 2887 MatMissingDiagonal_SeqBAIJ, 2888 /*114*/ 0, 2889 0, 2890 0, 2891 0, 2892 0, 2893 /*119*/ 0, 2894 0, 2895 MatMultHermitianTranspose_SeqBAIJ, 2896 MatMultHermitianTransposeAdd_SeqBAIJ, 2897 0, 2898 /*124*/ 0, 2899 0, 2900 MatInvertBlockDiagonal_SeqBAIJ, 2901 0, 2902 0, 2903 /*129*/ 0, 2904 0, 2905 0, 2906 0, 2907 0, 2908 /*134*/ 0, 2909 0, 2910 0, 2911 0, 2912 0, 2913 /*139*/ 0, 2914 0 2915 }; 2916 2917 #undef __FUNCT__ 2918 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2919 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2920 { 2921 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2922 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2923 PetscErrorCode ierr; 2924 2925 PetscFunctionBegin; 2926 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2927 2928 /* allocate space for values if not already there */ 2929 if (!aij->saved_values) { 2930 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2931 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2932 } 2933 2934 /* copy values over */ 2935 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2936 PetscFunctionReturn(0); 2937 } 2938 2939 #undef __FUNCT__ 2940 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2941 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2942 { 2943 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2944 PetscErrorCode ierr; 2945 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2946 2947 PetscFunctionBegin; 2948 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2949 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2950 2951 /* copy values over */ 2952 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2953 PetscFunctionReturn(0); 2954 } 2955 2956 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2957 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2958 2959 #undef __FUNCT__ 2960 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2961 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2962 { 2963 Mat_SeqBAIJ *b; 2964 PetscErrorCode ierr; 2965 PetscInt i,mbs,nbs,bs2; 2966 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2967 2968 PetscFunctionBegin; 2969 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2970 if (nz == MAT_SKIP_ALLOCATION) { 2971 skipallocation = PETSC_TRUE; 2972 nz = 0; 2973 } 2974 2975 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2976 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2977 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2978 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2979 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2980 2981 B->preallocated = PETSC_TRUE; 2982 2983 mbs = B->rmap->n/bs; 2984 nbs = B->cmap->n/bs; 2985 bs2 = bs*bs; 2986 2987 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2988 2989 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2990 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2991 if (nnz) { 2992 for (i=0; i<mbs; i++) { 2993 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2994 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2995 } 2996 } 2997 2998 b = (Mat_SeqBAIJ*)B->data; 2999 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3000 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3001 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3002 3003 if (!flg) { 3004 switch (bs) { 3005 case 1: 3006 B->ops->mult = MatMult_SeqBAIJ_1; 3007 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3008 break; 3009 case 2: 3010 B->ops->mult = MatMult_SeqBAIJ_2; 3011 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3012 break; 3013 case 3: 3014 B->ops->mult = MatMult_SeqBAIJ_3; 3015 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3016 break; 3017 case 4: 3018 B->ops->mult = MatMult_SeqBAIJ_4; 3019 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3020 break; 3021 case 5: 3022 B->ops->mult = MatMult_SeqBAIJ_5; 3023 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3024 break; 3025 case 6: 3026 B->ops->mult = MatMult_SeqBAIJ_6; 3027 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3028 break; 3029 case 7: 3030 B->ops->mult = MatMult_SeqBAIJ_7; 3031 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3032 break; 3033 case 15: 3034 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3035 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3036 break; 3037 default: 3038 B->ops->mult = MatMult_SeqBAIJ_N; 3039 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3040 break; 3041 } 3042 } 3043 B->ops->sor = MatSOR_SeqBAIJ; 3044 b->mbs = mbs; 3045 b->nbs = nbs; 3046 if (!skipallocation) { 3047 if (!b->imax) { 3048 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3049 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3050 3051 b->free_imax_ilen = PETSC_TRUE; 3052 } 3053 /* b->ilen will count nonzeros in each block row so far. */ 3054 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3055 if (!nnz) { 3056 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3057 else if (nz < 0) nz = 1; 3058 for (i=0; i<mbs; i++) b->imax[i] = nz; 3059 nz = nz*mbs; 3060 } else { 3061 nz = 0; 3062 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3063 } 3064 3065 /* allocate the matrix space */ 3066 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3067 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3068 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3069 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3070 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3071 3072 b->singlemalloc = PETSC_TRUE; 3073 b->i[0] = 0; 3074 for (i=1; i<mbs+1; i++) { 3075 b->i[i] = b->i[i-1] + b->imax[i-1]; 3076 } 3077 b->free_a = PETSC_TRUE; 3078 b->free_ij = PETSC_TRUE; 3079 } else { 3080 b->free_a = PETSC_FALSE; 3081 b->free_ij = PETSC_FALSE; 3082 } 3083 3084 b->bs2 = bs2; 3085 b->mbs = mbs; 3086 b->nz = 0; 3087 b->maxnz = nz; 3088 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3089 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3090 PetscFunctionReturn(0); 3091 } 3092 3093 #undef __FUNCT__ 3094 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3095 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3096 { 3097 PetscInt i,m,nz,nz_max=0,*nnz; 3098 PetscScalar *values=0; 3099 PetscErrorCode ierr; 3100 3101 PetscFunctionBegin; 3102 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3103 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3104 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3105 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3106 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3107 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3108 m = B->rmap->n/bs; 3109 3110 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3111 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3112 for (i=0; i<m; i++) { 3113 nz = ii[i+1]- ii[i]; 3114 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3115 nz_max = PetscMax(nz_max, nz); 3116 nnz[i] = nz; 3117 } 3118 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3119 ierr = PetscFree(nnz);CHKERRQ(ierr); 3120 3121 values = (PetscScalar*)V; 3122 if (!values) { 3123 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3124 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3125 } 3126 for (i=0; i<m; i++) { 3127 PetscInt ncols = ii[i+1] - ii[i]; 3128 const PetscInt *icols = jj + ii[i]; 3129 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3130 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3131 } 3132 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3133 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3134 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3135 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3136 PetscFunctionReturn(0); 3137 } 3138 3139 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3140 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3141 #if defined(PETSC_HAVE_MUMPS) 3142 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3143 #endif 3144 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 3145 3146 /*MC 3147 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3148 block sparse compressed row format. 3149 3150 Options Database Keys: 3151 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3152 3153 Level: beginner 3154 3155 .seealso: MatCreateSeqBAIJ() 3156 M*/ 3157 3158 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3159 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "MatCreate_SeqBAIJ" 3162 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3163 { 3164 PetscErrorCode ierr; 3165 PetscMPIInt size; 3166 Mat_SeqBAIJ *b; 3167 3168 PetscFunctionBegin; 3169 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3170 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3171 3172 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3173 B->data = (void*)b; 3174 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3175 3176 b->row = 0; 3177 b->col = 0; 3178 b->icol = 0; 3179 b->reallocs = 0; 3180 b->saved_values = 0; 3181 3182 b->roworiented = PETSC_TRUE; 3183 b->nonew = 0; 3184 b->diag = 0; 3185 b->solve_work = 0; 3186 b->mult_work = 0; 3187 B->spptr = 0; 3188 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3189 b->keepnonzeropattern = PETSC_FALSE; 3190 b->xtoy = 0; 3191 b->XtoY = 0; 3192 B->same_nonzero = PETSC_FALSE; 3193 3194 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C","MatGetFactorAvailable_seqbaij_petsc",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3195 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqbaij_petsc",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3196 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C","MatGetFactor_seqbaij_bstrm",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3197 #if defined(PETSC_HAVE_MUMPS) 3198 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3199 #endif 3200 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C","MatInvertBlockDiagonal_SeqBAIJ",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3201 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_SeqBAIJ",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3202 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_SeqBAIJ",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3203 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C","MatSeqBAIJSetColumnIndices_SeqBAIJ",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3204 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C","MatConvert_SeqBAIJ_SeqAIJ",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3205 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C","MatConvert_SeqBAIJ_SeqSBAIJ",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3206 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C","MatSeqBAIJSetPreallocation_SeqBAIJ",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3207 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C","MatSeqBAIJSetPreallocationCSR_SeqBAIJ",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3208 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C","MatConvert_SeqBAIJ_SeqBSTRM",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3209 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_SeqBAIJ",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3210 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3211 PetscFunctionReturn(0); 3212 } 3213 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3216 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3217 { 3218 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3219 PetscErrorCode ierr; 3220 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3221 3222 PetscFunctionBegin; 3223 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3224 3225 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3226 c->imax = a->imax; 3227 c->ilen = a->ilen; 3228 c->free_imax_ilen = PETSC_FALSE; 3229 } else { 3230 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3231 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3232 for (i=0; i<mbs; i++) { 3233 c->imax[i] = a->imax[i]; 3234 c->ilen[i] = a->ilen[i]; 3235 } 3236 c->free_imax_ilen = PETSC_TRUE; 3237 } 3238 3239 /* allocate the matrix space */ 3240 if (mallocmatspace) { 3241 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3242 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3243 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3244 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3245 3246 c->i = a->i; 3247 c->j = a->j; 3248 c->singlemalloc = PETSC_FALSE; 3249 c->free_a = PETSC_TRUE; 3250 c->free_ij = PETSC_FALSE; 3251 c->parent = A; 3252 C->preallocated = PETSC_TRUE; 3253 C->assembled = PETSC_TRUE; 3254 3255 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3256 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3257 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3258 } else { 3259 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3260 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3261 3262 c->singlemalloc = PETSC_TRUE; 3263 c->free_a = PETSC_TRUE; 3264 c->free_ij = PETSC_TRUE; 3265 3266 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3267 if (mbs > 0) { 3268 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3269 if (cpvalues == MAT_COPY_VALUES) { 3270 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3271 } else { 3272 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3273 } 3274 } 3275 C->preallocated = PETSC_TRUE; 3276 C->assembled = PETSC_TRUE; 3277 } 3278 } 3279 3280 c->roworiented = a->roworiented; 3281 c->nonew = a->nonew; 3282 3283 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3284 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3285 3286 c->bs2 = a->bs2; 3287 c->mbs = a->mbs; 3288 c->nbs = a->nbs; 3289 3290 if (a->diag) { 3291 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3292 c->diag = a->diag; 3293 c->free_diag = PETSC_FALSE; 3294 } else { 3295 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3296 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3297 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3298 c->free_diag = PETSC_TRUE; 3299 } 3300 } else c->diag = 0; 3301 3302 c->nz = a->nz; 3303 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3304 c->solve_work = 0; 3305 c->mult_work = 0; 3306 3307 c->compressedrow.use = a->compressedrow.use; 3308 c->compressedrow.nrows = a->compressedrow.nrows; 3309 c->compressedrow.check = a->compressedrow.check; 3310 if (a->compressedrow.use) { 3311 i = a->compressedrow.nrows; 3312 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3313 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3314 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3315 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3316 } else { 3317 c->compressedrow.use = PETSC_FALSE; 3318 c->compressedrow.i = NULL; 3319 c->compressedrow.rindex = NULL; 3320 } 3321 C->same_nonzero = A->same_nonzero; 3322 3323 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3324 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3325 PetscFunctionReturn(0); 3326 } 3327 3328 #undef __FUNCT__ 3329 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3330 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3331 { 3332 PetscErrorCode ierr; 3333 3334 PetscFunctionBegin; 3335 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3336 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3337 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3338 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3339 PetscFunctionReturn(0); 3340 } 3341 3342 #undef __FUNCT__ 3343 #define __FUNCT__ "MatLoad_SeqBAIJ" 3344 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3345 { 3346 Mat_SeqBAIJ *a; 3347 PetscErrorCode ierr; 3348 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3349 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3350 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3351 PetscInt *masked,nmask,tmp,bs2,ishift; 3352 PetscMPIInt size; 3353 int fd; 3354 PetscScalar *aa; 3355 MPI_Comm comm; 3356 3357 PetscFunctionBegin; 3358 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3359 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3360 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3361 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3362 bs2 = bs*bs; 3363 3364 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3365 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3366 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3367 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3368 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3369 M = header[1]; N = header[2]; nz = header[3]; 3370 3371 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3372 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3373 3374 /* 3375 This code adds extra rows to make sure the number of rows is 3376 divisible by the blocksize 3377 */ 3378 mbs = M/bs; 3379 extra_rows = bs - M + bs*(mbs); 3380 if (extra_rows == bs) extra_rows = 0; 3381 else mbs++; 3382 if (extra_rows) { 3383 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3384 } 3385 3386 /* Set global sizes if not already set */ 3387 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3388 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3389 } else { /* Check if the matrix global sizes are correct */ 3390 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3391 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3392 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3393 } 3394 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3395 } 3396 3397 /* read in row lengths */ 3398 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3399 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3400 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3401 3402 /* read in column indices */ 3403 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3404 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3405 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3406 3407 /* loop over row lengths determining block row lengths */ 3408 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3409 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3410 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3411 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3412 rowcount = 0; 3413 nzcount = 0; 3414 for (i=0; i<mbs; i++) { 3415 nmask = 0; 3416 for (j=0; j<bs; j++) { 3417 kmax = rowlengths[rowcount]; 3418 for (k=0; k<kmax; k++) { 3419 tmp = jj[nzcount++]/bs; 3420 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3421 } 3422 rowcount++; 3423 } 3424 browlengths[i] += nmask; 3425 /* zero out the mask elements we set */ 3426 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3427 } 3428 3429 /* Do preallocation */ 3430 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3431 a = (Mat_SeqBAIJ*)newmat->data; 3432 3433 /* set matrix "i" values */ 3434 a->i[0] = 0; 3435 for (i=1; i<= mbs; i++) { 3436 a->i[i] = a->i[i-1] + browlengths[i-1]; 3437 a->ilen[i-1] = browlengths[i-1]; 3438 } 3439 a->nz = 0; 3440 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3441 3442 /* read in nonzero values */ 3443 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3444 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3445 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3446 3447 /* set "a" and "j" values into matrix */ 3448 nzcount = 0; jcount = 0; 3449 for (i=0; i<mbs; i++) { 3450 nzcountb = nzcount; 3451 nmask = 0; 3452 for (j=0; j<bs; j++) { 3453 kmax = rowlengths[i*bs+j]; 3454 for (k=0; k<kmax; k++) { 3455 tmp = jj[nzcount++]/bs; 3456 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3457 } 3458 } 3459 /* sort the masked values */ 3460 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3461 3462 /* set "j" values into matrix */ 3463 maskcount = 1; 3464 for (j=0; j<nmask; j++) { 3465 a->j[jcount++] = masked[j]; 3466 mask[masked[j]] = maskcount++; 3467 } 3468 /* set "a" values into matrix */ 3469 ishift = bs2*a->i[i]; 3470 for (j=0; j<bs; j++) { 3471 kmax = rowlengths[i*bs+j]; 3472 for (k=0; k<kmax; k++) { 3473 tmp = jj[nzcountb]/bs; 3474 block = mask[tmp] - 1; 3475 point = jj[nzcountb] - bs*tmp; 3476 idx = ishift + bs2*block + j + bs*point; 3477 a->a[idx] = (MatScalar)aa[nzcountb++]; 3478 } 3479 } 3480 /* zero out the mask elements we set */ 3481 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3482 } 3483 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3484 3485 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3486 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3487 ierr = PetscFree(aa);CHKERRQ(ierr); 3488 ierr = PetscFree(jj);CHKERRQ(ierr); 3489 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3490 3491 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3492 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3493 PetscFunctionReturn(0); 3494 } 3495 3496 #undef __FUNCT__ 3497 #define __FUNCT__ "MatCreateSeqBAIJ" 3498 /*@C 3499 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3500 compressed row) format. For good matrix assembly performance the 3501 user should preallocate the matrix storage by setting the parameter nz 3502 (or the array nnz). By setting these parameters accurately, performance 3503 during matrix assembly can be increased by more than a factor of 50. 3504 3505 Collective on MPI_Comm 3506 3507 Input Parameters: 3508 + comm - MPI communicator, set to PETSC_COMM_SELF 3509 . bs - size of block 3510 . m - number of rows 3511 . n - number of columns 3512 . nz - number of nonzero blocks per block row (same for all rows) 3513 - nnz - array containing the number of nonzero blocks in the various block rows 3514 (possibly different for each block row) or NULL 3515 3516 Output Parameter: 3517 . A - the matrix 3518 3519 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3520 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3521 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3522 3523 Options Database Keys: 3524 . -mat_no_unroll - uses code that does not unroll the loops in the 3525 block calculations (much slower) 3526 . -mat_block_size - size of the blocks to use 3527 3528 Level: intermediate 3529 3530 Notes: 3531 The number of rows and columns must be divisible by blocksize. 3532 3533 If the nnz parameter is given then the nz parameter is ignored 3534 3535 A nonzero block is any block that as 1 or more nonzeros in it 3536 3537 The block AIJ format is fully compatible with standard Fortran 77 3538 storage. That is, the stored row and column indices can begin at 3539 either one (as in Fortran) or zero. See the users' manual for details. 3540 3541 Specify the preallocated storage with either nz or nnz (not both). 3542 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3543 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3544 matrices. 3545 3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3547 @*/ 3548 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3549 { 3550 PetscErrorCode ierr; 3551 3552 PetscFunctionBegin; 3553 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3554 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3555 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3556 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3557 PetscFunctionReturn(0); 3558 } 3559 3560 #undef __FUNCT__ 3561 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3562 /*@C 3563 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3564 per row in the matrix. For good matrix assembly performance the 3565 user should preallocate the matrix storage by setting the parameter nz 3566 (or the array nnz). By setting these parameters accurately, performance 3567 during matrix assembly can be increased by more than a factor of 50. 3568 3569 Collective on MPI_Comm 3570 3571 Input Parameters: 3572 + A - the matrix 3573 . bs - size of block 3574 . nz - number of block nonzeros per block row (same for all rows) 3575 - nnz - array containing the number of block nonzeros in the various block rows 3576 (possibly different for each block row) or NULL 3577 3578 Options Database Keys: 3579 . -mat_no_unroll - uses code that does not unroll the loops in the 3580 block calculations (much slower) 3581 . -mat_block_size - size of the blocks to use 3582 3583 Level: intermediate 3584 3585 Notes: 3586 If the nnz parameter is given then the nz parameter is ignored 3587 3588 You can call MatGetInfo() to get information on how effective the preallocation was; 3589 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3590 You can also run with the option -info and look for messages with the string 3591 malloc in them to see if additional memory allocation was needed. 3592 3593 The block AIJ format is fully compatible with standard Fortran 77 3594 storage. That is, the stored row and column indices can begin at 3595 either one (as in Fortran) or zero. See the users' manual for details. 3596 3597 Specify the preallocated storage with either nz or nnz (not both). 3598 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3599 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3600 3601 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3602 @*/ 3603 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3604 { 3605 PetscErrorCode ierr; 3606 3607 PetscFunctionBegin; 3608 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3609 PetscValidType(B,1); 3610 PetscValidLogicalCollectiveInt(B,bs,2); 3611 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3612 PetscFunctionReturn(0); 3613 } 3614 3615 #undef __FUNCT__ 3616 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3617 /*@C 3618 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3619 (the default sequential PETSc format). 3620 3621 Collective on MPI_Comm 3622 3623 Input Parameters: 3624 + A - the matrix 3625 . i - the indices into j for the start of each local row (starts with zero) 3626 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3627 - v - optional values in the matrix 3628 3629 Level: developer 3630 3631 .keywords: matrix, aij, compressed row, sparse 3632 3633 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3634 @*/ 3635 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3636 { 3637 PetscErrorCode ierr; 3638 3639 PetscFunctionBegin; 3640 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3641 PetscValidType(B,1); 3642 PetscValidLogicalCollectiveInt(B,bs,2); 3643 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3644 PetscFunctionReturn(0); 3645 } 3646 3647 3648 #undef __FUNCT__ 3649 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3650 /*@ 3651 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3652 3653 Collective on MPI_Comm 3654 3655 Input Parameters: 3656 + comm - must be an MPI communicator of size 1 3657 . bs - size of block 3658 . m - number of rows 3659 . n - number of columns 3660 . i - row indices 3661 . j - column indices 3662 - a - matrix values 3663 3664 Output Parameter: 3665 . mat - the matrix 3666 3667 Level: advanced 3668 3669 Notes: 3670 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3671 once the matrix is destroyed 3672 3673 You cannot set new nonzero locations into this matrix, that will generate an error. 3674 3675 The i and j indices are 0 based 3676 3677 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3678 3679 3680 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3681 3682 @*/ 3683 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3684 { 3685 PetscErrorCode ierr; 3686 PetscInt ii; 3687 Mat_SeqBAIJ *baij; 3688 3689 PetscFunctionBegin; 3690 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3691 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3692 3693 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3694 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3695 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3696 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3697 baij = (Mat_SeqBAIJ*)(*mat)->data; 3698 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3699 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3700 3701 baij->i = i; 3702 baij->j = j; 3703 baij->a = a; 3704 3705 baij->singlemalloc = PETSC_FALSE; 3706 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3707 baij->free_a = PETSC_FALSE; 3708 baij->free_ij = PETSC_FALSE; 3709 3710 for (ii=0; ii<m; ii++) { 3711 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3712 #if defined(PETSC_USE_DEBUG) 3713 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3714 #endif 3715 } 3716 #if defined(PETSC_USE_DEBUG) 3717 for (ii=0; ii<baij->i[m]; ii++) { 3718 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3719 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3720 } 3721 #endif 3722 3723 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3724 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3725 PetscFunctionReturn(0); 3726 } 3727