1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat); 10 extern int DisAssemble_MPISBAIJ(Mat); 11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *); 14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**); 18 extern int MatPrintHelp_SeqSBAIJ(Mat); 19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*); 20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *); 21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec); 22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec); 23 24 /* UGLY, ugly, ugly 25 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 26 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 27 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 28 converts the entries into single precision and then calls ..._MatScalar() to put them 29 into the single precision data structures. 30 */ 31 #if defined(PETSC_USE_MAT_SINGLE) 32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 37 #else 38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 39 #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 41 #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 43 #endif 44 45 EXTERN_C_BEGIN 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 48 int MatStoreValues_MPISBAIJ(Mat mat) 49 { 50 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 51 int ierr; 52 53 PetscFunctionBegin; 54 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 55 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 EXTERN_C_BEGIN 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 63 int MatRetrieveValues_MPISBAIJ(Mat mat) 64 { 65 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 66 int ierr; 67 68 PetscFunctionBegin; 69 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 70 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 EXTERN_C_END 74 75 /* 76 Local utility routine that creates a mapping from the global column 77 number to the local number in the off-diagonal part of the local 78 storage of the matrix. 
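    As used elsewhere in this file, a colmap entry of 0 marks a global block column
    that does not occur in B, and a positive entry is 1 + the local offset, so callers
    subtract 1 and test for a negative result (or perform the equivalent PetscTable
    lookup when PETSC_USE_CTABLE is defined).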
This is done in a non scable way since the 79 length of colmap equals the global matrix length. 80 */ 81 #undef __FUNCT__ 82 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private" 83 static int CreateColmap_MPISBAIJ_Private(Mat mat) 84 { 85 PetscFunctionBegin; 86 SETERRQ(1,"Function not yet written for SBAIJ format"); 87 /* PetscFunctionReturn(0); */ 88 } 89 90 #define CHUNKSIZE 10 91 92 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 93 { \ 94 \ 95 brow = row/bs; \ 96 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 97 rmax = aimax[brow]; nrow = ailen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto a_noinsert; \ 113 } \ 114 } \ 115 if (a->nonew == 1) goto a_noinsert; \ 116 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 117 if (nrow >= rmax) { \ 118 /* there is no extra room in row, therefore enlarge */ \ 119 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 120 MatScalar *new_a; \ 121 \ 122 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 123 \ 124 /* malloc new storage space */ \ 125 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 126 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 127 new_j = (int*)(new_a + bs2*new_nz); \ 128 new_i = new_j + new_nz; \ 129 \ 130 /* copy over old data into new slots */ \ 131 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 132 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 133 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 134 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 135 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 136 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 137 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 138 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 139 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 140 /* free up old matrix storage */ \ 141 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 142 if (!a->singlemalloc) { \ 143 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 144 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 145 } \ 146 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 147 a->singlemalloc = PETSC_TRUE; \ 148 \ 149 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 150 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 151 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 152 a->s_maxnz += bs2*CHUNKSIZE; \ 153 a->reallocs++; \ 154 a->s_nz++; \ 155 } \ 156 N = nrow++ - 1; \ 157 /* shift up all the later entries in this row */ \ 158 for (ii=N; ii>=_i; ii--) { \ 159 rp[ii+1] = rp[ii]; \ 160 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 161 } \ 162 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 163 rp[_i] = bcol; \ 164 ap[bs2*_i + bs*cidx + ridx] = value; \ 165 a_noinsert:; \ 166 ailen[brow] = nrow; \ 167 } 168 #ifndef MatSetValues_SeqBAIJ_B_Private 169 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 
170 { \ 171 brow = row/bs; \ 172 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 173 rmax = bimax[brow]; nrow = bilen[brow]; \ 174 bcol = col/bs; \ 175 ridx = row % bs; cidx = col % bs; \ 176 low = 0; high = nrow; \ 177 while (high-low > 3) { \ 178 t = (low+high)/2; \ 179 if (rp[t] > bcol) high = t; \ 180 else low = t; \ 181 } \ 182 for (_i=low; _i<high; _i++) { \ 183 if (rp[_i] > bcol) break; \ 184 if (rp[_i] == bcol) { \ 185 bap = ap + bs2*_i + bs*cidx + ridx; \ 186 if (addv == ADD_VALUES) *bap += value; \ 187 else *bap = value; \ 188 goto b_noinsert; \ 189 } \ 190 } \ 191 if (b->nonew == 1) goto b_noinsert; \ 192 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 193 if (nrow >= rmax) { \ 194 /* there is no extra room in row, therefore enlarge */ \ 195 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 196 MatScalar *new_a; \ 197 \ 198 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 199 \ 200 /* malloc new storage space */ \ 201 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 202 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 203 new_j = (int*)(new_a + bs2*new_nz); \ 204 new_i = new_j + new_nz; \ 205 \ 206 /* copy over old data into new slots */ \ 207 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 208 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 209 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 210 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 211 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 212 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 213 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 214 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 215 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 216 /* free up old matrix storage */ \ 217 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 218 if (!b->singlemalloc) { \ 219 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 220 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 221 } \ 222 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 223 b->singlemalloc = PETSC_TRUE; \ 224 \ 225 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 226 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 227 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 228 b->maxnz += bs2*CHUNKSIZE; \ 229 b->reallocs++; \ 230 b->nz++; \ 231 } \ 232 N = nrow++ - 1; \ 233 /* shift up all the later entries in this row */ \ 234 for (ii=N; ii>=_i; ii--) { \ 235 rp[ii+1] = rp[ii]; \ 236 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 237 } \ 238 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 239 rp[_i] = bcol; \ 240 ap[bs2*_i + bs*cidx + ridx] = value; \ 241 b_noinsert:; \ 242 bilen[brow] = nrow; \ 243 } 244 #endif 245 246 #if defined(PETSC_USE_MAT_SINGLE) 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatSetValues_MPISBAIJ" 249 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 250 { 251 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 252 int ierr,i,N = m*n; 253 MatScalar *vsingle; 254 255 PetscFunctionBegin; 256 if (N > b->setvalueslen) { 257 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 258 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 259 b->setvalueslen = N; 260 } 
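  /* stage the user's PetscScalar values in the MatScalar buffer; with
     PETSC_USE_MAT_SINGLE this converts the double precision input to the single
     precision storage format before calling the _MatScalar() routine (see the
     note at the top of this file) */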
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
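   For example, with bs=1 a call that passes both (2,5) and (5,2) keeps only the
   (2,5) entry; the (5,2) value is skipped rather than folded into its transpose.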
321 */ 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSetValues_MPIBAIJ" 324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 325 { 326 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 327 MatScalar value; 328 PetscTruth roworiented = baij->roworiented; 329 int ierr,i,j,row,col; 330 int rstart_orig=baij->rstart_bs; 331 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 332 int cend_orig=baij->cend_bs,bs=baij->bs; 333 334 /* Some Variables required in the macro */ 335 Mat A = baij->A; 336 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 337 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 338 MatScalar *aa=a->a; 339 340 Mat B = baij->B; 341 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 342 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 343 MatScalar *ba=b->a; 344 345 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 346 int low,high,t,ridx,cidx,bs2=a->bs2; 347 MatScalar *ap,*bap; 348 349 /* for stash */ 350 int n_loc, *in_loc=0; 351 MatScalar *v_loc=0; 352 353 PetscFunctionBegin; 354 355 if(!baij->donotstash){ 356 ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr); 357 ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr); 358 } 359 360 for (i=0; i<m; i++) { 361 if (im[i] < 0) continue; 362 #if defined(PETSC_USE_BOPT_g) 363 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 364 #endif 365 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 366 row = im[i] - rstart_orig; /* local row index */ 367 for (j=0; j<n; j++) { 368 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 369 if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 370 col = in[j] - cstart_orig; /* local col index */ 371 brow = row/bs; bcol = col/bs; 372 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 373 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 374 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 375 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 376 } else if (in[j] < 0) continue; 377 #if defined(PETSC_USE_BOPT_g) 378 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");} 379 #endif 380 else { /* off-diag entry (B) */ 381 if (mat->was_assembled) { 382 if (!baij->colmap) { 383 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined (PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 387 col = col - 1; 388 #else 389 col = baij->colmap[in[j]/bs] - 1; 390 #endif 391 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 392 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 393 col = in[j]; 394 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 395 B = baij->B; 396 b = (Mat_SeqBAIJ*)(B)->data; 397 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 398 ba=b->a; 399 } else col += in[j]%bs; 400 } else col = in[j]; 401 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 402 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 403 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 404 } 405 } 406 } else { /* off processor entry */ 407 if (!baij->donotstash) { 408 n_loc = 0; 409 for (j=0; j<n; j++){ 410 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 411 in_loc[n_loc] = in[j]; 412 if (roworiented) { 413 v_loc[n_loc] = v[i*n+j]; 414 } else { 415 v_loc[n_loc] = v[j*m+i]; 416 } 417 n_loc++; 418 } 419 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 420 } 421 } 422 } 423 424 if(!baij->donotstash){ 425 ierr = PetscFree(in_loc);CHKERRQ(ierr); 426 ierr = PetscFree(v_loc);CHKERRQ(ierr); 427 } 428 PetscFunctionReturn(0); 429 } 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 433 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 434 { 435 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 436 MatScalar *value,*barray=baij->barray; 437 PetscTruth roworiented = baij->roworiented; 438 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 439 int rend=baij->rend,cstart=baij->cstart,stepval; 440 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 441 442 PetscFunctionBegin; 443 if(!barray) { 444 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 445 baij->barray = barray; 446 } 447 448 if (roworiented) { 449 stepval = (n-1)*bs; 450 } else { 451 stepval = (m-1)*bs; 452 } 453 for (i=0; i<m; i++) { 454 if (im[i] < 0) continue; 455 #if defined(PETSC_USE_BOPT_g) 456 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 457 #endif 458 if (im[i] >= rstart && im[i] < rend) { 459 row = im[i] - rstart; 460 for (j=0; j<n; j++) { 461 /* If NumCol = 1 then a copy is not required */ 462 if ((roworiented) && (n == 1)) { 463 barray = v + i*bs2; 464 } else if((!roworiented) && (m == 1)) { 465 barray = v + j*bs2; 466 } else { /* Here a copy is required */ 467 if (roworiented) { 468 value = v + i*(stepval+bs)*bs + j*bs; 469 } else { 470 value = v + j*(stepval+bs)*bs + i*bs; 471 } 472 for (ii=0; ii<bs; ii++,value+=stepval) { 473 for (jj=0; jj<bs; jj++) { 474 *barray++ = *value++; 475 } 476 } 477 barray -=bs2; 478 } 479 480 if (in[j] >= cstart && in[j] < cend){ 481 col = in[j] - cstart; 482 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 483 } 484 else if (in[j] < 0) continue; 485 #if defined(PETSC_USE_BOPT_g) 486 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 487 #endif 488 else { 489 if (mat->was_assembled) { 490 if (!baij->colmap) { 491 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 492 } 493 494 #if defined(PETSC_USE_BOPT_g) 495 #if defined (PETSC_USE_CTABLE) 496 { int data; 497 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 498 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 499 } 500 #else 501 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 502 #endif 503 #endif 504 #if defined (PETSC_USE_CTABLE) 505 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 506 col = (col - 1)/bs; 507 #else 508 col = (baij->colmap[in[j]] - 1)/bs; 509 #endif 510 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 511 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 512 col = in[j]; 513 } 514 } 515 else col = in[j]; 516 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 517 } 518 } 519 } else { 520 if (!baij->donotstash) { 521 if (roworiented) { 522 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 523 } else { 524 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 525 } 526 } 527 } 528 } 529 PetscFunctionReturn(0); 530 } 531 532 #define HASH_KEY 0.6180339887 533 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 534 /* #define HASH(size,key) 
((int)((size)*fmod(((key)*HASH_KEY),1))) */ 535 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 536 #undef __FUNCT__ 537 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar" 538 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 539 { 540 PetscFunctionBegin; 541 SETERRQ(1,"Function not yet written for SBAIJ format"); 542 /* PetscFunctionReturn(0); */ 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 547 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 548 { 549 PetscFunctionBegin; 550 SETERRQ(1,"Function not yet written for SBAIJ format"); 551 /* PetscFunctionReturn(0); */ 552 } 553 554 #undef __FUNCT__ 555 #define __FUNCT__ "MatGetValues_MPISBAIJ" 556 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 557 { 558 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 559 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 560 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 561 562 PetscFunctionBegin; 563 for (i=0; i<m; i++) { 564 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 565 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 566 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 567 row = idxm[i] - bsrstart; 568 for (j=0; j<n; j++) { 569 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 570 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 571 if (idxn[j] >= bscstart && idxn[j] < bscend){ 572 col = idxn[j] - bscstart; 573 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 574 } else { 575 if (!baij->colmap) { 576 ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 577 } 578 #if defined (PETSC_USE_CTABLE) 579 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 580 data --; 581 #else 582 data = baij->colmap[idxn[j]/bs]-1; 583 #endif 584 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 585 else { 586 col = data + idxn[j]%bs; 587 ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 588 } 589 } 590 } 591 } else { 592 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 593 } 594 } 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatNorm_MPISBAIJ" 600 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 601 { 602 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 603 /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */ 604 /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */ 605 int ierr; 606 PetscReal sum[2],*lnorm2; 607 608 PetscFunctionBegin; 609 if (baij->size == 1) { 610 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 611 } else { 612 if (type == NORM_FROBENIUS) { 613 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 614 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 615 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 616 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 617 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 618 /* 619 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 620 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 621 */ 622 ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 623 /* 624 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, 
%g\n",rank,sum[0],sum[1]); 625 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 626 627 *norm = sqrt(sum[0] + 2*sum[1]); 628 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 629 } else { 630 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 631 } 632 } 633 PetscFunctionReturn(0); 634 } 635 636 /* 637 Creates the hash table, and sets the table 638 This table is created only once. 639 If new entried need to be added to the matrix 640 then the hash table has to be destroyed and 641 recreated. 642 */ 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private" 645 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 646 { 647 PetscFunctionBegin; 648 SETERRQ(1,"Function not yet written for SBAIJ format"); 649 /* PetscFunctionReturn(0); */ 650 } 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 654 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 655 { 656 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 657 int ierr,nstash,reallocs; 658 InsertMode addv; 659 660 PetscFunctionBegin; 661 if (baij->donotstash) { 662 PetscFunctionReturn(0); 663 } 664 665 /* make sure all processors are either in INSERTMODE or ADDMODE */ 666 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 667 if (addv == (ADD_VALUES|INSERT_VALUES)) { 668 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 669 } 670 mat->insertmode = addv; /* in case this processor had no cache */ 671 672 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 673 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 674 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 675 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 676 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 677 PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 683 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 684 { 685 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 686 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 687 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 688 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 689 int *row,*col,other_disassembled; 690 PetscTruth r1,r2,r3; 691 MatScalar *val; 692 InsertMode addv = mat->insertmode; 693 #if defined(PETSC_HAVE_SPOOLES) 694 PetscTruth flag; 695 #endif 696 697 PetscFunctionBegin; 698 699 if (!baij->donotstash) { 700 while (1) { 701 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 702 /* 703 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 704 PetscSynchronizedFlush(PETSC_COMM_WORLD); 705 */ 706 if (!flg) break; 707 708 for (i=0; i<n;) { 709 /* Now identify the consecutive vals belonging to the same row */ 710 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 711 if (j < n) ncols = j-i; 712 else ncols = n-i; 713 /* Now assemble all these values with a single function call */ 714 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 715 i = j; 716 } 717 } 718 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 719 /* Now process the block-stash. 
Since the values are stashed column-oriented, 720 set the roworiented flag to column oriented, and after MatSetValues() 721 restore the original flags */ 722 r1 = baij->roworiented; 723 r2 = a->roworiented; 724 r3 = b->roworiented; 725 baij->roworiented = PETSC_FALSE; 726 a->roworiented = PETSC_FALSE; 727 b->roworiented = PETSC_FALSE; 728 while (1) { 729 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 730 if (!flg) break; 731 732 for (i=0; i<n;) { 733 /* Now identify the consecutive vals belonging to the same row */ 734 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 735 if (j < n) ncols = j-i; 736 else ncols = n-i; 737 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 738 i = j; 739 } 740 } 741 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 742 baij->roworiented = r1; 743 a->roworiented = r2; 744 b->roworiented = r3; 745 } 746 747 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 748 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 749 750 /* determine if any processor has disassembled, if so we must 751 also disassemble ourselfs, in order that we may reassemble. */ 752 /* 753 if nonzero structure of submatrix B cannot change then we know that 754 no processor disassembled thus we can skip this stuff 755 */ 756 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 757 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 758 if (mat->was_assembled && !other_disassembled) { 759 ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 760 } 761 } 762 763 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 764 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 765 } 766 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 767 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 768 769 #if defined(PETSC_USE_BOPT_g) 770 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 771 PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 772 baij->ht_total_ct = 0; 773 baij->ht_insert_ct = 0; 774 } 775 #endif 776 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 777 ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 778 mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 779 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 780 } 781 782 if (baij->rowvalues) { 783 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 784 baij->rowvalues = 0; 785 } 786 787 #if defined(PETSC_HAVE_SPOOLES) 788 ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 789 if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); } 790 #endif 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 796 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 797 { 798 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 799 int ierr,bs = baij->bs,size = baij->size,rank = baij->rank; 800 PetscTruth isascii,isdraw; 801 PetscViewer sviewer; 802 PetscViewerFormat format; 803 804 PetscFunctionBegin; 805 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 806 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 807 if (isascii) { 808 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 809 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 810 MatInfo info; 811 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 812 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 814 rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 815 baij->bs,(int)info.memory);CHKERRQ(ierr); 816 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 818 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 819 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 820 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 821 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } else if (format == PETSC_VIEWER_ASCII_INFO) { 824 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } 827 } 828 829 if (isdraw) { 830 PetscDraw draw; 831 PetscTruth isnull; 832 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 833 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 834 } 835 836 if (size == 1) { 837 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 838 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 839 } else { 840 /* assemble the entire matrix onto first processor. */ 841 Mat A; 842 Mat_SeqSBAIJ *Aloc; 843 Mat_SeqBAIJ *Bloc; 844 int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 845 MatScalar *a; 846 847 if (!rank) { 848 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 849 } else { 850 ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 851 } 852 PetscLogObjectParent(mat,A); 853 854 /* copy over the A part */ 855 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 856 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 857 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 858 859 for (i=0; i<mbs; i++) { 860 rvals[0] = bs*(baij->rstart + i); 861 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 862 for (j=ai[i]; j<ai[i+1]; j++) { 863 col = (baij->cstart+aj[j])*bs; 864 for (k=0; k<bs; k++) { 865 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 866 col++; a += bs; 867 } 868 } 869 } 870 /* copy over the B part */ 871 Bloc = (Mat_SeqBAIJ*)baij->B->data; 872 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 873 for (i=0; i<mbs; i++) { 874 rvals[0] = bs*(baij->rstart + i); 875 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 876 for (j=ai[i]; j<ai[i+1]; j++) { 877 col = baij->garray[aj[j]]*bs; 878 for (k=0; k<bs; k++) { 879 ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 880 col++; a += bs; 881 } 882 } 883 } 884 ierr = PetscFree(rvals);CHKERRQ(ierr); 885 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 886 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 /* 888 Everyone has to call to draw the matrix since the graphics waits are 889 synchronized across all processors that share the PetscDraw object 890 */ 891 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 892 if (!rank) { 893 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 894 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 895 } 
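      /* the singleton viewer must be restored on every process, not just on
         process 0 which did the viewing, since the call is collective */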
896 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 897 ierr = MatDestroy(A);CHKERRQ(ierr); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatView_MPISBAIJ" 904 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 905 { 906 int ierr; 907 PetscTruth isascii,isdraw,issocket,isbinary; 908 909 PetscFunctionBegin; 910 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 911 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 912 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 913 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 914 if (isascii || isdraw || issocket || isbinary) { 915 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 916 } else { 917 SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 918 } 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatDestroy_MPISBAIJ" 924 int MatDestroy_MPISBAIJ(Mat mat) 925 { 926 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 927 int ierr; 928 929 PetscFunctionBegin; 930 #if defined(PETSC_USE_LOG) 931 PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N); 932 #endif 933 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 934 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 935 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 936 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 937 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 938 #if defined (PETSC_USE_CTABLE) 939 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 940 #else 941 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 942 #endif 943 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 944 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 945 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 946 if (baij->slvec0) { 947 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 948 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 949 } 950 if (baij->slvec1) { 951 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 952 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 953 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 954 } 955 if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);} 956 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 957 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 958 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 959 #if defined(PETSC_USE_MAT_SINGLE) 960 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 961 #endif 962 ierr = PetscFree(baij);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "MatMult_MPISBAIJ" 968 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 969 { 970 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 971 int ierr,nt,mbs=a->mbs,bs=a->bs; 972 PetscScalar *x,*from,zero=0.0; 973 974 PetscFunctionBegin; 975 /* 976 PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n"); 977 PetscSynchronizedFlush(PETSC_COMM_WORLD); 978 */ 979 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 980 if (nt != A->n) { 981 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 982 } 983 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 984 if (nt != A->m) { 985 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 986 } 987 988 /* diagonal 
part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PETSC_COMM_WORLD);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
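/*
   A sketch (summarizing the code above, not original documentation) of the
   single-scatter scheme used by MatMult_MPISBAIJ() and MatMultAdd_MPISBAIJ():
   only the upper triangle is stored, so each process computes

     slvec1a = A_diag * x_local           (symmetric diagonal block)
     slvec0  = [ x_local | B^T x_local ]  (B^T x_local are contributions to rows owned elsewhere)

   and a single ADD_VALUES scatter of slvec0 into slvec1 = [ slvec1a | slvec1b ]
   both ships x to the processes that need it (into slvec1b) and accumulates the
   B^T pieces onto their owning processes (into slvec1a).  The final
   y = B*slvec1b + slvec1a then adds the strictly upper off-diagonal part.
*/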
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1080 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1081 { 1082 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1083 int ierr; 1084 1085 PetscFunctionBegin; 1086 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1087 /* do diagonal part */ 1088 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1089 /* do supperdiagonal part */ 1090 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1091 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1092 1093 /* do subdiagonal part */ 1094 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1095 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1096 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1097 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMultTranspose_MPISBAIJ" 1103 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 PetscFunctionBegin; 1106 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1107 /* PetscFunctionReturn(0); */ 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ" 1112 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1113 { 1114 PetscFunctionBegin; 1115 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1116 /* PetscFunctionReturn(0); */ 1117 } 1118 1119 /* 1120 This only works correctly for square matrices where the subblock A->A is the 1121 diagonal block 1122 */ 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1125 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1126 { 1127 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1128 int ierr; 1129 1130 PetscFunctionBegin; 1131 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1132 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatScale_MPISBAIJ" 1138 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A) 1139 { 1140 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1141 int ierr; 1142 1143 PetscFunctionBegin; 1144 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1145 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1151 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1152 { 1153 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1154 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1155 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1156 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1157 int *cmap,*idx_p,cstart = mat->cstart; 1158 1159 PetscFunctionBegin; 1160 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1161 mat->getrowactive = PETSC_TRUE; 1162 1163 if (!mat->rowvalues && (idx || v)) { 1164 /* 1165 allocate enough space to hold information from the longest row. 
1166 */ 1167 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1168 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1169 int max = 1,mbs = mat->mbs,tmp; 1170 for (i=0; i<mbs; i++) { 1171 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1172 if (max < tmp) { max = tmp; } 1173 } 1174 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1175 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1176 } 1177 1178 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1179 lrow = row - brstart; /* local row index */ 1180 1181 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1182 if (!v) {pvA = 0; pvB = 0;} 1183 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1184 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1185 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1186 nztot = nzA + nzB; 1187 1188 cmap = mat->garray; 1189 if (v || idx) { 1190 if (nztot) { 1191 /* Sort by increasing column numbers, assuming A and B already sorted */ 1192 int imark = -1; 1193 if (v) { 1194 *v = v_p = mat->rowvalues; 1195 for (i=0; i<nzB; i++) { 1196 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1197 else break; 1198 } 1199 imark = i; 1200 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1201 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1202 } 1203 if (idx) { 1204 *idx = idx_p = mat->rowindices; 1205 if (imark > -1) { 1206 for (i=0; i<imark; i++) { 1207 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1208 } 1209 } else { 1210 for (i=0; i<nzB; i++) { 1211 if (cmap[cworkB[i]/bs] < cstart) 1212 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1213 else break; 1214 } 1215 imark = i; 1216 } 1217 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1218 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1219 } 1220 } else { 1221 if (idx) *idx = 0; 1222 if (v) *v = 0; 1223 } 1224 } 1225 *nz = nztot; 1226 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1227 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1233 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1234 { 1235 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1236 1237 PetscFunctionBegin; 1238 if (baij->getrowactive == PETSC_FALSE) { 1239 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1240 } 1241 baij->getrowactive = PETSC_FALSE; 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ" 1247 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1248 { 1249 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1250 1251 PetscFunctionBegin; 1252 *bs = baij->bs; 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1258 int MatZeroEntries_MPISBAIJ(Mat A) 1259 { 1260 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1261 int ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1265 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1271 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1272 { 1273 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1274 Mat A = a->A,B = a->B; 1275 int ierr; 1276 PetscReal isend[5],irecv[5]; 1277 1278 PetscFunctionBegin; 1279 info->block_size 
= (PetscReal)a->bs; 1280 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1281 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1282 isend[3] = info->memory; isend[4] = info->mallocs; 1283 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1284 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1285 isend[3] += info->memory; isend[4] += info->mallocs; 1286 if (flag == MAT_LOCAL) { 1287 info->nz_used = isend[0]; 1288 info->nz_allocated = isend[1]; 1289 info->nz_unneeded = isend[2]; 1290 info->memory = isend[3]; 1291 info->mallocs = isend[4]; 1292 } else if (flag == MAT_GLOBAL_MAX) { 1293 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1294 info->nz_used = irecv[0]; 1295 info->nz_allocated = irecv[1]; 1296 info->nz_unneeded = irecv[2]; 1297 info->memory = irecv[3]; 1298 info->mallocs = irecv[4]; 1299 } else if (flag == MAT_GLOBAL_SUM) { 1300 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1301 info->nz_used = irecv[0]; 1302 info->nz_allocated = irecv[1]; 1303 info->nz_unneeded = irecv[2]; 1304 info->memory = irecv[3]; 1305 info->mallocs = irecv[4]; 1306 } else { 1307 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1308 } 1309 info->rows_global = (PetscReal)A->M; 1310 info->columns_global = (PetscReal)A->N; 1311 info->rows_local = (PetscReal)A->m; 1312 info->columns_local = (PetscReal)A->N; 1313 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1314 info->fill_ratio_needed = 0; 1315 info->factor_mallocs = 0; 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1321 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1322 { 1323 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1324 int ierr; 1325 1326 PetscFunctionBegin; 1327 switch (op) { 1328 case MAT_NO_NEW_NONZERO_LOCATIONS: 1329 case MAT_YES_NEW_NONZERO_LOCATIONS: 1330 case MAT_COLUMNS_UNSORTED: 1331 case MAT_COLUMNS_SORTED: 1332 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1333 case MAT_KEEP_ZEROED_ROWS: 1334 case MAT_NEW_NONZERO_LOCATION_ERR: 1335 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1336 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1337 break; 1338 case MAT_ROW_ORIENTED: 1339 a->roworiented = PETSC_TRUE; 1340 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1341 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1342 break; 1343 case MAT_ROWS_SORTED: 1344 case MAT_ROWS_UNSORTED: 1345 case MAT_YES_NEW_DIAGONALS: 1346 case MAT_USE_SINGLE_PRECISION_SOLVES: 1347 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1348 break; 1349 case MAT_COLUMN_ORIENTED: 1350 a->roworiented = PETSC_FALSE; 1351 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1352 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1353 break; 1354 case MAT_IGNORE_OFF_PROC_ENTRIES: 1355 a->donotstash = PETSC_TRUE; 1356 break; 1357 case MAT_NO_NEW_DIAGONALS: 1358 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1359 case MAT_USE_HASH_TABLE: 1360 a->ht_flag = PETSC_TRUE; 1361 break; 1362 default: 1363 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1370 int MatTranspose_MPISBAIJ(Mat A,Mat *B) 1371 { 1372 int ierr; 1373 PetscFunctionBegin; 1374 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNCT__ 1379 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1380 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1381 { 1382 
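  /* only a symmetric scaling D*A*D preserves the SBAIJ (symmetric) storage,
     which is why ll and rr are required below to be the same vector */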
Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1383 Mat a = baij->A,b = baij->B; 1384 int ierr,s1,s2,s3; 1385 1386 PetscFunctionBegin; 1387 if (ll != rr) { 1388 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1389 } 1390 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1391 if (rr) { 1392 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1393 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1394 /* Overlap communication with computation. */ 1395 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1396 /*} if (ll) { */ 1397 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1398 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1399 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1400 /* } */ 1401 /* scale the diagonal block */ 1402 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1403 1404 /* if (rr) { */ 1405 /* Do a scatter end and then right scale the off-diagonal block */ 1406 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1407 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1408 } 1409 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatZeroRows_MPISBAIJ" 1415 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag) 1416 { 1417 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1418 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1419 int *procs,*nprocs,j,idx,nsends,*work,row; 1420 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1421 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1422 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1423 MPI_Comm comm = A->comm; 1424 MPI_Request *send_waits,*recv_waits; 1425 MPI_Status recv_status,*send_status; 1426 IS istmp; 1427 PetscTruth found; 1428 1429 PetscFunctionBegin; 1430 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1431 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1432 1433 /* first count number of contributors to each processor */ 1434 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1435 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1436 procs = nprocs + size; 1437 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1438 for (i=0; i<N; i++) { 1439 idx = rows[i]; 1440 found = PETSC_FALSE; 1441 for (j=0; j<size; j++) { 1442 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1443 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 1444 } 1445 } 1446 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1447 } 1448 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1449 1450 /* inform other processors of number of messages and max length*/ 1451 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 1452 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1453 nmax = work[rank]; 1454 nrecvs = work[size+rank]; 1455 ierr = PetscFree(work);CHKERRQ(ierr); 1456 1457 /* post receives: */ 1458 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1459 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1460 for (i=0; i<nrecvs; i++) { 1461 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1462 } 1463 1464 /* do sends: 1465 1) starts[i] gives the starting index in svalues for stuff going to 1466 the ith processor 1467 */ 1468 ierr = 
PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1469 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1470 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1471 starts[0] = 0; 1472 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1473 for (i=0; i<N; i++) { 1474 svalues[starts[owner[i]]++] = rows[i]; 1475 } 1476 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1477 1478 starts[0] = 0; 1479 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1480 count = 0; 1481 for (i=0; i<size; i++) { 1482 if (procs[i]) { 1483 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1484 } 1485 } 1486 ierr = PetscFree(starts);CHKERRQ(ierr); 1487 1488 base = owners[rank]*bs; 1489 1490 /* wait on receives */ 1491 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1492 source = lens + nrecvs; 1493 count = nrecvs; slen = 0; 1494 while (count) { 1495 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1496 /* unpack receives into our local space */ 1497 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1498 source[imdex] = recv_status.MPI_SOURCE; 1499 lens[imdex] = n; 1500 slen += n; 1501 count--; 1502 } 1503 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1504 1505 /* move the data into the send scatter */ 1506 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1507 count = 0; 1508 for (i=0; i<nrecvs; i++) { 1509 values = rvalues + i*nmax; 1510 for (j=0; j<lens[i]; j++) { 1511 lrows[count++] = values[j] - base; 1512 } 1513 } 1514 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1515 ierr = PetscFree(lens);CHKERRQ(ierr); 1516 ierr = PetscFree(owner);CHKERRQ(ierr); 1517 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1518 1519 /* actually zap the local rows */ 1520 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1521 PetscLogObjectParent(A,istmp); 1522 1523 /* 1524 Zero the required rows. If the "diagonal block" of the matrix 1525 is square and the user wishes to set the diagonal we use seperate 1526 code so that MatSetValues() is not called for each diagonal allocating 1527 new memory, thus calling lots of mallocs and slowing things down. 
1528 1529 Contributed by: Mathew Knepley 1530 */ 1531 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1532 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1533 if (diag && (l->A->M == l->A->N)) { 1534 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1535 } else if (diag) { 1536 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1537 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1538 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1539 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1540 } 1541 for (i=0; i<slen; i++) { 1542 row = lrows[i] + rstart_bs; 1543 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1544 } 1545 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1546 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1547 } else { 1548 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1549 } 1550 1551 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1552 ierr = PetscFree(lrows);CHKERRQ(ierr); 1553 1554 /* wait on sends */ 1555 if (nsends) { 1556 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1557 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1558 ierr = PetscFree(send_status);CHKERRQ(ierr); 1559 } 1560 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1561 ierr = PetscFree(svalues);CHKERRQ(ierr); 1562 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatPrintHelp_MPISBAIJ" 1568 int MatPrintHelp_MPISBAIJ(Mat A) 1569 { 1570 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1571 MPI_Comm comm = A->comm; 1572 static int called = 0; 1573 int ierr; 1574 1575 PetscFunctionBegin; 1576 if (!a->rank) { 1577 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1578 } 1579 if (called) {PetscFunctionReturn(0);} else called = 1; 1580 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1581 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1587 int MatSetUnfactored_MPISBAIJ(Mat A) 1588 { 1589 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1590 int ierr; 1591 1592 PetscFunctionBegin; 1593 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1594 PetscFunctionReturn(0); 1595 } 1596 1597 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatEqual_MPISBAIJ" 1601 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1602 { 1603 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1604 Mat a,b,c,d; 1605 PetscTruth flg; 1606 int ierr; 1607 1608 PetscFunctionBegin; 1609 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr); 1610 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1611 a = matA->A; b = matA->B; 1612 c = matB->A; d = matB->B; 1613 1614 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1615 if (flg == PETSC_TRUE) { 1616 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1617 } 1618 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ" 1624 int MatSetUpPreallocation_MPISBAIJ(Mat A) 1625 { 1626 int ierr; 1627 1628 PetscFunctionBegin; 1629 ierr = 
MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1630 PetscFunctionReturn(0); 1631 } 1632 /* -------------------------------------------------------------------*/ 1633 static struct _MatOps MatOps_Values = { 1634 MatSetValues_MPISBAIJ, 1635 MatGetRow_MPISBAIJ, 1636 MatRestoreRow_MPISBAIJ, 1637 MatMult_MPISBAIJ, 1638 MatMultAdd_MPISBAIJ, 1639 MatMultTranspose_MPISBAIJ, 1640 MatMultTransposeAdd_MPISBAIJ, 1641 0, 1642 0, 1643 0, 1644 0, 1645 0, 1646 0, 1647 MatRelax_MPISBAIJ, 1648 MatTranspose_MPISBAIJ, 1649 MatGetInfo_MPISBAIJ, 1650 MatEqual_MPISBAIJ, 1651 MatGetDiagonal_MPISBAIJ, 1652 MatDiagonalScale_MPISBAIJ, 1653 MatNorm_MPISBAIJ, 1654 MatAssemblyBegin_MPISBAIJ, 1655 MatAssemblyEnd_MPISBAIJ, 1656 0, 1657 MatSetOption_MPISBAIJ, 1658 MatZeroEntries_MPISBAIJ, 1659 MatZeroRows_MPISBAIJ, 1660 0, 1661 0, 1662 0, 1663 0, 1664 MatSetUpPreallocation_MPISBAIJ, 1665 0, 1666 0, 1667 0, 1668 0, 1669 MatDuplicate_MPISBAIJ, 1670 0, 1671 0, 1672 0, 1673 0, 1674 0, 1675 MatGetSubMatrices_MPISBAIJ, 1676 MatIncreaseOverlap_MPISBAIJ, 1677 MatGetValues_MPISBAIJ, 1678 0, 1679 MatPrintHelp_MPISBAIJ, 1680 MatScale_MPISBAIJ, 1681 0, 1682 0, 1683 0, 1684 MatGetBlockSize_MPISBAIJ, 1685 0, 1686 0, 1687 0, 1688 0, 1689 0, 1690 0, 1691 MatSetUnfactored_MPISBAIJ, 1692 0, 1693 MatSetValuesBlocked_MPISBAIJ, 1694 0, 1695 0, 1696 0, 1697 MatGetPetscMaps_Petsc, 1698 0, 1699 0, 1700 0, 1701 0, 1702 0, 1703 0, 1704 MatGetRowMax_MPISBAIJ}; 1705 1706 1707 EXTERN_C_BEGIN 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1710 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1711 { 1712 PetscFunctionBegin; 1713 *a = ((Mat_MPISBAIJ *)A->data)->A; 1714 *iscopy = PETSC_FALSE; 1715 PetscFunctionReturn(0); 1716 } 1717 EXTERN_C_END 1718 1719 EXTERN_C_BEGIN 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatCreate_MPISBAIJ" 1722 int MatCreate_MPISBAIJ(Mat B) 1723 { 1724 Mat_MPISBAIJ *b; 1725 int ierr; 1726 PetscTruth flg; 1727 1728 PetscFunctionBegin; 1729 1730 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1731 B->data = (void*)b; 1732 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1733 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1734 1735 B->ops->destroy = MatDestroy_MPISBAIJ; 1736 B->ops->view = MatView_MPISBAIJ; 1737 B->mapping = 0; 1738 B->factor = 0; 1739 B->assembled = PETSC_FALSE; 1740 1741 B->insertmode = NOT_SET_VALUES; 1742 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1743 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1744 1745 /* build local table of row and column ownerships */ 1746 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1747 b->cowners = b->rowners + b->size + 2; 1748 b->rowners_bs = b->cowners + b->size + 2; 1749 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1750 1751 /* build cache for off array entries formed */ 1752 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1753 b->donotstash = PETSC_FALSE; 1754 b->colmap = PETSC_NULL; 1755 b->garray = PETSC_NULL; 1756 b->roworiented = PETSC_TRUE; 1757 1758 #if defined(PETSC_USE_MAT_SINGLE) 1759 /* stuff for MatSetValues_XXX in single precision */ 1760 b->setvalueslen = 0; 1761 b->setvaluescopy = PETSC_NULL; 1762 #endif 1763 1764 /* stuff used in block assembly */ 1765 b->barray = 0; 1766 1767 /* stuff used for matrix vector multiply */ 1768 b->lvec = 0; 1769 b->Mvctx = 0; 1770 b->slvec0 = 0; 1771 b->slvec0b = 0; 1772 b->slvec1 = 
0; 1773 b->slvec1a = 0; 1774 b->slvec1b = 0; 1775 b->sMvctx = 0; 1776 1777 /* stuff for MatGetRow() */ 1778 b->rowindices = 0; 1779 b->rowvalues = 0; 1780 b->getrowactive = PETSC_FALSE; 1781 1782 /* hash table stuff */ 1783 b->ht = 0; 1784 b->hd = 0; 1785 b->ht_size = 0; 1786 b->ht_flag = PETSC_FALSE; 1787 b->ht_fact = 0; 1788 b->ht_total_ct = 0; 1789 b->ht_insert_ct = 0; 1790 1791 ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1792 if (flg) { 1793 PetscReal fact = 1.39; 1794 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1795 ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1796 if (fact <= 1.0) fact = 1.39; 1797 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1798 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1799 } 1800 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1801 "MatStoreValues_MPISBAIJ", 1802 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1803 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1804 "MatRetrieveValues_MPISBAIJ", 1805 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1806 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1807 "MatGetDiagonalBlock_MPISBAIJ", 1808 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 EXTERN_C_END 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1815 /*@C 1816 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1817 the user should preallocate the matrix storage by setting the parameters 1818 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1819 performance can be increased by more than a factor of 50. 1820 1821 Collective on Mat 1822 1823 Input Parameters: 1824 + A - the matrix 1825 . bs - size of block 1826 . d_nz - number of block nonzeros per block row in diagonal portion of local 1827 submatrix (same for all local rows) 1828 . d_nnz - array containing the number of block nonzeros in the various block rows 1829 in the upper triangular and diagonal part of the diagonal portion of the local 1830 submatrix (possibly different for each block row) or PETSC_NULL. You must leave room 1831 for the diagonal entry even if it is zero. 1832 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1833 submatrix (same for all local rows). 1834 - o_nnz - array containing the number of block nonzeros in the various block rows of the 1835 off-diagonal portion of the local submatrix (possibly different for 1836 each block row) or PETSC_NULL. 1837 1838 1839 Options Database Keys: 1840 . -mat_no_unroll - uses code that does not unroll the loops in the 1841 block calculations (much slower) 1842 . -mat_block_size - size of the blocks to use 1843 1844 Notes: 1845 1846 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1847 then it must be used on all processors that share the object for that argument. 1848 1849 Storage Information: 1850 For a square global matrix we define each processor's diagonal portion 1851 to be its local rows and the corresponding columns (a square submatrix); 1852 each processor's off-diagonal portion encompasses the remainder of the 1853 local matrix (a rectangular submatrix). 1854 1855 The user can specify preallocated storage for the diagonal part of 1856 the local submatrix with either d_nz or d_nnz (not both).
Set 1857 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1858 memory allocation. Likewise, specify preallocated storage for the 1859 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1860 1861 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1862 the figure below we depict these three local rows and all columns (0-11). 1863 1864 .vb 1865 0 1 2 3 4 5 6 7 8 9 10 11 1866 ------------------- 1867 row 3 | o o o d d d o o o o o o 1868 row 4 | o o o d d d o o o o o o 1869 row 5 | o o o d d d o o o o o o 1870 ------------------- 1871 .ve 1872 1873 Thus, any entries in the d locations are stored in the d (diagonal) 1874 submatrix, and any entries in the o locations are stored in the 1875 o (off-diagonal) submatrix. Note that the d matrix is stored in 1876 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1877 1878 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1879 plus the diagonal part of the d matrix, 1880 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1881 In general, for PDE problems in which most nonzeros are near the diagonal, 1882 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1883 or you will get TERRIBLE performance; see the users' manual chapter on 1884 matrices. 1885 1886 Level: intermediate 1887 1888 .keywords: matrix, block, aij, compressed row, sparse, parallel 1889 1890 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1891 @*/ 1892 1893 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1894 { 1895 Mat_MPISBAIJ *b; 1896 int ierr,i,mbs,Mbs; 1897 PetscTruth flg2; 1898 1899 PetscFunctionBegin; 1900 ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr); 1901 if (!flg2) PetscFunctionReturn(0); 1902 1903 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1904 1905 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 1906 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1907 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1908 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1909 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1910 if (d_nnz) { 1911 for (i=0; i<B->m/bs; i++) { 1912 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1913 } 1914 } 1915 if (o_nnz) { 1916 for (i=0; i<B->m/bs; i++) { 1917 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1918 } 1919 } 1920 B->preallocated = PETSC_TRUE; 1921 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 1922 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 1923 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1924 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr); 1925 1926 b = (Mat_MPISBAIJ*)B->data; 1927 mbs = B->m/bs; 1928 Mbs = B->M/bs; 1929 if (mbs*bs != B->m) { 1930 SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs); 1931 } 1932 1933 b->bs = bs; 1934 b->bs2 = bs*bs; 1935 b->mbs = mbs; 1936 b->nbs = mbs; 1937 b->Mbs = Mbs; 1938 b->Nbs = Mbs; 1939 1940 ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1941 
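  /* turn the gathered per-process block counts (now sitting in rowners[1..size]) into a cumulative
     ownership table: after the prefix-sum loop below, rowners[i] is the first block row owned by
     process i, and rowners[i+1]-rowners[i] is the number of block rows that process owns */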
b->rowners[0] = 0; 1942 for (i=2; i<=b->size; i++) { 1943 b->rowners[i] += b->rowners[i-1]; 1944 } 1945 b->rstart = b->rowners[b->rank]; 1946 b->rend = b->rowners[b->rank+1]; 1947 b->cstart = b->rstart; 1948 b->cend = b->rend; 1949 for (i=0; i<=b->size; i++) { 1950 b->rowners_bs[i] = b->rowners[i]*bs; 1951 } 1952 b->rstart_bs = b->rstart*bs; 1953 b->rend_bs = b->rend*bs; 1954 1955 b->cstart_bs = b->cstart*bs; 1956 b->cend_bs = b->cend*bs; 1957 1958 1959 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1960 PetscLogObjectParent(B,b->A); 1961 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1962 PetscLogObjectParent(B,b->B); 1963 1964 /* build cache for off array entries formed */ 1965 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1966 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatCreateMPISBAIJ" 1972 /*@C 1973 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1974 (block compressed row). For good matrix assembly performance 1975 the user should preallocate the matrix storage by setting the parameters 1976 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1977 performance can be increased by more than a factor of 50. 1978 1979 Collective on MPI_Comm 1980 1981 Input Parameters: 1982 + comm - MPI communicator 1983 . bs - size of block 1984 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1985 This value should be the same as the local size used in creating the 1986 y vector for the matrix-vector product y = Ax. 1987 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1988 This value should be the same as the local size used in creating the 1989 x vector for the matrix-vector product y = Ax. 1990 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1991 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1992 . d_nz - number of block nonzeros per block row in diagonal portion of local 1993 submatrix (same for all local rows) 1994 . d_nnz - array containing the number of block nonzeros in the various block rows 1995 in the upper triangular portion of the diagonal portion of the local 1996 submatrix (possibly different for each block row) or PETSC_NULL. 1997 You must leave room for the diagonal entry even if it is zero. 1998 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1999 submatrix (same for all local rows). 2000 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2001 off-diagonal portion of the local submatrix (possibly different for 2002 each block row) or PETSC_NULL. 2003 2004 Output Parameter: 2005 . A - the matrix 2006 2007 Options Database Keys: 2008 . -mat_no_unroll - uses code that does not unroll the loops in the 2009 block calculations (much slower) 2010 . -mat_block_size - size of the blocks to use 2011 . -mat_mpi - use the parallel matrix data structures even on one processor 2012 (defaults to using SeqSBAIJ format on one processor) 2013 2014 Notes: 2015 The user MUST specify either the local or global matrix dimensions 2016 (possibly both). 2017 2018 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2019 then it must be used on all processors that share the object for that argument.
2020 2021 Storage Information: 2022 For a square global matrix we define each processor's diagonal portion 2023 to be its local rows and the corresponding columns (a square submatrix); 2024 each processor's off-diagonal portion encompasses the remainder of the 2025 local matrix (a rectangular submatrix). 2026 2027 The user can specify preallocated storage for the diagonal part of 2028 the local submatrix with either d_nz or d_nnz (not both). Set 2029 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2030 memory allocation. Likewise, specify preallocated storage for the 2031 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2032 2033 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2034 the figure below we depict these three local rows and all columns (0-11). 2035 2036 .vb 2037 0 1 2 3 4 5 6 7 8 9 10 11 2038 ------------------- 2039 row 3 | o o o d d d o o o o o o 2040 row 4 | o o o d d d o o o o o o 2041 row 5 | o o o d d d o o o o o o 2042 ------------------- 2043 .ve 2044 2045 Thus, any entries in the d locations are stored in the d (diagonal) 2046 submatrix, and any entries in the o locations are stored in the 2047 o (off-diagonal) submatrix. Note that the d matrix is stored in 2048 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2049 2050 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2051 plus the diagonal part of the d matrix, 2052 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2053 In general, for PDE problems in which most nonzeros are near the diagonal, 2054 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2055 or you will get TERRIBLE performance; see the users' manual chapter on 2056 matrices. 
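   Example of usage (an illustrative sketch only; comm, M, and A are placeholder names,
   not variables defined in this file). Here block size 2 is used, PETSc chooses the
   local sizes, and a modest constant preallocation is requested:

.vb
   Mat A;
   int ierr;
   ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve

   Since the symmetric format stores only the upper triangular part of each block row
   (including the diagonal), normally only those entries are supplied with MatSetValues()
   during assembly.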
2057 2058 Level: intermediate 2059 2060 .keywords: matrix, block, aij, compressed row, sparse, parallel 2061 2062 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2063 @*/ 2064 2065 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2066 { 2067 int ierr,size; 2068 2069 PetscFunctionBegin; 2070 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2071 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2072 if (size > 1) { 2073 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2074 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2075 } else { 2076 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2077 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2078 } 2079 PetscFunctionReturn(0); 2080 } 2081 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2085 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2086 { 2087 Mat mat; 2088 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2089 int ierr,len=0; 2090 2091 PetscFunctionBegin; 2092 *newmat = 0; 2093 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2094 ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr); 2095 mat->preallocated = PETSC_TRUE; 2096 a = (Mat_MPISBAIJ*)mat->data; 2097 a->bs = oldmat->bs; 2098 a->bs2 = oldmat->bs2; 2099 a->mbs = oldmat->mbs; 2100 a->nbs = oldmat->nbs; 2101 a->Mbs = oldmat->Mbs; 2102 a->Nbs = oldmat->Nbs; 2103 2104 a->rstart = oldmat->rstart; 2105 a->rend = oldmat->rend; 2106 a->cstart = oldmat->cstart; 2107 a->cend = oldmat->cend; 2108 a->size = oldmat->size; 2109 a->rank = oldmat->rank; 2110 a->donotstash = oldmat->donotstash; 2111 a->roworiented = oldmat->roworiented; 2112 a->rowindices = 0; 2113 a->rowvalues = 0; 2114 a->getrowactive = PETSC_FALSE; 2115 a->barray = 0; 2116 a->rstart_bs = oldmat->rstart_bs; 2117 a->rend_bs = oldmat->rend_bs; 2118 a->cstart_bs = oldmat->cstart_bs; 2119 a->cend_bs = oldmat->cend_bs; 2120 2121 /* hash table stuff */ 2122 a->ht = 0; 2123 a->hd = 0; 2124 a->ht_size = 0; 2125 a->ht_flag = oldmat->ht_flag; 2126 a->ht_fact = oldmat->ht_fact; 2127 a->ht_total_ct = 0; 2128 a->ht_insert_ct = 0; 2129 2130 ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr); 2131 PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2132 a->cowners = a->rowners + a->size + 2; 2133 a->rowners_bs = a->cowners + a->size + 2; 2134 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2135 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2136 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2137 if (oldmat->colmap) { 2138 #if defined (PETSC_USE_CTABLE) 2139 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2140 #else 2141 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2142 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2143 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2144 #endif 2145 } else a->colmap = 0; 2146 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2147 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2148 PetscLogObjectMemory(mat,len*sizeof(int)); 2149 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2150 } else a->garray = 0; 2151 2152 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2153 PetscLogObjectParent(mat,a->lvec); 2154 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2155 2156 PetscLogObjectParent(mat,a->Mvctx); 2157 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2158 PetscLogObjectParent(mat,a->A); 2159 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2160 PetscLogObjectParent(mat,a->B); 2161 ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2162 *newmat = mat; 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #include "petscsys.h" 2167 2168 EXTERN_C_BEGIN 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatLoad_MPISBAIJ" 2171 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2172 { 2173 Mat A; 2174 int i,nz,ierr,j,rstart,rend,fd; 2175 PetscScalar *vals,*buf; 2176 MPI_Comm comm = ((PetscObject)viewer)->comm; 2177 MPI_Status status; 2178 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2179 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2180 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2181 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2182 int dcount,kmax,k,nzcount,tmp; 2183 2184 PetscFunctionBegin; 2185 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2186 2187 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2188 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2189 if (!rank) { 2190 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2191 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2192 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2193 if (header[3] < 0) { 2194 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2195 } 2196 } 2197 2198 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2199 M = header[1]; N = header[2]; 2200 2201 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2202 2203 /* 2204 This code adds extra rows to make sure the number of rows is 2205 divisible by the blocksize 2206 */ 2207 Mbs = M/bs; 2208 extra_rows = bs - M + bs*(Mbs); 2209 if (extra_rows == bs) extra_rows = 0; 2210 else Mbs++; 2211 if (extra_rows &&!rank) { 2212 PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2213 } 2214 2215 /* determine ownership of all rows */ 2216 mbs = Mbs/size + ((Mbs % size) > rank); 2217 m = mbs*bs; 2218 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2219 browners = rowners + size + 1; 2220 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2221 rowners[0] = 0; 2222 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2223 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2224 rstart = rowners[rank]; 2225 rend = rowners[rank+1]; 2226 2227 /* distribute row lengths to all processors */ 2228 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2229 if (!rank) { 2230 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2231 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2232 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2233 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2234 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2235 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2236 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2237 } else { 
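    /* every other process receives only its own share of the row lengths from process 0 via the scatter below */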
2238 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2239 } 2240 2241 if (!rank) { /* procs[0] */ 2242 /* calculate the number of nonzeros on each processor */ 2243 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2244 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2245 for (i=0; i<size; i++) { 2246 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2247 procsnz[i] += rowlengths[j]; 2248 } 2249 } 2250 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2251 2252 /* determine max buffer needed and allocate it */ 2253 maxnz = 0; 2254 for (i=0; i<size; i++) { 2255 maxnz = PetscMax(maxnz,procsnz[i]); 2256 } 2257 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2258 2259 /* read in my part of the matrix column indices */ 2260 nz = procsnz[0]; 2261 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2262 mycols = ibuf; 2263 if (size == 1) nz -= extra_rows; 2264 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2265 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2266 2267 /* read in every ones (except the last) and ship off */ 2268 for (i=1; i<size-1; i++) { 2269 nz = procsnz[i]; 2270 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2271 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2272 } 2273 /* read in the stuff for the last proc */ 2274 if (size != 1) { 2275 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2276 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2277 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2278 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2279 } 2280 ierr = PetscFree(cols);CHKERRQ(ierr); 2281 } else { /* procs[i], i>0 */ 2282 /* determine buffer space needed for message */ 2283 nz = 0; 2284 for (i=0; i<m; i++) { 2285 nz += locrowlens[i]; 2286 } 2287 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2288 mycols = ibuf; 2289 /* receive message of column indices*/ 2290 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2291 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2292 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2293 } 2294 2295 /* loop over local rows, determining number of off diagonal entries */ 2296 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2297 odlens = dlens + (rend-rstart); 2298 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2299 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2300 masked1 = mask + Mbs; 2301 masked2 = masked1 + Mbs; 2302 rowcount = 0; nzcount = 0; 2303 for (i=0; i<mbs; i++) { 2304 dcount = 0; 2305 odcount = 0; 2306 for (j=0; j<bs; j++) { 2307 kmax = locrowlens[rowcount]; 2308 for (k=0; k<kmax; k++) { 2309 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2310 if (!mask[tmp]) { 2311 mask[tmp] = 1; 2312 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2313 else masked1[dcount++] = tmp; /* entry in diag portion */ 2314 } 2315 } 2316 rowcount++; 2317 } 2318 2319 dlens[i] = dcount; /* d_nzz[i] */ 2320 odlens[i] = odcount; /* o_nzz[i] */ 2321 2322 /* zero out the mask elements we set */ 2323 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2324 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2325 } 2326 2327 /* create our matrix */ 2328 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2329 CHKERRQ(ierr); 2330 A = *newmat; 2331 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2332 2333 if (!rank) { 2334 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2335 /* read in my part of the matrix numerical values */ 2336 nz = procsnz[0]; 2337 vals = buf; 2338 mycols = ibuf; 2339 if (size == 1) nz -= extra_rows; 2340 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2341 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2342 2343 /* insert into matrix */ 2344 jj = rstart*bs; 2345 for (i=0; i<m; i++) { 2346 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2347 mycols += locrowlens[i]; 2348 vals += locrowlens[i]; 2349 jj++; 2350 } 2351 2352 /* read in other processors (except the last one) and ship out */ 2353 for (i=1; i<size-1; i++) { 2354 nz = procsnz[i]; 2355 vals = buf; 2356 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2357 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2358 } 2359 /* the last proc */ 2360 if (size != 1){ 2361 nz = procsnz[i] - extra_rows; 2362 vals = buf; 2363 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2364 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2365 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2366 } 2367 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2368 2369 } else { 2370 /* receive numeric values */ 2371 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2372 2373 /* receive message of values*/ 2374 vals = buf; 2375 mycols = ibuf; 2376 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2377 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2378 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2379 2380 /* insert into matrix */ 2381 jj = rstart*bs; 2382 for (i=0; i<m; i++) { 2383 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2384 mycols += locrowlens[i]; 2385 vals += locrowlens[i]; 2386 jj++; 2387 } 2388 } 2389 2390 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2391 ierr = PetscFree(buf);CHKERRQ(ierr); 2392 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2393 ierr = PetscFree(rowners);CHKERRQ(ierr); 2394 ierr = PetscFree(dlens);CHKERRQ(ierr); 2395 ierr = PetscFree(mask);CHKERRQ(ierr); 2396 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2397 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2398 PetscFunctionReturn(0); 2399 } 2400 EXTERN_C_END 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2404 /*@ 2405 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2406 2407 Input Parameters: 2408 . mat - the matrix 2409 . 
fact - factor 2410 2411 Collective on Mat 2412 2413 Level: advanced 2414 2415 Notes: 2416 This can also be set by the command line option: -mat_use_hash_table fact 2417 2418 .keywords: matrix, hashtable, factor, HT 2419 2420 .seealso: MatSetOption() 2421 @*/ 2422 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2423 { 2424 PetscFunctionBegin; 2425 SETERRQ(1,"Function not yet written for SBAIJ format"); 2426 /* PetscFunctionReturn(0); */ 2427 } 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatGetRowMax_MPISBAIJ" 2431 int MatGetRowMax_MPISBAIJ(Mat A,Vec v) 2432 { 2433 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2434 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2435 PetscReal atmp; 2436 PetscReal *work,*svalues,*rvalues; 2437 int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2438 int rank,size,*rowners_bs,dest,count,source; 2439 PetscScalar *va; 2440 MatScalar *ba; 2441 MPI_Status stat; 2442 2443 PetscFunctionBegin; 2444 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 2445 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2446 2447 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 2448 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 2449 2450 bs = a->bs; 2451 mbs = a->mbs; 2452 Mbs = a->Mbs; 2453 ba = b->a; 2454 bi = b->i; 2455 bj = b->j; 2456 /* 2457 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs); 2458 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2459 */ 2460 2461 /* find ownerships */ 2462 rowners_bs = a->rowners_bs; 2463 /* 2464 if (!rank){ 2465 for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]); 2466 } 2467 */ 2468 2469 /* each proc creates an array to be distributed */ 2470 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2471 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2472 2473 /* row_max for B */ 2474 if (rank != size-1){ 2475 for (i=0; i<mbs; i++) { 2476 ncols = bi[1] - bi[0]; bi++; 2477 brow = bs*i; 2478 for (j=0; j<ncols; j++){ 2479 bcol = bs*(*bj); 2480 for (kcol=0; kcol<bs; kcol++){ 2481 col = bcol + kcol; /* local col index */ 2482 col += rowners_bs[rank+1]; /* global col index */ 2483 /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */ 2484 for (krow=0; krow<bs; krow++){ 2485 atmp = PetscAbsScalar(*ba); ba++; 2486 row = brow + krow; /* local row index */ 2487 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 2488 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2489 if (work[col] < atmp) work[col] = atmp; 2490 } 2491 } 2492 bj++; 2493 } 2494 } 2495 /* 2496 PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank); 2497 for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]); 2498 PetscPrintf(PETSC_COMM_SELF,"[%d]: \n"); 2499 */ 2500 2501 /* send values to its owners */ 2502 for (dest=rank+1; dest<size; dest++){ 2503 svalues = work + rowners_bs[dest]; 2504 count = rowners_bs[dest+1]-rowners_bs[dest]; 2505 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr); 2506 /* 2507 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]); 2508 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2509 */ 2510 } 2511 } 2512 2513 /* receive values */ 2514 if (rank){ 2515 rvalues = work; 2516 count = rowners_bs[rank+1]-rowners_bs[rank]; 2517 for (source=0; source<rank; source++){ 2518 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr); 2519 /* process values 
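received from the lower-ranked processes, keeping the entrywise maximum of the incoming values and the row maxima computed locally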
*/ 2520 for (i=0; i<count; i++){ 2521 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2522 } 2523 /* 2524 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]); 2525 PetscSynchronizedFlush(PETSC_COMM_WORLD); 2526 */ 2527 } 2528 } 2529 2530 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2531 ierr = PetscFree(work);CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatRelax_MPISBAIJ" 2537 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2538 { 2539 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2540 int ierr,mbs=mat->mbs,bs=mat->bs; 2541 PetscScalar mone=-1.0,*x,*b,*ptr,zero=0.0; 2542 Vec bb1; 2543 2544 PetscFunctionBegin; 2545 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2546 if (bs > 1) 2547 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2548 2549 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2550 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2551 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2552 its--; 2553 } 2554 2555 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2556 while (its--){ 2557 2558 /* lower triangular part: slvec0b = - B^T*xx */ 2559 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2560 2561 /* copy xx into slvec0a */ 2562 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2563 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2564 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2565 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2566 2567 ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr); 2568 2569 /* copy bb into slvec1a */ 2570 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2571 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2572 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2573 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2574 2575 /* set slvec1b = 0 */ 2576 ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr); 2577 2578 ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2579 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2580 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2581 ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr); 2582 2583 /* upper triangular part: bb1 = bb1 - B*x */ 2584 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2585 2586 /* local diagonal sweep */ 2587 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2588 } 2589 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2590 } else { 2591 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2592 } 2593 PetscFunctionReturn(0); 2594 } 2595 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm" 2598 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2599 { 2600 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2601 int ierr; 2602 PetscScalar mone=-1.0; 2603 Vec lvec1,bb1; 2604 2605 PetscFunctionBegin; 2606 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2607 if (mat->bs > 1) 2608 
SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2609 2610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2611 if ( flag & SOR_ZERO_INITIAL_GUESS ) { 2612 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2613 its--; 2614 } 2615 2616 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2617 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2618 while (its--){ 2619 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2620 2621 /* lower diagonal part: bb1 = bb - B^T*xx */ 2622 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2623 ierr = VecScale(&mone,lvec1);CHKERRQ(ierr); 2624 2625 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 2626 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2627 ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2628 2629 /* upper diagonal part: bb1 = bb1 - B*x */ 2630 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 2631 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2632 2633 ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr); 2634 2635 /* diagonal sweep */ 2636 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2637 } 2638 ierr = VecDestroy(lvec1);CHKERRQ(ierr); 2639 ierr = VecDestroy(bb1);CHKERRQ(ierr); 2640 } else { 2641 SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2642 } 2643 PetscFunctionReturn(0); 2644 } 2645 2646