1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) { 29 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 30 } 31 } 32 33 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 34 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 35 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 36 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 37 38 for (i=0; i<A->rmap->n; i++) { 39 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 40 va[i] = vb[i]; 41 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 42 } 43 } 44 45 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 46 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 47 ierr = PetscFree(idxb);CHKERRQ(ierr); 48 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 #undef __FUNCT__ 53 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 55 { 56 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 57 PetscErrorCode ierr; 58 59 PetscFunctionBegin; 60 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 61 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 67 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 68 { 69 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 74 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 /* 79 Local utility routine that creates a mapping from the global column 80 number to the local number in the off-diagonal part of the local 81 storage of the matrix. This is done in a non scalable way since the 82 length of colmap equals the global matrix length. 83 */ 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 86 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 87 { 88 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 89 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 90 PetscErrorCode ierr; 91 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 92 93 PetscFunctionBegin; 94 #if defined(PETSC_USE_CTABLE) 95 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 96 for (i=0; i<nbs; i++) { 97 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 98 } 99 #else 100 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 101 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 104 #endif 105 PetscFunctionReturn(0); 106 } 107 108 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 109 { \ 110 \ 111 brow = row/bs; \ 112 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 113 rmax = aimax[brow]; nrow = ailen[brow]; \ 114 bcol = col/bs; \ 115 ridx = row % bs; cidx = col % bs; \ 116 low = 0; high = nrow; \ 117 while (high-low > 3) { \ 118 t = (low+high)/2; \ 119 if (rp[t] > bcol) high = t; \ 120 else low = t; \ 121 } \ 122 for (_i=low; _i<high; _i++) { \ 123 if (rp[_i] > bcol) break; \ 124 if (rp[_i] == bcol) { \ 125 bap = ap + bs2*_i + bs*cidx + ridx; \ 126 if (addv == ADD_VALUES) *bap += value; \ 127 else *bap = value; \ 128 goto a_noinsert; \ 129 } \ 130 } \ 131 if (a->nonew == 1) goto a_noinsert; \ 132 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 133 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 134 N = nrow++ - 1; \ 135 /* shift up all the later entries in this row */ \ 136 for (ii=N; ii>=_i; ii--) { \ 137 rp[ii+1] = rp[ii]; \ 138 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 139 } \ 140 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 141 rp[_i] = bcol; \ 142 ap[bs2*_i + bs*cidx + ridx] = value; \ 143 a_noinsert:; \ 144 ailen[brow] = nrow; \ 145 } 146 147 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 148 { \ 149 brow = row/bs; \ 150 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 151 rmax = bimax[brow]; nrow = bilen[brow]; \ 152 bcol = col/bs; \ 153 ridx = row % bs; cidx = col % bs; \ 154 low = 0; high = nrow; \ 155 while (high-low > 3) { \ 156 t = (low+high)/2; \ 157 if (rp[t] > bcol) high = t; \ 158 else low = t; \ 159 } \ 160 for (_i=low; _i<high; _i++) { \ 161 if (rp[_i] > bcol) break; \ 162 if (rp[_i] == bcol) { \ 163 bap = ap + bs2*_i + bs*cidx + ridx; \ 164 if (addv == ADD_VALUES) *bap += value; \ 165 else *bap = value; \ 166 goto b_noinsert; \ 167 } \ 168 } \ 169 if (b->nonew == 1) goto b_noinsert; \ 170 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 171 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa =a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba =b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig) { 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; 226 else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } else if (in[j] < 0) continue; 230 #if defined(PETSC_USE_DEBUG) 231 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 232 #endif 233 else { 234 if (mat->was_assembled) { 235 if (!baij->colmap) { 236 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined(PETSC_USE_CTABLE) 239 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 240 col = col - 1; 241 #else 242 col = baij->colmap[in[j]/bs] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 245 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 248 B = baij->B; 249 b = (Mat_SeqBAIJ*)(B)->data; 250 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 251 ba =b->a; 252 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 253 else col += in[j]%bs; 254 } else col = in[j]; 255 if (roworiented) value = v[i*n+j]; 256 else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 mat->assembled = PETSC_FALSE; 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray = baij->barray; 283 PetscBool roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if (!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) stepval = (n-1)*bs; 296 else stepval = (m-1)*bs; 297 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if ((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + (i*(stepval+bs) + j)*bs; 314 } else { 315 value = v + (j*(stepval+bs) + i)*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 318 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend) { 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } else if (in[j] < 0) continue; 328 #if defined(PETSC_USE_DEBUG) 329 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 330 #endif 331 else { 332 if (mat->was_assembled) { 333 if (!baij->colmap) { 334 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 335 } 336 337 #if defined(PETSC_USE_DEBUG) 338 #if defined(PETSC_USE_CTABLE) 339 { PetscInt data; 340 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 341 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 342 } 343 #else 344 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 345 #endif 346 #endif 347 #if defined(PETSC_USE_CTABLE) 348 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 349 col = (col - 1)/bs; 350 #else 351 col = (baij->colmap[in[j]] - 1)/bs; 352 #endif 353 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 354 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 355 col = in[j]; 356 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 357 } else col = in[j]; 358 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 359 } 360 } 361 } else { 362 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 363 if (!baij->donotstash) { 364 if (roworiented) { 365 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 366 } else { 367 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 375 #define HASH_KEY 0.6180339887 376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 384 PetscBool roworiented = baij->roworiented; 385 PetscErrorCode ierr; 386 PetscInt i,j,row,col; 387 PetscInt rstart_orig=mat->rmap->rstart; 388 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 389 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 390 PetscReal tmp; 391 MatScalar **HD = baij->hd,value; 392 #if defined(PETSC_USE_DEBUG) 393 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 394 #endif 395 396 PetscFunctionBegin; 397 if (v) PetscValidScalarPointer(v,6); 398 for (i=0; i<m; i++) { 399 #if defined(PETSC_USE_DEBUG) 400 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 401 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 402 #endif 403 row = im[i]; 404 if (row >= rstart_orig && row < rend_orig) { 405 for (j=0; j<n; j++) { 406 col = in[j]; 407 if (roworiented) value = v[i*n+j]; 408 else value = v[i+j*m]; 409 /* Look up PetscInto the Hash Table */ 410 key = (row/bs)*Nbs+(col/bs)+1; 411 h1 = HASH(size,key,tmp); 412 413 414 idx = h1; 415 #if defined(PETSC_USE_DEBUG) 416 insert_ct++; 417 total_ct++; 418 if (HT[idx] != key) { 419 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 420 if (idx == size) { 421 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 422 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 423 } 424 } 425 #else 426 if (HT[idx] != key) { 427 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 428 if (idx == size) { 429 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 430 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 431 } 432 } 433 #endif 434 /* A HASH table entry is found, so insert the values at the correct address */ 435 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 436 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 437 } 438 } else if (!baij->donotstash) { 439 if (roworiented) { 440 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 441 } else { 442 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 443 } 444 } 445 } 446 #if defined(PETSC_USE_DEBUG) 447 baij->ht_total_ct = total_ct; 448 baij->ht_insert_ct = insert_ct; 449 #endif 450 PetscFunctionReturn(0); 451 } 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 455 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 456 { 457 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 458 PetscBool roworiented = baij->roworiented; 459 PetscErrorCode ierr; 460 PetscInt i,j,ii,jj,row,col; 461 PetscInt rstart=baij->rstartbs; 462 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 463 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 464 PetscReal tmp; 465 MatScalar **HD = baij->hd,*baij_a; 466 const PetscScalar *v_t,*value; 467 #if defined(PETSC_USE_DEBUG) 468 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 469 #endif 470 471 PetscFunctionBegin; 472 if (roworiented) stepval = (n-1)*bs; 473 else stepval = (m-1)*bs; 474 475 for (i=0; i<m; i++) { 476 #if defined(PETSC_USE_DEBUG) 477 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 478 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 479 #endif 480 row = im[i]; 481 v_t = v + i*nbs2; 482 if (row >= rstart && row < rend) { 483 for (j=0; j<n; j++) { 484 col = in[j]; 485 486 /* Look up into the Hash Table */ 487 key = row*Nbs+col+1; 488 h1 = HASH(size,key,tmp); 489 490 idx = h1; 491 #if defined(PETSC_USE_DEBUG) 492 total_ct++; 493 insert_ct++; 494 if (HT[idx] != key) { 495 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 496 if (idx == size) { 497 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 498 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 499 } 500 } 501 #else 502 if (HT[idx] != key) { 503 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 504 if (idx == size) { 505 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 506 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 507 } 508 } 509 #endif 510 baij_a = HD[idx]; 511 if (roworiented) { 512 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 513 /* value = v + (i*(stepval+bs)+j)*bs; */ 514 value = v_t; 515 v_t += bs; 516 if (addv == ADD_VALUES) { 517 for (ii=0; ii<bs; ii++,value+=stepval) { 518 for (jj=ii; jj<bs2; jj+=bs) { 519 baij_a[jj] += *value++; 520 } 521 } 522 } else { 523 for (ii=0; ii<bs; ii++,value+=stepval) { 524 for (jj=ii; jj<bs2; jj+=bs) { 525 baij_a[jj] = *value++; 526 } 527 } 528 } 529 } else { 530 value = v + j*(stepval+bs)*bs + i*bs; 531 if (addv == ADD_VALUES) { 532 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 533 for (jj=0; jj<bs; jj++) { 534 baij_a[jj] += *value++; 535 } 536 } 537 } else { 538 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 539 for (jj=0; jj<bs; jj++) { 540 baij_a[jj] = *value++; 541 } 542 } 543 } 544 } 545 } 546 } else { 547 if (!baij->donotstash) { 548 if (roworiented) { 549 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 550 } else { 551 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 552 } 553 } 554 } 555 } 556 #if defined(PETSC_USE_DEBUG) 557 baij->ht_total_ct = total_ct; 558 baij->ht_insert_ct = insert_ct; 559 #endif 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatGetValues_MPIBAIJ" 565 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 566 { 567 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 568 PetscErrorCode ierr; 569 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 570 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 571 572 PetscFunctionBegin; 573 for (i=0; i<m; i++) { 574 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 575 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 576 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 577 row = idxm[i] - bsrstart; 578 for (j=0; j<n; j++) { 579 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 580 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 581 if (idxn[j] >= bscstart && idxn[j] < bscend) { 582 col = idxn[j] - bscstart; 583 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 584 } else { 585 if (!baij->colmap) { 586 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 587 } 588 #if defined(PETSC_USE_CTABLE) 589 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 590 data--; 591 #else 592 data = baij->colmap[idxn[j]/bs]-1; 593 #endif 594 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 595 else { 596 col = data + idxn[j]%bs; 597 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } 599 } 600 } 601 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatNorm_MPIBAIJ" 608 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 609 { 610 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 611 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 612 PetscErrorCode ierr; 613 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 614 PetscReal sum = 0.0; 615 MatScalar *v; 616 617 PetscFunctionBegin; 618 if (baij->size == 1) { 619 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 620 } else { 621 if (type == NORM_FROBENIUS) { 622 v = amat->a; 623 nz = amat->nz*bs2; 624 for (i=0; i<nz; i++) { 625 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 626 } 627 v = bmat->a; 628 nz = bmat->nz*bs2; 629 for (i=0; i<nz; i++) { 630 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 631 } 632 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 633 *nrm = PetscSqrtReal(*nrm); 634 } else if (type == NORM_1) { /* max column sum */ 635 PetscReal *tmp,*tmp2; 636 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 637 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 638 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 639 v = amat->a; jj = amat->j; 640 for (i=0; i<amat->nz; i++) { 641 for (j=0; j<bs; j++) { 642 col = bs*(cstart + *jj) + j; /* column index */ 643 for (row=0; row<bs; row++) { 644 tmp[col] += PetscAbsScalar(*v); v++; 645 } 646 } 647 jj++; 648 } 649 v = bmat->a; jj = bmat->j; 650 for (i=0; i<bmat->nz; i++) { 651 for (j=0; j<bs; j++) { 652 col = bs*garray[*jj] + j; 653 for (row=0; row<bs; row++) { 654 tmp[col] += PetscAbsScalar(*v); v++; 655 } 656 } 657 jj++; 658 } 659 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 660 *nrm = 0.0; 661 for (j=0; j<mat->cmap->N; j++) { 662 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 663 } 664 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 665 } else if (type == NORM_INFINITY) { /* max row sum */ 666 PetscReal *sums; 667 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 668 sum = 0.0; 669 for (j=0; j<amat->mbs; j++) { 670 for (row=0; row<bs; row++) sums[row] = 0.0; 671 v = amat->a + bs2*amat->i[j]; 672 nz = amat->i[j+1]-amat->i[j]; 673 for (i=0; i<nz; i++) { 674 for (col=0; col<bs; col++) { 675 for (row=0; row<bs; row++) { 676 sums[row] += PetscAbsScalar(*v); v++; 677 } 678 } 679 } 680 v = bmat->a + bs2*bmat->i[j]; 681 nz = bmat->i[j+1]-bmat->i[j]; 682 for (i=0; i<nz; i++) { 683 for (col=0; col<bs; col++) { 684 for (row=0; row<bs; row++) { 685 sums[row] += PetscAbsScalar(*v); v++; 686 } 687 } 688 } 689 for (row=0; row<bs; row++) { 690 if (sums[row] > sum) sum = sums[row]; 691 } 692 } 693 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 694 ierr = PetscFree(sums);CHKERRQ(ierr); 695 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 696 } 697 PetscFunctionReturn(0); 698 } 699 700 /* 701 Creates the hash table, and sets the table 702 This table is created only once. 703 If new entried need to be added to the matrix 704 then the hash table has to be destroyed and 705 recreated. 706 */ 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 709 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 710 { 711 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 712 Mat A = baij->A,B=baij->B; 713 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 714 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 715 PetscErrorCode ierr; 716 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 717 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 718 PetscInt *HT,key; 719 MatScalar **HD; 720 PetscReal tmp; 721 #if defined(PETSC_USE_INFO) 722 PetscInt ct=0,max=0; 723 #endif 724 725 PetscFunctionBegin; 726 if (baij->ht) PetscFunctionReturn(0); 727 728 baij->ht_size = (PetscInt)(factor*nz); 729 ht_size = baij->ht_size; 730 731 /* Allocate Memory for Hash Table */ 732 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 733 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 734 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 735 HD = baij->hd; 736 HT = baij->ht; 737 738 /* Loop Over A */ 739 for (i=0; i<a->mbs; i++) { 740 for (j=ai[i]; j<ai[i+1]; j++) { 741 row = i+rstart; 742 col = aj[j]+cstart; 743 744 key = row*Nbs + col + 1; 745 h1 = HASH(ht_size,key,tmp); 746 for (k=0; k<ht_size; k++) { 747 if (!HT[(h1+k)%ht_size]) { 748 HT[(h1+k)%ht_size] = key; 749 HD[(h1+k)%ht_size] = a->a + j*bs2; 750 break; 751 #if defined(PETSC_USE_INFO) 752 } else { 753 ct++; 754 #endif 755 } 756 } 757 #if defined(PETSC_USE_INFO) 758 if (k> max) max = k; 759 #endif 760 } 761 } 762 /* Loop Over B */ 763 for (i=0; i<b->mbs; i++) { 764 for (j=bi[i]; j<bi[i+1]; j++) { 765 row = i+rstart; 766 col = garray[bj[j]]; 767 key = row*Nbs + col + 1; 768 h1 = HASH(ht_size,key,tmp); 769 for (k=0; k<ht_size; k++) { 770 if (!HT[(h1+k)%ht_size]) { 771 HT[(h1+k)%ht_size] = key; 772 HD[(h1+k)%ht_size] = b->a + j*bs2; 773 break; 774 #if defined(PETSC_USE_INFO) 775 } else { 776 ct++; 777 #endif 778 } 779 } 780 #if defined(PETSC_USE_INFO) 781 if (k> max) max = k; 782 #endif 783 } 784 } 785 786 /* Print Summary */ 787 #if defined(PETSC_USE_INFO) 788 for (i=0,j=0; i<ht_size; i++) { 789 if (HT[i]) j++; 790 } 791 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 792 #endif 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 798 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 799 { 800 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 801 PetscErrorCode ierr; 802 PetscInt nstash,reallocs; 803 InsertMode addv; 804 805 PetscFunctionBegin; 806 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 807 808 /* make sure all processors are either in INSERTMODE or ADDMODE */ 809 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 810 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 811 mat->insertmode = addv; /* in case this processor had no cache */ 812 813 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 814 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 815 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 816 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 817 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 818 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 824 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 825 { 826 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 827 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 828 PetscErrorCode ierr; 829 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 830 PetscInt *row,*col; 831 PetscBool r1,r2,r3,other_disassembled; 832 MatScalar *val; 833 InsertMode addv = mat->insertmode; 834 PetscMPIInt n; 835 836 PetscFunctionBegin; 837 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 838 if (!baij->donotstash && !mat->nooffprocentries) { 839 while (1) { 840 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 841 if (!flg) break; 842 843 for (i=0; i<n;) { 844 /* Now identify the consecutive vals belonging to the same row */ 845 for (j=i,rstart=row[j]; j<n; j++) { 846 if (row[j] != rstart) break; 847 } 848 if (j < n) ncols = j-i; 849 else ncols = n-i; 850 /* Now assemble all these values with a single function call */ 851 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 852 i = j; 853 } 854 } 855 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 856 /* Now process the block-stash. Since the values are stashed column-oriented, 857 set the roworiented flag to column oriented, and after MatSetValues() 858 restore the original flags */ 859 r1 = baij->roworiented; 860 r2 = a->roworiented; 861 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 862 863 baij->roworiented = PETSC_FALSE; 864 a->roworiented = PETSC_FALSE; 865 866 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 867 while (1) { 868 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 869 if (!flg) break; 870 871 for (i=0; i<n;) { 872 /* Now identify the consecutive vals belonging to the same row */ 873 for (j=i,rstart=row[j]; j<n; j++) { 874 if (row[j] != rstart) break; 875 } 876 if (j < n) ncols = j-i; 877 else ncols = n-i; 878 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 879 i = j; 880 } 881 } 882 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 883 884 baij->roworiented = r1; 885 a->roworiented = r2; 886 887 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 888 } 889 890 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 891 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 892 893 /* determine if any processor has disassembled, if so we must 894 also disassemble ourselfs, in order that we may reassemble. */ 895 /* 896 if nonzero structure of submatrix B cannot change then we know that 897 no processor disassembled thus we can skip this stuff 898 */ 899 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 900 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 901 if (mat->was_assembled && !other_disassembled) { 902 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 903 } 904 } 905 906 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 907 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 908 } 909 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 910 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 911 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 912 913 #if defined(PETSC_USE_INFO) 914 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 915 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 916 917 baij->ht_total_ct = 0; 918 baij->ht_insert_ct = 0; 919 } 920 #endif 921 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 922 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 923 924 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 925 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 926 } 927 928 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 929 930 baij->rowvalues = 0; 931 PetscFunctionReturn(0); 932 } 933 934 #include <petscdraw.h> 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 937 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 938 { 939 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 940 PetscErrorCode ierr; 941 PetscMPIInt size = baij->size,rank = baij->rank; 942 PetscInt bs = mat->rmap->bs; 943 PetscBool iascii,isdraw; 944 PetscViewer sviewer; 945 PetscViewerFormat format; 946 947 PetscFunctionBegin; 948 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 949 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 950 if (iascii) { 951 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 952 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 953 MatInfo info; 954 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 955 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 958 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 959 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 961 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 962 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 963 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 964 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 965 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 966 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } else if (format == PETSC_VIEWER_ASCII_INFO) { 969 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 972 PetscFunctionReturn(0); 973 } 974 } 975 976 if (isdraw) { 977 PetscDraw draw; 978 PetscBool isnull; 979 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 980 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 981 } 982 983 if (size == 1) { 984 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 985 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 986 } else { 987 /* assemble the entire matrix onto first processor. */ 988 Mat A; 989 Mat_SeqBAIJ *Aloc; 990 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 991 MatScalar *a; 992 993 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 994 /* Perhaps this should be the type of mat? */ 995 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 996 if (!rank) { 997 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 998 } else { 999 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1000 } 1001 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1002 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1003 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1004 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1005 1006 /* copy over the A part */ 1007 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1008 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1009 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1010 1011 for (i=0; i<mbs; i++) { 1012 rvals[0] = bs*(baij->rstartbs + i); 1013 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1014 for (j=ai[i]; j<ai[i+1]; j++) { 1015 col = (baij->cstartbs+aj[j])*bs; 1016 for (k=0; k<bs; k++) { 1017 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1018 col++; a += bs; 1019 } 1020 } 1021 } 1022 /* copy over the B part */ 1023 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1024 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = baij->garray[aj[j]]*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 ierr = PetscFree(rvals);CHKERRQ(ierr); 1037 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 /* 1040 Everyone has to call to draw the matrix since the graphics waits are 1041 synchronized across all processors that share the PetscDraw object 1042 */ 1043 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1044 if (!rank) { 1045 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1046 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1047 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1048 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1049 } 1050 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1051 ierr = MatDestroy(&A);CHKERRQ(ierr); 1052 } 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1058 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1059 { 1060 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1061 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1062 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1063 PetscErrorCode ierr; 1064 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1065 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1066 int fd; 1067 PetscScalar *column_values; 1068 FILE *file; 1069 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1070 PetscInt message_count,flowcontrolcount; 1071 1072 PetscFunctionBegin; 1073 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1074 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1075 nz = bs2*(A->nz + B->nz); 1076 rlen = mat->rmap->n; 1077 if (!rank) { 1078 header[0] = MAT_FILE_CLASSID; 1079 header[1] = mat->rmap->N; 1080 header[2] = mat->cmap->N; 1081 1082 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1083 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1084 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1085 /* get largest number of rows any processor has */ 1086 range = mat->rmap->range; 1087 for (i=1; i<size; i++) { 1088 rlen = PetscMax(rlen,range[i+1] - range[i]); 1089 } 1090 } else { 1091 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1092 } 1093 1094 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1095 /* compute lengths of each row */ 1096 for (i=0; i<a->mbs; i++) { 1097 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1098 } 1099 /* store the row lengths to the file */ 1100 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1101 if (!rank) { 1102 MPI_Status status; 1103 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1104 rlen = (range[1] - range[0])/bs; 1105 for (i=0; i<rlen; i++) { 1106 for (j=0; j<bs; j++) { 1107 row_lens[i*bs+j] = bs*crow_lens[i]; 1108 } 1109 } 1110 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1111 for (i=1; i<size; i++) { 1112 rlen = (range[i+1] - range[i])/bs; 1113 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1114 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1115 for (k=0; k<rlen; k++) { 1116 for (j=0; j<bs; j++) { 1117 row_lens[k*bs+j] = bs*crow_lens[k]; 1118 } 1119 } 1120 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1121 } 1122 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1123 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1124 } else { 1125 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1126 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1127 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1128 } 1129 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1130 1131 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1132 information needed to make it for each row from a block row. This does require more communication but still not more than 1133 the communication needed for the nonzero values */ 1134 nzmax = nz; /* space a largest processor needs */ 1135 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1136 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1137 cnt = 0; 1138 for (i=0; i<a->mbs; i++) { 1139 pcnt = cnt; 1140 for (j=B->i[i]; j<B->i[i+1]; j++) { 1141 if ((col = garray[B->j[j]]) > cstart) break; 1142 for (l=0; l<bs; l++) { 1143 column_indices[cnt++] = bs*col+l; 1144 } 1145 } 1146 for (k=A->i[i]; k<A->i[i+1]; k++) { 1147 for (l=0; l<bs; l++) { 1148 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1149 } 1150 } 1151 for (; j<B->i[i+1]; j++) { 1152 for (l=0; l<bs; l++) { 1153 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1154 } 1155 } 1156 len = cnt - pcnt; 1157 for (k=1; k<bs; k++) { 1158 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1159 cnt += len; 1160 } 1161 } 1162 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1163 1164 /* store the columns to the file */ 1165 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1166 if (!rank) { 1167 MPI_Status status; 1168 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1169 for (i=1; i<size; i++) { 1170 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1171 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1172 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1173 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1174 } 1175 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1176 } else { 1177 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1178 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1179 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1180 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1181 } 1182 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1183 1184 /* load up the numerical values */ 1185 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1186 cnt = 0; 1187 for (i=0; i<a->mbs; i++) { 1188 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1189 for (j=B->i[i]; j<B->i[i+1]; j++) { 1190 if (garray[B->j[j]] > cstart) break; 1191 for (l=0; l<bs; l++) { 1192 for (ll=0; ll<bs; ll++) { 1193 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1194 } 1195 } 1196 cnt += bs; 1197 } 1198 for (k=A->i[i]; k<A->i[i+1]; k++) { 1199 for (l=0; l<bs; l++) { 1200 for (ll=0; ll<bs; ll++) { 1201 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1202 } 1203 } 1204 cnt += bs; 1205 } 1206 for (; j<B->i[i+1]; j++) { 1207 for (l=0; l<bs; l++) { 1208 for (ll=0; ll<bs; ll++) { 1209 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1210 } 1211 } 1212 cnt += bs; 1213 } 1214 cnt += (bs-1)*rlen; 1215 } 1216 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1217 1218 /* store the column values to the file */ 1219 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1220 if (!rank) { 1221 MPI_Status status; 1222 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1223 for (i=1; i<size; i++) { 1224 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1225 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1226 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1227 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1228 } 1229 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1230 } else { 1231 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1232 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1233 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1235 } 1236 ierr = PetscFree(column_values);CHKERRQ(ierr); 1237 1238 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1239 if (file) { 1240 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1241 } 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatView_MPIBAIJ" 1247 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1248 { 1249 PetscErrorCode ierr; 1250 PetscBool iascii,isdraw,issocket,isbinary; 1251 1252 PetscFunctionBegin; 1253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1254 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1255 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1256 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1257 if (iascii || isdraw || issocket) { 1258 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1259 } else if (isbinary) { 1260 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1267 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1268 { 1269 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1270 PetscErrorCode ierr; 1271 1272 PetscFunctionBegin; 1273 #if defined(PETSC_USE_LOG) 1274 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1275 #endif 1276 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1277 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1278 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1279 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1280 #if defined(PETSC_USE_CTABLE) 1281 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1282 #else 1283 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1284 #endif 1285 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1286 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1287 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1288 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1289 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1290 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1291 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1292 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1293 1294 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatMult_MPIBAIJ" 1309 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1310 { 1311 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 PetscInt nt; 1314 1315 PetscFunctionBegin; 1316 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1317 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1318 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1319 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1320 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1321 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1322 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1323 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1329 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1330 { 1331 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1337 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1338 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1344 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1345 { 1346 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1347 PetscErrorCode ierr; 1348 PetscBool merged; 1349 1350 PetscFunctionBegin; 1351 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1352 /* do nondiagonal part */ 1353 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1354 if (!merged) { 1355 /* send it on its way */ 1356 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1357 /* do local part */ 1358 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1359 /* receive remote parts: note this assumes the values are not actually */ 1360 /* inserted in yy until the next line */ 1361 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1362 } else { 1363 /* do local part */ 1364 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1365 /* send it on its way */ 1366 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 /* values actually were received in the Begin() but we need to call this nop */ 1368 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1369 } 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1375 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1376 { 1377 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1378 PetscErrorCode ierr; 1379 1380 PetscFunctionBegin; 1381 /* do nondiagonal part */ 1382 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1383 /* send it on its way */ 1384 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1385 /* do local part */ 1386 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1387 /* receive remote parts: note this assumes the values are not actually */ 1388 /* inserted in yy until the next line, which is true for my implementation*/ 1389 /* but is not perhaps always true. */ 1390 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 1394 /* 1395 This only works correctly for square matrices where the subblock A->A is the 1396 diagonal block 1397 */ 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1400 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1401 { 1402 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1403 PetscErrorCode ierr; 1404 1405 PetscFunctionBegin; 1406 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1407 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatScale_MPIBAIJ" 1413 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1414 { 1415 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1420 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1426 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1427 { 1428 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1429 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1430 PetscErrorCode ierr; 1431 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1432 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1433 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1434 1435 PetscFunctionBegin; 1436 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1437 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1438 mat->getrowactive = PETSC_TRUE; 1439 1440 if (!mat->rowvalues && (idx || v)) { 1441 /* 1442 allocate enough space to hold information from the longest row. 1443 */ 1444 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1445 PetscInt max = 1,mbs = mat->mbs,tmp; 1446 for (i=0; i<mbs; i++) { 1447 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1448 if (max < tmp) max = tmp; 1449 } 1450 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1451 } 1452 lrow = row - brstart; 1453 1454 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1455 if (!v) {pvA = 0; pvB = 0;} 1456 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1457 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1458 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1459 nztot = nzA + nzB; 1460 1461 cmap = mat->garray; 1462 if (v || idx) { 1463 if (nztot) { 1464 /* Sort by increasing column numbers, assuming A and B already sorted */ 1465 PetscInt imark = -1; 1466 if (v) { 1467 *v = v_p = mat->rowvalues; 1468 for (i=0; i<nzB; i++) { 1469 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1470 else break; 1471 } 1472 imark = i; 1473 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1474 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1475 } 1476 if (idx) { 1477 *idx = idx_p = mat->rowindices; 1478 if (imark > -1) { 1479 for (i=0; i<imark; i++) { 1480 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1481 } 1482 } else { 1483 for (i=0; i<nzB; i++) { 1484 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1485 else break; 1486 } 1487 imark = i; 1488 } 1489 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1490 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1491 } 1492 } else { 1493 if (idx) *idx = 0; 1494 if (v) *v = 0; 1495 } 1496 } 1497 *nz = nztot; 1498 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1499 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1505 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1506 { 1507 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1508 1509 PetscFunctionBegin; 1510 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1511 baij->getrowactive = PETSC_FALSE; 1512 PetscFunctionReturn(0); 1513 } 1514 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1517 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1518 { 1519 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1524 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1530 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1531 { 1532 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1533 Mat A = a->A,B = a->B; 1534 PetscErrorCode ierr; 1535 PetscReal isend[5],irecv[5]; 1536 1537 PetscFunctionBegin; 1538 info->block_size = (PetscReal)matin->rmap->bs; 1539 1540 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1541 1542 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1543 isend[3] = info->memory; isend[4] = info->mallocs; 1544 1545 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1546 1547 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1548 isend[3] += info->memory; isend[4] += info->mallocs; 1549 1550 if (flag == MAT_LOCAL) { 1551 info->nz_used = isend[0]; 1552 info->nz_allocated = isend[1]; 1553 info->nz_unneeded = isend[2]; 1554 info->memory = isend[3]; 1555 info->mallocs = isend[4]; 1556 } else if (flag == MAT_GLOBAL_MAX) { 1557 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1558 1559 info->nz_used = irecv[0]; 1560 info->nz_allocated = irecv[1]; 1561 info->nz_unneeded = irecv[2]; 1562 info->memory = irecv[3]; 1563 info->mallocs = irecv[4]; 1564 } else if (flag == MAT_GLOBAL_SUM) { 1565 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1566 1567 info->nz_used = irecv[0]; 1568 info->nz_allocated = irecv[1]; 1569 info->nz_unneeded = irecv[2]; 1570 info->memory = irecv[3]; 1571 info->mallocs = irecv[4]; 1572 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1573 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1574 info->fill_ratio_needed = 0; 1575 info->factor_mallocs = 0; 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1581 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1582 { 1583 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1584 PetscErrorCode ierr; 1585 1586 PetscFunctionBegin; 1587 switch (op) { 1588 case MAT_NEW_NONZERO_LOCATIONS: 1589 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1590 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1591 case MAT_KEEP_NONZERO_PATTERN: 1592 case MAT_NEW_NONZERO_LOCATION_ERR: 1593 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1594 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1595 break; 1596 case MAT_ROW_ORIENTED: 1597 a->roworiented = flg; 1598 1599 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1600 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1601 break; 1602 case MAT_NEW_DIAGONALS: 1603 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1604 break; 1605 case MAT_IGNORE_OFF_PROC_ENTRIES: 1606 a->donotstash = flg; 1607 break; 1608 case MAT_USE_HASH_TABLE: 1609 a->ht_flag = flg; 1610 break; 1611 case MAT_SYMMETRIC: 1612 case MAT_STRUCTURALLY_SYMMETRIC: 1613 case MAT_HERMITIAN: 1614 case MAT_SYMMETRY_ETERNAL: 1615 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1616 break; 1617 default: 1618 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1619 } 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1625 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1626 { 1627 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1628 Mat_SeqBAIJ *Aloc; 1629 Mat B; 1630 PetscErrorCode ierr; 1631 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1632 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1633 MatScalar *a; 1634 1635 PetscFunctionBegin; 1636 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1637 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1638 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1639 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1640 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1641 /* Do not know preallocation information, but must set block size */ 1642 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1643 } else { 1644 B = *matout; 1645 } 1646 1647 /* copy over the A part */ 1648 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1649 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1650 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1651 1652 for (i=0; i<mbs; i++) { 1653 rvals[0] = bs*(baij->rstartbs + i); 1654 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1655 for (j=ai[i]; j<ai[i+1]; j++) { 1656 col = (baij->cstartbs+aj[j])*bs; 1657 for (k=0; k<bs; k++) { 1658 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1659 1660 col++; a += bs; 1661 } 1662 } 1663 } 1664 /* copy over the B part */ 1665 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1666 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1667 for (i=0; i<mbs; i++) { 1668 rvals[0] = bs*(baij->rstartbs + i); 1669 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1670 for (j=ai[i]; j<ai[i+1]; j++) { 1671 col = baij->garray[aj[j]]*bs; 1672 for (k=0; k<bs; k++) { 1673 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1674 col++; 1675 a += bs; 1676 } 1677 } 1678 } 1679 ierr = PetscFree(rvals);CHKERRQ(ierr); 1680 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 1683 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1684 else { 1685 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1686 } 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1692 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1693 { 1694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1695 Mat a = baij->A,b = baij->B; 1696 PetscErrorCode ierr; 1697 PetscInt s1,s2,s3; 1698 1699 PetscFunctionBegin; 1700 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1701 if (rr) { 1702 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1703 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1704 /* Overlap communication with computation. */ 1705 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1706 } 1707 if (ll) { 1708 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1709 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1710 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1711 } 1712 /* scale the diagonal block */ 1713 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1714 1715 if (rr) { 1716 /* Do a scatter end and then right scale the off-diagonal block */ 1717 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1718 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1725 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1726 { 1727 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1728 PetscErrorCode ierr; 1729 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1730 PetscInt i,*owners = A->rmap->range; 1731 PetscInt *nprocs,j,idx,nsends,row; 1732 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1733 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1734 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1735 MPI_Comm comm; 1736 MPI_Request *send_waits,*recv_waits; 1737 MPI_Status recv_status,*send_status; 1738 const PetscScalar *xx; 1739 PetscScalar *bb; 1740 #if defined(PETSC_DEBUG) 1741 PetscBool found = PETSC_FALSE; 1742 #endif 1743 1744 PetscFunctionBegin; 1745 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1746 /* first count number of contributors to each processor */ 1747 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1748 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1749 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1750 j = 0; 1751 for (i=0; i<N; i++) { 1752 if (lastidx > (idx = rows[i])) j = 0; 1753 lastidx = idx; 1754 for (; j<size; j++) { 1755 if (idx >= owners[j] && idx < owners[j+1]) { 1756 nprocs[2*j]++; 1757 nprocs[2*j+1] = 1; 1758 owner[i] = j; 1759 #if defined(PETSC_DEBUG) 1760 found = PETSC_TRUE; 1761 #endif 1762 break; 1763 } 1764 } 1765 #if defined(PETSC_DEBUG) 1766 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1767 found = PETSC_FALSE; 1768 #endif 1769 } 1770 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 1771 1772 if (A->nooffproczerorows) { 1773 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1774 nrecvs = nsends; 1775 nmax = N; 1776 } else { 1777 /* inform other processors of number of messages and max length*/ 1778 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1779 } 1780 1781 /* post receives: */ 1782 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1783 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1784 for (i=0; i<nrecvs; i++) { 1785 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1786 } 1787 1788 /* do sends: 1789 1) starts[i] gives the starting index in svalues for stuff going to 1790 the ith processor 1791 */ 1792 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1793 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1794 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1795 starts[0] = 0; 1796 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1797 for (i=0; i<N; i++) { 1798 svalues[starts[owner[i]]++] = rows[i]; 1799 } 1800 1801 starts[0] = 0; 1802 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1803 count = 0; 1804 for (i=0; i<size; i++) { 1805 if (nprocs[2*i+1]) { 1806 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1807 } 1808 } 1809 ierr = PetscFree(starts);CHKERRQ(ierr); 1810 1811 base = owners[rank]; 1812 1813 /* wait on receives */ 1814 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1815 count = nrecvs; 1816 slen = 0; 1817 while (count) { 1818 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1819 /* unpack receives into our local space */ 1820 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1821 1822 source[imdex] = recv_status.MPI_SOURCE; 1823 lens[imdex] = n; 1824 slen += n; 1825 count--; 1826 } 1827 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1828 1829 /* move the data into the send scatter */ 1830 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1831 count = 0; 1832 for (i=0; i<nrecvs; i++) { 1833 values = rvalues + i*nmax; 1834 for (j=0; j<lens[i]; j++) { 1835 lrows[count++] = values[j] - base; 1836 } 1837 } 1838 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1839 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1840 ierr = PetscFree(owner);CHKERRQ(ierr); 1841 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1842 1843 /* fix right hand side if needed */ 1844 if (x && b) { 1845 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1846 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1847 for (i=0; i<slen; i++) { 1848 bb[lrows[i]] = diag*xx[lrows[i]]; 1849 } 1850 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1851 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1852 } 1853 1854 /* actually zap the local rows */ 1855 /* 1856 Zero the required rows. If the "diagonal block" of the matrix 1857 is square and the user wishes to set the diagonal we use separate 1858 code so that MatSetValues() is not called for each diagonal allocating 1859 new memory, thus calling lots of mallocs and slowing things down. 1860 1861 */ 1862 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1863 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1864 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1865 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1866 } else if (diag != 0.0) { 1867 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1868 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1869 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1870 for (i=0; i<slen; i++) { 1871 row = lrows[i] + rstart_bs; 1872 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1873 } 1874 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1875 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1876 } else { 1877 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1878 } 1879 1880 ierr = PetscFree(lrows);CHKERRQ(ierr); 1881 1882 /* wait on sends */ 1883 if (nsends) { 1884 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1885 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1886 ierr = PetscFree(send_status);CHKERRQ(ierr); 1887 } 1888 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1889 ierr = PetscFree(svalues);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1895 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1896 { 1897 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1898 PetscErrorCode ierr; 1899 1900 PetscFunctionBegin; 1901 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1902 PetscFunctionReturn(0); 1903 } 1904 1905 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatEqual_MPIBAIJ" 1909 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1910 { 1911 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1912 Mat a,b,c,d; 1913 PetscBool flg; 1914 PetscErrorCode ierr; 1915 1916 PetscFunctionBegin; 1917 a = matA->A; b = matA->B; 1918 c = matB->A; d = matB->B; 1919 1920 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1921 if (flg) { 1922 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1923 } 1924 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "MatCopy_MPIBAIJ" 1930 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1931 { 1932 PetscErrorCode ierr; 1933 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1934 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1935 1936 PetscFunctionBegin; 1937 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1938 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1939 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1940 } else { 1941 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1942 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1943 } 1944 PetscFunctionReturn(0); 1945 } 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1949 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1950 { 1951 PetscErrorCode ierr; 1952 1953 PetscFunctionBegin; 1954 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1960 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1961 { 1962 PetscErrorCode ierr; 1963 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1964 PetscBLASInt bnz,one=1; 1965 Mat_SeqBAIJ *x,*y; 1966 1967 PetscFunctionBegin; 1968 if (str == SAME_NONZERO_PATTERN) { 1969 PetscScalar alpha = a; 1970 x = (Mat_SeqBAIJ*)xx->A->data; 1971 y = (Mat_SeqBAIJ*)yy->A->data; 1972 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1973 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1974 x = (Mat_SeqBAIJ*)xx->B->data; 1975 y = (Mat_SeqBAIJ*)yy->B->data; 1976 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1977 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1978 } else { 1979 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1980 } 1981 PetscFunctionReturn(0); 1982 } 1983 1984 #undef __FUNCT__ 1985 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1986 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1987 { 1988 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1989 PetscErrorCode ierr; 1990 1991 PetscFunctionBegin; 1992 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1993 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1999 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2000 { 2001 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2002 PetscErrorCode ierr; 2003 2004 PetscFunctionBegin; 2005 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2006 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2012 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2013 { 2014 PetscErrorCode ierr; 2015 IS iscol_local; 2016 PetscInt csize; 2017 2018 PetscFunctionBegin; 2019 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2020 if (call == MAT_REUSE_MATRIX) { 2021 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2022 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2023 } else { 2024 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2025 } 2026 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2027 if (call == MAT_INITIAL_MATRIX) { 2028 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2029 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2030 } 2031 PetscFunctionReturn(0); 2032 } 2033 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2036 /* 2037 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2038 in local and then by concatenating the local matrices the end result. 2039 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2040 */ 2041 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2042 { 2043 PetscErrorCode ierr; 2044 PetscMPIInt rank,size; 2045 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2046 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2047 Mat M,Mreuse; 2048 MatScalar *vwork,*aa; 2049 MPI_Comm comm; 2050 IS isrow_new, iscol_new; 2051 PetscBool idflag,allrows, allcols; 2052 Mat_SeqBAIJ *aij; 2053 2054 PetscFunctionBegin; 2055 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2057 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2058 /* The compression and expansion should be avoided. Doesn't point 2059 out errors, might change the indices, hence buggey */ 2060 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2061 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2062 2063 /* Check for special case: each processor gets entire matrix columns */ 2064 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2065 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2066 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2067 else allcols = PETSC_FALSE; 2068 2069 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2070 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2071 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2072 else allrows = PETSC_FALSE; 2073 2074 if (call == MAT_REUSE_MATRIX) { 2075 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2076 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2077 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2078 } else { 2079 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2080 } 2081 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2082 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2083 /* 2084 m - number of local rows 2085 n - number of columns (same on all processors) 2086 rstart - first row in new global matrix generated 2087 */ 2088 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2089 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2090 m = m/bs; 2091 n = n/bs; 2092 2093 if (call == MAT_INITIAL_MATRIX) { 2094 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2095 ii = aij->i; 2096 jj = aij->j; 2097 2098 /* 2099 Determine the number of non-zeros in the diagonal and off-diagonal 2100 portions of the matrix in order to do correct preallocation 2101 */ 2102 2103 /* first get start and end of "diagonal" columns */ 2104 if (csize == PETSC_DECIDE) { 2105 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2106 if (mglobal == n*bs) { /* square matrix */ 2107 nlocal = m; 2108 } else { 2109 nlocal = n/size + ((n % size) > rank); 2110 } 2111 } else { 2112 nlocal = csize/bs; 2113 } 2114 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2115 rstart = rend - nlocal; 2116 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2117 2118 /* next, compute all the lengths */ 2119 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2120 olens = dlens + m; 2121 for (i=0; i<m; i++) { 2122 jend = ii[i+1] - ii[i]; 2123 olen = 0; 2124 dlen = 0; 2125 for (j=0; j<jend; j++) { 2126 if (*jj < rstart || *jj >= rend) olen++; 2127 else dlen++; 2128 jj++; 2129 } 2130 olens[i] = olen; 2131 dlens[i] = dlen; 2132 } 2133 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2134 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2135 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2136 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2137 ierr = PetscFree(dlens);CHKERRQ(ierr); 2138 } else { 2139 PetscInt ml,nl; 2140 2141 M = *newmat; 2142 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2143 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2144 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2145 /* 2146 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2147 rather than the slower MatSetValues(). 2148 */ 2149 M->was_assembled = PETSC_TRUE; 2150 M->assembled = PETSC_FALSE; 2151 } 2152 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2153 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2154 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2155 ii = aij->i; 2156 jj = aij->j; 2157 aa = aij->a; 2158 for (i=0; i<m; i++) { 2159 row = rstart/bs + i; 2160 nz = ii[i+1] - ii[i]; 2161 cwork = jj; jj += nz; 2162 vwork = aa; aa += nz*bs*bs; 2163 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2164 } 2165 2166 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2167 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2168 *newmat = M; 2169 2170 /* save submatrix used in processor for next request */ 2171 if (call == MAT_INITIAL_MATRIX) { 2172 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2173 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2174 } 2175 PetscFunctionReturn(0); 2176 } 2177 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatPermute_MPIBAIJ" 2180 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2181 { 2182 MPI_Comm comm,pcomm; 2183 PetscInt first,rlocal_size,clocal_size,nrows; 2184 const PetscInt *rows; 2185 PetscMPIInt size; 2186 IS crowp,growp,irowp,lrowp,lcolp; 2187 PetscErrorCode ierr; 2188 2189 PetscFunctionBegin; 2190 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2191 /* make a collective version of 'rowp' */ 2192 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2193 if (pcomm==comm) { 2194 crowp = rowp; 2195 } else { 2196 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2197 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2198 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2199 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2200 } 2201 /* collect the global row permutation and invert it */ 2202 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2203 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2204 if (pcomm!=comm) { 2205 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2206 } 2207 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2208 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2209 /* get the local target indices */ 2210 ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr); 2211 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2212 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2213 ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2214 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2215 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2216 /* the column permutation is so much easier; 2217 make a local version of 'colp' and invert it */ 2218 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2219 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2220 if (size==1) { 2221 lcolp = colp; 2222 } else { 2223 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2224 } 2225 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2226 /* now we just get the submatrix */ 2227 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2228 if (size>1) { 2229 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2230 } 2231 /* clean up */ 2232 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2238 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2239 { 2240 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2241 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2242 2243 PetscFunctionBegin; 2244 if (nghosts) *nghosts = B->nbs; 2245 if (ghosts) *ghosts = baij->garray; 2246 PetscFunctionReturn(0); 2247 } 2248 2249 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat); 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2253 /* 2254 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2255 */ 2256 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2257 { 2258 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2259 PetscErrorCode ierr; 2260 PetscMPIInt size,*ncolsonproc,*disp,nn; 2261 PetscInt bs,i,n,nrows,j,k,m,ncols,col; 2262 const PetscInt *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog; 2263 PetscInt nis = iscoloring->n,nctot,*cols; 2264 PetscInt *rowhit,M,cstart,cend,colb; 2265 PetscInt *columnsforrow,l; 2266 IS *isa; 2267 PetscBool done,flg; 2268 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2269 PetscInt ctype=c->ctype; 2270 2271 PetscFunctionBegin; 2272 if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2273 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2274 2275 if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr);} 2276 else ltog = NULL; 2277 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2278 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2279 2280 M = mat->rmap->n/bs; 2281 cstart = mat->cmap->rstart/bs; 2282 cend = mat->cmap->rend/bs; 2283 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2284 c->N = mat->cmap->N/bs; 2285 c->m = mat->rmap->n/bs; 2286 c->rstart = mat->rmap->rstart/bs; 2287 2288 c->ncolors = nis; 2289 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2290 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2291 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2292 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2293 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2294 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2295 2296 /* Allow access to data structures of local part of matrix */ 2297 if (!baij->colmap) { 2298 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2299 } 2300 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2301 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2302 2303 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2304 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2305 2306 for (i=0; i<nis; i++) { 2307 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2308 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2309 2310 c->ncolumns[i] = n; 2311 if (n) { 2312 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2313 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2314 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2315 } else { 2316 c->columns[i] = 0; 2317 } 2318 2319 if (ctype == IS_COLORING_GLOBAL) { 2320 /* Determine the total (parallel) number of columns of this color */ 2321 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 2322 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2323 2324 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 2325 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2326 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 2327 if (!nctot) { 2328 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2329 } 2330 2331 disp[0] = 0; 2332 for (j=1; j<size; j++) { 2333 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2334 } 2335 2336 /* Get complete list of columns for color on each processor */ 2337 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2338 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2339 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2340 } else if (ctype == IS_COLORING_GHOSTED) { 2341 /* Determine local number of columns of this color on this process, including ghost points */ 2342 nctot = n; 2343 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2344 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2345 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2346 2347 /* 2348 Mark all rows affect by these columns 2349 */ 2350 /* Temporary option to allow for debugging/testing */ 2351 flg = PETSC_FALSE; 2352 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 2353 if (!flg) { /*-----------------------------------------------------------------------------*/ 2354 /* crude, fast version */ 2355 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2356 /* loop over columns*/ 2357 for (j=0; j<nctot; j++) { 2358 if (ctype == IS_COLORING_GHOSTED) { 2359 col = ltog[cols[j]]; 2360 } else { 2361 col = cols[j]; 2362 } 2363 if (col >= cstart && col < cend) { 2364 /* column is in diagonal block of matrix */ 2365 rows = A_cj + A_ci[col-cstart]; 2366 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2367 } else { 2368 #if defined(PETSC_USE_CTABLE) 2369 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2370 colb--; 2371 #else 2372 colb = baij->colmap[col] - 1; 2373 #endif 2374 if (colb == -1) { 2375 m = 0; 2376 } else { 2377 colb = colb/bs; 2378 rows = B_cj + B_ci[colb]; 2379 m = B_ci[colb+1] - B_ci[colb]; 2380 } 2381 } 2382 /* loop over columns marking them in rowhit */ 2383 for (k=0; k<m; k++) { 2384 rowhit[*rows++] = col + 1; 2385 } 2386 } 2387 2388 /* count the number of hits */ 2389 nrows = 0; 2390 for (j=0; j<M; j++) { 2391 if (rowhit[j]) nrows++; 2392 } 2393 c->nrows[i] = nrows; 2394 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2395 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2396 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2397 nrows = 0; 2398 for (j=0; j<M; j++) { 2399 if (rowhit[j]) { 2400 c->rows[i][nrows] = j; 2401 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2402 nrows++; 2403 } 2404 } 2405 } else { /*-------------------------------------------------------------------------------*/ 2406 /* slow version, using rowhit as a linked list */ 2407 PetscInt currentcol,fm,mfm; 2408 rowhit[M] = M; 2409 nrows = 0; 2410 /* loop over columns*/ 2411 for (j=0; j<nctot; j++) { 2412 if (ctype == IS_COLORING_GHOSTED) { 2413 col = ltog[cols[j]]; 2414 } else { 2415 col = cols[j]; 2416 } 2417 if (col >= cstart && col < cend) { 2418 /* column is in diagonal block of matrix */ 2419 rows = A_cj + A_ci[col-cstart]; 2420 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2421 } else { 2422 #if defined(PETSC_USE_CTABLE) 2423 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2424 colb--; 2425 #else 2426 colb = baij->colmap[col] - 1; 2427 #endif 2428 if (colb == -1) { 2429 m = 0; 2430 } else { 2431 colb = colb/bs; 2432 rows = B_cj + B_ci[colb]; 2433 m = B_ci[colb+1] - B_ci[colb]; 2434 } 2435 } 2436 2437 /* loop over columns marking them in rowhit */ 2438 fm = M; /* fm points to first entry in linked list */ 2439 for (k=0; k<m; k++) { 2440 currentcol = *rows++; 2441 /* is it already in the list? */ 2442 do { 2443 mfm = fm; 2444 fm = rowhit[fm]; 2445 } while (fm < currentcol); 2446 /* not in list so add it */ 2447 if (fm != currentcol) { 2448 nrows++; 2449 columnsforrow[currentcol] = col; 2450 /* next three lines insert new entry into linked list */ 2451 rowhit[mfm] = currentcol; 2452 rowhit[currentcol] = fm; 2453 fm = currentcol; 2454 /* fm points to present position in list since we know the columns are sorted */ 2455 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2456 } 2457 } 2458 c->nrows[i] = nrows; 2459 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2460 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2461 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2462 /* now store the linked list of rows into c->rows[i] */ 2463 nrows = 0; 2464 fm = rowhit[M]; 2465 do { 2466 c->rows[i][nrows] = fm; 2467 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2468 fm = rowhit[fm]; 2469 } while (fm < M); 2470 } /* ---------------------------------------------------------------------------------------*/ 2471 ierr = PetscFree(cols);CHKERRQ(ierr); 2472 } 2473 2474 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2475 /* 2476 vscale will contain the "diagonal" on processor scalings followed by the off processor 2477 */ 2478 if (ctype == IS_COLORING_GLOBAL) { 2479 PetscInt *garray; 2480 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2481 for (i=0; i<baij->B->cmap->n/bs; i++) { 2482 for (j=0; j<bs; j++) { 2483 garray[i*bs+j] = bs*baij->garray[i]+j; 2484 } 2485 } 2486 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2487 ierr = PetscFree(garray);CHKERRQ(ierr); 2488 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2489 for (k=0; k<c->ncolors; k++) { 2490 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2491 for (l=0; l<c->nrows[k]; l++) { 2492 col = c->columnsforrow[k][l]; 2493 if (col >= cstart && col < cend) { 2494 /* column is in diagonal block of matrix */ 2495 colb = col - cstart; 2496 } else { 2497 /* column is in "off-processor" part */ 2498 #if defined(PETSC_USE_CTABLE) 2499 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2500 colb--; 2501 #else 2502 colb = baij->colmap[col] - 1; 2503 #endif 2504 colb = colb/bs; 2505 colb += cend - cstart; 2506 } 2507 c->vscaleforrow[k][l] = colb; 2508 } 2509 } 2510 } else if (ctype == IS_COLORING_GHOSTED) { 2511 /* Get gtol mapping */ 2512 PetscInt N = mat->cmap->N,nlocal,*gtol; 2513 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2514 for (i=0; i<N; i++) gtol[i] = -1; 2515 ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr); 2516 for (i=0; i<nlocal; i++) gtol[ltog[i]] = i; 2517 2518 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2519 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2520 for (k=0; k<c->ncolors; k++) { 2521 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2522 for (l=0; l<c->nrows[k]; l++) { 2523 col = c->columnsforrow[k][l]; /* global column index */ 2524 2525 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2526 } 2527 } 2528 ierr = PetscFree(gtol);CHKERRQ(ierr); 2529 } 2530 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2531 2532 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2533 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2534 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2535 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2536 if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr);} 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2543 { 2544 Mat B; 2545 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2546 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2547 Mat_SeqAIJ *b; 2548 PetscErrorCode ierr; 2549 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2550 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2551 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2552 2553 PetscFunctionBegin; 2554 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2555 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2556 2557 /* ---------------------------------------------------------------- 2558 Tell every processor the number of nonzeros per row 2559 */ 2560 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2561 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2562 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2563 } 2564 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2565 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2566 displs = recvcounts + size; 2567 for (i=0; i<size; i++) { 2568 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2569 displs[i] = A->rmap->range[i]/bs; 2570 } 2571 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2572 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2573 #else 2574 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2575 #endif 2576 /* --------------------------------------------------------------- 2577 Create the sequential matrix of the same type as the local block diagonal 2578 */ 2579 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2580 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2581 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2582 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2583 b = (Mat_SeqAIJ*)B->data; 2584 2585 /*-------------------------------------------------------------------- 2586 Copy my part of matrix column indices over 2587 */ 2588 sendcount = ad->nz + bd->nz; 2589 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2590 a_jsendbuf = ad->j; 2591 b_jsendbuf = bd->j; 2592 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2593 cnt = 0; 2594 for (i=0; i<n; i++) { 2595 2596 /* put in lower diagonal portion */ 2597 m = bd->i[i+1] - bd->i[i]; 2598 while (m > 0) { 2599 /* is it above diagonal (in bd (compressed) numbering) */ 2600 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2601 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2602 m--; 2603 } 2604 2605 /* put in diagonal portion */ 2606 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2607 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2608 } 2609 2610 /* put in upper diagonal portion */ 2611 while (m-- > 0) { 2612 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2613 } 2614 } 2615 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2616 2617 /*-------------------------------------------------------------------- 2618 Gather all column indices to all processors 2619 */ 2620 for (i=0; i<size; i++) { 2621 recvcounts[i] = 0; 2622 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2623 recvcounts[i] += lens[j]; 2624 } 2625 } 2626 displs[0] = 0; 2627 for (i=1; i<size; i++) { 2628 displs[i] = displs[i-1] + recvcounts[i-1]; 2629 } 2630 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2631 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2632 #else 2633 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2634 #endif 2635 /*-------------------------------------------------------------------- 2636 Assemble the matrix into useable form (note numerical values not yet set) 2637 */ 2638 /* set the b->ilen (length of each row) values */ 2639 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2640 /* set the b->i indices */ 2641 b->i[0] = 0; 2642 for (i=1; i<=A->rmap->N/bs; i++) { 2643 b->i[i] = b->i[i-1] + lens[i-1]; 2644 } 2645 ierr = PetscFree(lens);CHKERRQ(ierr); 2646 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2647 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2649 2650 if (A->symmetric) { 2651 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2652 } else if (A->hermitian) { 2653 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2654 } else if (A->structurally_symmetric) { 2655 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2656 } 2657 *newmat = B; 2658 PetscFunctionReturn(0); 2659 } 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatSOR_MPIBAIJ" 2663 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2664 { 2665 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2666 PetscErrorCode ierr; 2667 Vec bb1 = 0; 2668 2669 PetscFunctionBegin; 2670 if (flag == SOR_APPLY_UPPER) { 2671 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2672 PetscFunctionReturn(0); 2673 } 2674 2675 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2676 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2677 } 2678 2679 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2680 if (flag & SOR_ZERO_INITIAL_GUESS) { 2681 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2682 its--; 2683 } 2684 2685 while (its--) { 2686 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2687 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2688 2689 /* update rhs: bb1 = bb - B*x */ 2690 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2691 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2692 2693 /* local sweep */ 2694 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2695 } 2696 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2697 if (flag & SOR_ZERO_INITIAL_GUESS) { 2698 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2699 its--; 2700 } 2701 while (its--) { 2702 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2704 2705 /* update rhs: bb1 = bb - B*x */ 2706 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2707 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2708 2709 /* local sweep */ 2710 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2711 } 2712 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2713 if (flag & SOR_ZERO_INITIAL_GUESS) { 2714 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2715 its--; 2716 } 2717 while (its--) { 2718 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2720 2721 /* update rhs: bb1 = bb - B*x */ 2722 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2723 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2724 2725 /* local sweep */ 2726 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2727 } 2728 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2729 2730 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2731 PetscFunctionReturn(0); 2732 } 2733 2734 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2738 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2739 { 2740 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 2749 /* -------------------------------------------------------------------*/ 2750 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2751 MatGetRow_MPIBAIJ, 2752 MatRestoreRow_MPIBAIJ, 2753 MatMult_MPIBAIJ, 2754 /* 4*/ MatMultAdd_MPIBAIJ, 2755 MatMultTranspose_MPIBAIJ, 2756 MatMultTransposeAdd_MPIBAIJ, 2757 0, 2758 0, 2759 0, 2760 /*10*/ 0, 2761 0, 2762 0, 2763 MatSOR_MPIBAIJ, 2764 MatTranspose_MPIBAIJ, 2765 /*15*/ MatGetInfo_MPIBAIJ, 2766 MatEqual_MPIBAIJ, 2767 MatGetDiagonal_MPIBAIJ, 2768 MatDiagonalScale_MPIBAIJ, 2769 MatNorm_MPIBAIJ, 2770 /*20*/ MatAssemblyBegin_MPIBAIJ, 2771 MatAssemblyEnd_MPIBAIJ, 2772 MatSetOption_MPIBAIJ, 2773 MatZeroEntries_MPIBAIJ, 2774 /*24*/ MatZeroRows_MPIBAIJ, 2775 0, 2776 0, 2777 0, 2778 0, 2779 /*29*/ MatSetUp_MPIBAIJ, 2780 0, 2781 0, 2782 0, 2783 0, 2784 /*34*/ MatDuplicate_MPIBAIJ, 2785 0, 2786 0, 2787 0, 2788 0, 2789 /*39*/ MatAXPY_MPIBAIJ, 2790 MatGetSubMatrices_MPIBAIJ, 2791 MatIncreaseOverlap_MPIBAIJ, 2792 MatGetValues_MPIBAIJ, 2793 MatCopy_MPIBAIJ, 2794 /*44*/ 0, 2795 MatScale_MPIBAIJ, 2796 0, 2797 0, 2798 0, 2799 /*49*/ 0, 2800 0, 2801 0, 2802 0, 2803 0, 2804 /*54*/ MatFDColoringCreate_MPIBAIJ, 2805 0, 2806 MatSetUnfactored_MPIBAIJ, 2807 MatPermute_MPIBAIJ, 2808 MatSetValuesBlocked_MPIBAIJ, 2809 /*59*/ MatGetSubMatrix_MPIBAIJ, 2810 MatDestroy_MPIBAIJ, 2811 MatView_MPIBAIJ, 2812 0, 2813 0, 2814 /*64*/ 0, 2815 0, 2816 0, 2817 0, 2818 0, 2819 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2820 0, 2821 0, 2822 0, 2823 0, 2824 /*74*/ 0, 2825 MatFDColoringApply_BAIJ, 2826 0, 2827 0, 2828 0, 2829 /*79*/ 0, 2830 0, 2831 0, 2832 0, 2833 MatLoad_MPIBAIJ, 2834 /*84*/ 0, 2835 0, 2836 0, 2837 0, 2838 0, 2839 /*89*/ 0, 2840 0, 2841 0, 2842 0, 2843 0, 2844 /*94*/ 0, 2845 0, 2846 0, 2847 0, 2848 0, 2849 /*99*/ 0, 2850 0, 2851 0, 2852 0, 2853 0, 2854 /*104*/0, 2855 MatRealPart_MPIBAIJ, 2856 MatImaginaryPart_MPIBAIJ, 2857 0, 2858 0, 2859 /*109*/0, 2860 0, 2861 0, 2862 0, 2863 0, 2864 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2865 0, 2866 MatGetGhosts_MPIBAIJ, 2867 0, 2868 0, 2869 /*119*/0, 2870 0, 2871 0, 2872 0, 2873 0, 2874 /*124*/0, 2875 0, 2876 MatInvertBlockDiagonal_MPIBAIJ, 2877 0, 2878 0, 2879 /*129*/ 0, 2880 0, 2881 0, 2882 0, 2883 0, 2884 /*134*/ 0, 2885 0, 2886 0, 2887 0, 2888 0, 2889 /*139*/ 0, 2890 0 2891 }; 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2895 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2896 { 2897 PetscFunctionBegin; 2898 *a = ((Mat_MPIBAIJ*)A->data)->A; 2899 PetscFunctionReturn(0); 2900 } 2901 2902 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2903 2904 #undef __FUNCT__ 2905 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2906 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2907 { 2908 PetscInt m,rstart,cstart,cend; 2909 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2910 const PetscInt *JJ =0; 2911 PetscScalar *values=0; 2912 PetscErrorCode ierr; 2913 2914 PetscFunctionBegin; 2915 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2916 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2917 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2918 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2919 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2920 m = B->rmap->n/bs; 2921 rstart = B->rmap->rstart/bs; 2922 cstart = B->cmap->rstart/bs; 2923 cend = B->cmap->rend/bs; 2924 2925 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2926 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2927 for (i=0; i<m; i++) { 2928 nz = ii[i+1] - ii[i]; 2929 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2930 nz_max = PetscMax(nz_max,nz); 2931 JJ = jj + ii[i]; 2932 for (j=0; j<nz; j++) { 2933 if (*JJ >= cstart) break; 2934 JJ++; 2935 } 2936 d = 0; 2937 for (; j<nz; j++) { 2938 if (*JJ++ >= cend) break; 2939 d++; 2940 } 2941 d_nnz[i] = d; 2942 o_nnz[i] = nz - d; 2943 } 2944 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2945 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2946 2947 values = (PetscScalar*)V; 2948 if (!values) { 2949 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2950 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2951 } 2952 for (i=0; i<m; i++) { 2953 PetscInt row = i + rstart; 2954 PetscInt ncols = ii[i+1] - ii[i]; 2955 const PetscInt *icols = jj + ii[i]; 2956 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2957 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2958 } 2959 2960 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2961 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2963 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 #undef __FUNCT__ 2968 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2969 /*@C 2970 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2971 (the default parallel PETSc format). 2972 2973 Collective on MPI_Comm 2974 2975 Input Parameters: 2976 + A - the matrix 2977 . bs - the block size 2978 . i - the indices into j for the start of each local row (starts with zero) 2979 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2980 - v - optional values in the matrix 2981 2982 Level: developer 2983 2984 .keywords: matrix, aij, compressed row, sparse, parallel 2985 2986 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2987 @*/ 2988 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2989 { 2990 PetscErrorCode ierr; 2991 2992 PetscFunctionBegin; 2993 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2994 PetscValidType(B,1); 2995 PetscValidLogicalCollectiveInt(B,bs,2); 2996 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2997 PetscFunctionReturn(0); 2998 } 2999 3000 #undef __FUNCT__ 3001 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3002 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 3003 { 3004 Mat_MPIBAIJ *b; 3005 PetscErrorCode ierr; 3006 PetscInt i; 3007 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3008 3009 PetscFunctionBegin; 3010 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3011 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3012 3013 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3014 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3015 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3016 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3017 3018 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3019 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3020 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3021 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3022 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3023 3024 if (d_nnz) { 3025 for (i=0; i<B->rmap->n/bs; i++) { 3026 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3027 } 3028 } 3029 if (o_nnz) { 3030 for (i=0; i<B->rmap->n/bs; i++) { 3031 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3032 } 3033 } 3034 3035 b = (Mat_MPIBAIJ*)B->data; 3036 b->bs2 = bs*bs; 3037 b->mbs = B->rmap->n/bs; 3038 b->nbs = B->cmap->n/bs; 3039 b->Mbs = B->rmap->N/bs; 3040 b->Nbs = B->cmap->N/bs; 3041 3042 for (i=0; i<=b->size; i++) { 3043 b->rangebs[i] = B->rmap->range[i]/bs; 3044 } 3045 b->rstartbs = B->rmap->rstart/bs; 3046 b->rendbs = B->rmap->rend/bs; 3047 b->cstartbs = B->cmap->rstart/bs; 3048 b->cendbs = B->cmap->rend/bs; 3049 3050 if (!B->preallocated) { 3051 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3052 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3053 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3054 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3055 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3056 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3057 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3058 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3059 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 3060 } 3061 3062 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3063 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3064 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3065 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3066 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3067 B->preallocated = PETSC_TRUE; 3068 PetscFunctionReturn(0); 3069 } 3070 3071 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3072 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3073 3074 #undef __FUNCT__ 3075 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3076 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3077 { 3078 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3079 PetscErrorCode ierr; 3080 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3081 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3082 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3083 3084 PetscFunctionBegin; 3085 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3086 ii[0] = 0; 3087 for (i=0; i<M; i++) { 3088 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3089 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3090 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3091 /* remove one from count of matrix has diagonal */ 3092 for (j=id[i]; j<id[i+1]; j++) { 3093 if (jd[j] == i) {ii[i+1]--;break;} 3094 } 3095 } 3096 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3097 cnt = 0; 3098 for (i=0; i<M; i++) { 3099 for (j=io[i]; j<io[i+1]; j++) { 3100 if (garray[jo[j]] > rstart) break; 3101 jj[cnt++] = garray[jo[j]]; 3102 } 3103 for (k=id[i]; k<id[i+1]; k++) { 3104 if (jd[k] != i) { 3105 jj[cnt++] = rstart + jd[k]; 3106 } 3107 } 3108 for (; j<io[i+1]; j++) { 3109 jj[cnt++] = garray[jo[j]]; 3110 } 3111 } 3112 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 3113 PetscFunctionReturn(0); 3114 } 3115 3116 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3117 3118 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3119 3120 #undef __FUNCT__ 3121 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3122 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3123 { 3124 PetscErrorCode ierr; 3125 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3126 Mat B; 3127 Mat_MPIAIJ *b; 3128 3129 PetscFunctionBegin; 3130 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 3131 3132 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3133 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3134 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3135 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3136 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 3137 b = (Mat_MPIAIJ*) B->data; 3138 3139 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3140 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3141 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3142 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3143 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3144 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3145 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3146 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3147 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3148 if (reuse == MAT_REUSE_MATRIX) { 3149 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3150 } else { 3151 *newmat = B; 3152 } 3153 PetscFunctionReturn(0); 3154 } 3155 3156 #if defined(PETSC_HAVE_MUMPS) 3157 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3158 #endif 3159 3160 /*MC 3161 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3162 3163 Options Database Keys: 3164 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3165 . -mat_block_size <bs> - set the blocksize used to store the matrix 3166 - -mat_use_hash_table <fact> 3167 3168 Level: beginner 3169 3170 .seealso: MatCreateMPIBAIJ 3171 M*/ 3172 3173 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3174 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatCreate_MPIBAIJ" 3177 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3178 { 3179 Mat_MPIBAIJ *b; 3180 PetscErrorCode ierr; 3181 PetscBool flg; 3182 3183 PetscFunctionBegin; 3184 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3185 B->data = (void*)b; 3186 3187 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3188 B->assembled = PETSC_FALSE; 3189 3190 B->insertmode = NOT_SET_VALUES; 3191 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3192 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3193 3194 /* build local table of row and column ownerships */ 3195 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3196 3197 /* build cache for off array entries formed */ 3198 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3199 3200 b->donotstash = PETSC_FALSE; 3201 b->colmap = NULL; 3202 b->garray = NULL; 3203 b->roworiented = PETSC_TRUE; 3204 3205 /* stuff used in block assembly */ 3206 b->barray = 0; 3207 3208 /* stuff used for matrix vector multiply */ 3209 b->lvec = 0; 3210 b->Mvctx = 0; 3211 3212 /* stuff for MatGetRow() */ 3213 b->rowindices = 0; 3214 b->rowvalues = 0; 3215 b->getrowactive = PETSC_FALSE; 3216 3217 /* hash table stuff */ 3218 b->ht = 0; 3219 b->hd = 0; 3220 b->ht_size = 0; 3221 b->ht_flag = PETSC_FALSE; 3222 b->ht_fact = 0; 3223 b->ht_total_ct = 0; 3224 b->ht_insert_ct = 0; 3225 3226 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3227 b->ijonly = PETSC_FALSE; 3228 3229 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3230 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3231 if (flg) { 3232 PetscReal fact = 1.39; 3233 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3234 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3235 if (fact <= 1.0) fact = 1.39; 3236 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3237 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3238 } 3239 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3240 3241 #if defined(PETSC_HAVE_MUMPS) 3242 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3243 #endif 3244 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C","MatConvert_MPIBAIJ_MPIAdj",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3245 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C","MatConvert_MPIBAIJ_MPIAIJ",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3246 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C","MatConvert_MPIBAIJ_MPISBAIJ",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3247 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIBAIJ",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3248 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIBAIJ",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3249 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIBAIJ",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3250 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C","MatMPIBAIJSetPreallocation_MPIBAIJ",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3251 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C","MatMPIBAIJSetPreallocationCSR_MPIBAIJ",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3252 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIBAIJ",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3253 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C","MatSetHashTableFactor_MPIBAIJ",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3254 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C","MatConvert_MPIBAIJ_MPIBSTRM",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3255 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3256 PetscFunctionReturn(0); 3257 } 3258 3259 /*MC 3260 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3261 3262 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3263 and MATMPIBAIJ otherwise. 3264 3265 Options Database Keys: 3266 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3267 3268 Level: beginner 3269 3270 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3271 M*/ 3272 3273 #undef __FUNCT__ 3274 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3275 /*@C 3276 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3277 (block compressed row). For good matrix assembly performance 3278 the user should preallocate the matrix storage by setting the parameters 3279 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3280 performance can be increased by more than a factor of 50. 3281 3282 Collective on Mat 3283 3284 Input Parameters: 3285 + A - the matrix 3286 . bs - size of blockk 3287 . d_nz - number of block nonzeros per block row in diagonal portion of local 3288 submatrix (same for all local rows) 3289 . d_nnz - array containing the number of block nonzeros in the various block rows 3290 of the in diagonal portion of the local (possibly different for each block 3291 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3292 set it even if it is zero. 3293 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3294 submatrix (same for all local rows). 3295 - o_nnz - array containing the number of nonzeros in the various block rows of the 3296 off-diagonal portion of the local submatrix (possibly different for 3297 each block row) or NULL. 3298 3299 If the *_nnz parameter is given then the *_nz parameter is ignored 3300 3301 Options Database Keys: 3302 + -mat_block_size - size of the blocks to use 3303 - -mat_use_hash_table <fact> 3304 3305 Notes: 3306 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3307 than it must be used on all processors that share the object for that argument. 3308 3309 Storage Information: 3310 For a square global matrix we define each processor's diagonal portion 3311 to be its local rows and the corresponding columns (a square submatrix); 3312 each processor's off-diagonal portion encompasses the remainder of the 3313 local matrix (a rectangular submatrix). 3314 3315 The user can specify preallocated storage for the diagonal part of 3316 the local submatrix with either d_nz or d_nnz (not both). Set 3317 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3318 memory allocation. Likewise, specify preallocated storage for the 3319 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3320 3321 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3322 the figure below we depict these three local rows and all columns (0-11). 3323 3324 .vb 3325 0 1 2 3 4 5 6 7 8 9 10 11 3326 ------------------- 3327 row 3 | o o o d d d o o o o o o 3328 row 4 | o o o d d d o o o o o o 3329 row 5 | o o o d d d o o o o o o 3330 ------------------- 3331 .ve 3332 3333 Thus, any entries in the d locations are stored in the d (diagonal) 3334 submatrix, and any entries in the o locations are stored in the 3335 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3336 stored simply in the MATSEQBAIJ format for compressed row storage. 3337 3338 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3339 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3340 In general, for PDE problems in which most nonzeros are near the diagonal, 3341 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3342 or you will get TERRIBLE performance; see the users' manual chapter on 3343 matrices. 3344 3345 You can call MatGetInfo() to get information on how effective the preallocation was; 3346 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3347 You can also run with the option -info and look for messages with the string 3348 malloc in them to see if additional memory allocation was needed. 3349 3350 Level: intermediate 3351 3352 .keywords: matrix, block, aij, compressed row, sparse, parallel 3353 3354 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3355 @*/ 3356 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3357 { 3358 PetscErrorCode ierr; 3359 3360 PetscFunctionBegin; 3361 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3362 PetscValidType(B,1); 3363 PetscValidLogicalCollectiveInt(B,bs,2); 3364 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3365 PetscFunctionReturn(0); 3366 } 3367 3368 #undef __FUNCT__ 3369 #define __FUNCT__ "MatCreateBAIJ" 3370 /*@C 3371 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3372 (block compressed row). For good matrix assembly performance 3373 the user should preallocate the matrix storage by setting the parameters 3374 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3375 performance can be increased by more than a factor of 50. 3376 3377 Collective on MPI_Comm 3378 3379 Input Parameters: 3380 + comm - MPI communicator 3381 . bs - size of blockk 3382 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3383 This value should be the same as the local size used in creating the 3384 y vector for the matrix-vector product y = Ax. 3385 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3386 This value should be the same as the local size used in creating the 3387 x vector for the matrix-vector product y = Ax. 3388 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3389 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3390 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3391 submatrix (same for all local rows) 3392 . d_nnz - array containing the number of nonzero blocks in the various block rows 3393 of the in diagonal portion of the local (possibly different for each block 3394 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3395 and set it even if it is zero. 3396 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3397 submatrix (same for all local rows). 3398 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3399 off-diagonal portion of the local submatrix (possibly different for 3400 each block row) or NULL. 3401 3402 Output Parameter: 3403 . A - the matrix 3404 3405 Options Database Keys: 3406 + -mat_block_size - size of the blocks to use 3407 - -mat_use_hash_table <fact> 3408 3409 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3410 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3411 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3412 3413 Notes: 3414 If the *_nnz parameter is given then the *_nz parameter is ignored 3415 3416 A nonzero block is any block that as 1 or more nonzeros in it 3417 3418 The user MUST specify either the local or global matrix dimensions 3419 (possibly both). 3420 3421 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3422 than it must be used on all processors that share the object for that argument. 3423 3424 Storage Information: 3425 For a square global matrix we define each processor's diagonal portion 3426 to be its local rows and the corresponding columns (a square submatrix); 3427 each processor's off-diagonal portion encompasses the remainder of the 3428 local matrix (a rectangular submatrix). 3429 3430 The user can specify preallocated storage for the diagonal part of 3431 the local submatrix with either d_nz or d_nnz (not both). Set 3432 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3433 memory allocation. Likewise, specify preallocated storage for the 3434 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3435 3436 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3437 the figure below we depict these three local rows and all columns (0-11). 3438 3439 .vb 3440 0 1 2 3 4 5 6 7 8 9 10 11 3441 ------------------- 3442 row 3 | o o o d d d o o o o o o 3443 row 4 | o o o d d d o o o o o o 3444 row 5 | o o o d d d o o o o o o 3445 ------------------- 3446 .ve 3447 3448 Thus, any entries in the d locations are stored in the d (diagonal) 3449 submatrix, and any entries in the o locations are stored in the 3450 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3451 stored simply in the MATSEQBAIJ format for compressed row storage. 3452 3453 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3454 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3455 In general, for PDE problems in which most nonzeros are near the diagonal, 3456 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3457 or you will get TERRIBLE performance; see the users' manual chapter on 3458 matrices. 3459 3460 Level: intermediate 3461 3462 .keywords: matrix, block, aij, compressed row, sparse, parallel 3463 3464 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3465 @*/ 3466 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3467 { 3468 PetscErrorCode ierr; 3469 PetscMPIInt size; 3470 3471 PetscFunctionBegin; 3472 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3473 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3474 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3475 if (size > 1) { 3476 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3477 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3478 } else { 3479 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3480 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3481 } 3482 PetscFunctionReturn(0); 3483 } 3484 3485 #undef __FUNCT__ 3486 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3487 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3488 { 3489 Mat mat; 3490 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3491 PetscErrorCode ierr; 3492 PetscInt len=0; 3493 3494 PetscFunctionBegin; 3495 *newmat = 0; 3496 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3497 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3498 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3499 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3500 3501 mat->factortype = matin->factortype; 3502 mat->preallocated = PETSC_TRUE; 3503 mat->assembled = PETSC_TRUE; 3504 mat->insertmode = NOT_SET_VALUES; 3505 3506 a = (Mat_MPIBAIJ*)mat->data; 3507 mat->rmap->bs = matin->rmap->bs; 3508 a->bs2 = oldmat->bs2; 3509 a->mbs = oldmat->mbs; 3510 a->nbs = oldmat->nbs; 3511 a->Mbs = oldmat->Mbs; 3512 a->Nbs = oldmat->Nbs; 3513 3514 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3515 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3516 3517 a->size = oldmat->size; 3518 a->rank = oldmat->rank; 3519 a->donotstash = oldmat->donotstash; 3520 a->roworiented = oldmat->roworiented; 3521 a->rowindices = 0; 3522 a->rowvalues = 0; 3523 a->getrowactive = PETSC_FALSE; 3524 a->barray = 0; 3525 a->rstartbs = oldmat->rstartbs; 3526 a->rendbs = oldmat->rendbs; 3527 a->cstartbs = oldmat->cstartbs; 3528 a->cendbs = oldmat->cendbs; 3529 3530 /* hash table stuff */ 3531 a->ht = 0; 3532 a->hd = 0; 3533 a->ht_size = 0; 3534 a->ht_flag = oldmat->ht_flag; 3535 a->ht_fact = oldmat->ht_fact; 3536 a->ht_total_ct = 0; 3537 a->ht_insert_ct = 0; 3538 3539 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3540 if (oldmat->colmap) { 3541 #if defined(PETSC_USE_CTABLE) 3542 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3543 #else 3544 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3545 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3546 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3547 #endif 3548 } else a->colmap = 0; 3549 3550 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3551 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3552 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3553 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3554 } else a->garray = 0; 3555 3556 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3557 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3558 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3559 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3560 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3561 3562 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3563 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3564 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3565 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3566 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3567 *newmat = mat; 3568 PetscFunctionReturn(0); 3569 } 3570 3571 #undef __FUNCT__ 3572 #define __FUNCT__ "MatLoad_MPIBAIJ" 3573 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3574 { 3575 PetscErrorCode ierr; 3576 int fd; 3577 PetscInt i,nz,j,rstart,rend; 3578 PetscScalar *vals,*buf; 3579 MPI_Comm comm; 3580 MPI_Status status; 3581 PetscMPIInt rank,size,maxnz; 3582 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3583 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3584 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3585 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3586 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3587 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3588 3589 PetscFunctionBegin; 3590 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3591 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3592 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3593 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3594 3595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3596 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3597 if (!rank) { 3598 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3599 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3600 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3601 } 3602 3603 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3604 3605 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3606 M = header[1]; N = header[2]; 3607 3608 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3609 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3610 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3611 3612 /* If global sizes are set, check if they are consistent with that given in the file */ 3613 if (sizesset) { 3614 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3615 } 3616 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3617 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3618 3619 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3620 3621 /* 3622 This code adds extra rows to make sure the number of rows is 3623 divisible by the blocksize 3624 */ 3625 Mbs = M/bs; 3626 extra_rows = bs - M + bs*Mbs; 3627 if (extra_rows == bs) extra_rows = 0; 3628 else Mbs++; 3629 if (extra_rows && !rank) { 3630 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3631 } 3632 3633 /* determine ownership of all rows */ 3634 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3635 mbs = Mbs/size + ((Mbs % size) > rank); 3636 m = mbs*bs; 3637 } else { /* User set */ 3638 m = newmat->rmap->n; 3639 mbs = m/bs; 3640 } 3641 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3642 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3643 3644 /* process 0 needs enough room for process with most rows */ 3645 if (!rank) { 3646 mmax = rowners[1]; 3647 for (i=2; i<=size; i++) { 3648 mmax = PetscMax(mmax,rowners[i]); 3649 } 3650 mmax*=bs; 3651 } else mmax = -1; /* unused, but compiler warns anyway */ 3652 3653 rowners[0] = 0; 3654 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3655 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3656 rstart = rowners[rank]; 3657 rend = rowners[rank+1]; 3658 3659 /* distribute row lengths to all processors */ 3660 ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3661 if (!rank) { 3662 mend = m; 3663 if (size == 1) mend = mend - extra_rows; 3664 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3665 for (j=mend; j<m; j++) locrowlens[j] = 1; 3666 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3667 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3668 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3669 for (j=0; j<m; j++) { 3670 procsnz[0] += locrowlens[j]; 3671 } 3672 for (i=1; i<size; i++) { 3673 mend = browners[i+1] - browners[i]; 3674 if (i == size-1) mend = mend - extra_rows; 3675 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3676 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3677 /* calculate the number of nonzeros on each processor */ 3678 for (j=0; j<browners[i+1]-browners[i]; j++) { 3679 procsnz[i] += rowlengths[j]; 3680 } 3681 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3682 } 3683 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3684 } else { 3685 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3686 } 3687 3688 if (!rank) { 3689 /* determine max buffer needed and allocate it */ 3690 maxnz = procsnz[0]; 3691 for (i=1; i<size; i++) { 3692 maxnz = PetscMax(maxnz,procsnz[i]); 3693 } 3694 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3695 3696 /* read in my part of the matrix column indices */ 3697 nz = procsnz[0]; 3698 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3699 mycols = ibuf; 3700 if (size == 1) nz -= extra_rows; 3701 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3702 if (size == 1) { 3703 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3704 } 3705 3706 /* read in every ones (except the last) and ship off */ 3707 for (i=1; i<size-1; i++) { 3708 nz = procsnz[i]; 3709 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3710 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3711 } 3712 /* read in the stuff for the last proc */ 3713 if (size != 1) { 3714 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3715 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3716 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3717 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3718 } 3719 ierr = PetscFree(cols);CHKERRQ(ierr); 3720 } else { 3721 /* determine buffer space needed for message */ 3722 nz = 0; 3723 for (i=0; i<m; i++) { 3724 nz += locrowlens[i]; 3725 } 3726 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3727 mycols = ibuf; 3728 /* receive message of column indices*/ 3729 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3730 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3731 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3732 } 3733 3734 /* loop over local rows, determining number of off diagonal entries */ 3735 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3736 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3737 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3738 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3739 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3740 rowcount = 0; nzcount = 0; 3741 for (i=0; i<mbs; i++) { 3742 dcount = 0; 3743 odcount = 0; 3744 for (j=0; j<bs; j++) { 3745 kmax = locrowlens[rowcount]; 3746 for (k=0; k<kmax; k++) { 3747 tmp = mycols[nzcount++]/bs; 3748 if (!mask[tmp]) { 3749 mask[tmp] = 1; 3750 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3751 else masked1[dcount++] = tmp; 3752 } 3753 } 3754 rowcount++; 3755 } 3756 3757 dlens[i] = dcount; 3758 odlens[i] = odcount; 3759 3760 /* zero out the mask elements we set */ 3761 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3762 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3763 } 3764 3765 3766 if (!sizesset) { 3767 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3768 } 3769 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3770 3771 if (!rank) { 3772 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3773 /* read in my part of the matrix numerical values */ 3774 nz = procsnz[0]; 3775 vals = buf; 3776 mycols = ibuf; 3777 if (size == 1) nz -= extra_rows; 3778 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3779 if (size == 1) { 3780 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3781 } 3782 3783 /* insert into matrix */ 3784 jj = rstart*bs; 3785 for (i=0; i<m; i++) { 3786 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3787 mycols += locrowlens[i]; 3788 vals += locrowlens[i]; 3789 jj++; 3790 } 3791 /* read in other processors (except the last one) and ship out */ 3792 for (i=1; i<size-1; i++) { 3793 nz = procsnz[i]; 3794 vals = buf; 3795 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3796 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3797 } 3798 /* the last proc */ 3799 if (size != 1) { 3800 nz = procsnz[i] - extra_rows; 3801 vals = buf; 3802 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3803 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3804 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3805 } 3806 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3807 } else { 3808 /* receive numeric values */ 3809 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3810 3811 /* receive message of values*/ 3812 vals = buf; 3813 mycols = ibuf; 3814 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3815 3816 /* insert into matrix */ 3817 jj = rstart*bs; 3818 for (i=0; i<m; i++) { 3819 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3820 mycols += locrowlens[i]; 3821 vals += locrowlens[i]; 3822 jj++; 3823 } 3824 } 3825 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3826 ierr = PetscFree(buf);CHKERRQ(ierr); 3827 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3828 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3829 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3830 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3831 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3832 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #undef __FUNCT__ 3837 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3838 /*@ 3839 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3840 3841 Input Parameters: 3842 . mat - the matrix 3843 . fact - factor 3844 3845 Not Collective, each process can use a different factor 3846 3847 Level: advanced 3848 3849 Notes: 3850 This can also be set by the command line option: -mat_use_hash_table <fact> 3851 3852 .keywords: matrix, hashtable, factor, HT 3853 3854 .seealso: MatSetOption() 3855 @*/ 3856 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3857 { 3858 PetscErrorCode ierr; 3859 3860 PetscFunctionBegin; 3861 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3862 PetscFunctionReturn(0); 3863 } 3864 3865 #undef __FUNCT__ 3866 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3867 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3868 { 3869 Mat_MPIBAIJ *baij; 3870 3871 PetscFunctionBegin; 3872 baij = (Mat_MPIBAIJ*)mat->data; 3873 baij->ht_fact = fact; 3874 PetscFunctionReturn(0); 3875 } 3876 3877 #undef __FUNCT__ 3878 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3879 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3880 { 3881 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3882 3883 PetscFunctionBegin; 3884 *Ad = a->A; 3885 *Ao = a->B; 3886 *colmap = a->garray; 3887 PetscFunctionReturn(0); 3888 } 3889 3890 /* 3891 Special version for direct calls from Fortran (to eliminate two function call overheads 3892 */ 3893 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3894 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3895 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3896 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3897 #endif 3898 3899 #undef __FUNCT__ 3900 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3901 /*@C 3902 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3903 3904 Collective on Mat 3905 3906 Input Parameters: 3907 + mat - the matrix 3908 . min - number of input rows 3909 . im - input rows 3910 . nin - number of input columns 3911 . in - input columns 3912 . v - numerical values input 3913 - addvin - INSERT_VALUES or ADD_VALUES 3914 3915 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3916 3917 Level: advanced 3918 3919 .seealso: MatSetValuesBlocked() 3920 @*/ 3921 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3922 { 3923 /* convert input arguments to C version */ 3924 Mat mat = *matin; 3925 PetscInt m = *min, n = *nin; 3926 InsertMode addv = *addvin; 3927 3928 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3929 const MatScalar *value; 3930 MatScalar *barray = baij->barray; 3931 PetscBool roworiented = baij->roworiented; 3932 PetscErrorCode ierr; 3933 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3934 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3935 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3936 3937 PetscFunctionBegin; 3938 /* tasks normally handled by MatSetValuesBlocked() */ 3939 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3940 #if defined(PETSC_USE_DEBUG) 3941 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3942 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3943 #endif 3944 if (mat->assembled) { 3945 mat->was_assembled = PETSC_TRUE; 3946 mat->assembled = PETSC_FALSE; 3947 } 3948 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3949 3950 3951 if (!barray) { 3952 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3953 baij->barray = barray; 3954 } 3955 3956 if (roworiented) stepval = (n-1)*bs; 3957 else stepval = (m-1)*bs; 3958 3959 for (i=0; i<m; i++) { 3960 if (im[i] < 0) continue; 3961 #if defined(PETSC_USE_DEBUG) 3962 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3963 #endif 3964 if (im[i] >= rstart && im[i] < rend) { 3965 row = im[i] - rstart; 3966 for (j=0; j<n; j++) { 3967 /* If NumCol = 1 then a copy is not required */ 3968 if ((roworiented) && (n == 1)) { 3969 barray = (MatScalar*)v + i*bs2; 3970 } else if ((!roworiented) && (m == 1)) { 3971 barray = (MatScalar*)v + j*bs2; 3972 } else { /* Here a copy is required */ 3973 if (roworiented) { 3974 value = v + i*(stepval+bs)*bs + j*bs; 3975 } else { 3976 value = v + j*(stepval+bs)*bs + i*bs; 3977 } 3978 for (ii=0; ii<bs; ii++,value+=stepval) { 3979 for (jj=0; jj<bs; jj++) { 3980 *barray++ = *value++; 3981 } 3982 } 3983 barray -=bs2; 3984 } 3985 3986 if (in[j] >= cstart && in[j] < cend) { 3987 col = in[j] - cstart; 3988 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3989 } else if (in[j] < 0) continue; 3990 #if defined(PETSC_USE_DEBUG) 3991 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3992 #endif 3993 else { 3994 if (mat->was_assembled) { 3995 if (!baij->colmap) { 3996 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3997 } 3998 3999 #if defined(PETSC_USE_DEBUG) 4000 #if defined(PETSC_USE_CTABLE) 4001 { PetscInt data; 4002 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4003 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4004 } 4005 #else 4006 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4007 #endif 4008 #endif 4009 #if defined(PETSC_USE_CTABLE) 4010 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4011 col = (col - 1)/bs; 4012 #else 4013 col = (baij->colmap[in[j]] - 1)/bs; 4014 #endif 4015 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4016 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4017 col = in[j]; 4018 } 4019 } else col = in[j]; 4020 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4021 } 4022 } 4023 } else { 4024 if (!baij->donotstash) { 4025 if (roworiented) { 4026 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4027 } else { 4028 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4029 } 4030 } 4031 } 4032 } 4033 4034 /* task normally handled by MatSetValuesBlocked() */ 4035 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4036 PetscFunctionReturn(0); 4037 } 4038 4039 #undef __FUNCT__ 4040 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4041 /*@ 4042 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4043 CSR format the local rows. 4044 4045 Collective on MPI_Comm 4046 4047 Input Parameters: 4048 + comm - MPI communicator 4049 . bs - the block size, only a block size of 1 is supported 4050 . m - number of local rows (Cannot be PETSC_DECIDE) 4051 . n - This value should be the same as the local size used in creating the 4052 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4053 calculated if N is given) For square matrices n is almost always m. 4054 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4055 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4056 . i - row indices 4057 . j - column indices 4058 - a - matrix values 4059 4060 Output Parameter: 4061 . mat - the matrix 4062 4063 Level: intermediate 4064 4065 Notes: 4066 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4067 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4068 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4069 4070 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4071 4072 .keywords: matrix, aij, compressed row, sparse, parallel 4073 4074 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4075 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4076 @*/ 4077 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4078 { 4079 PetscErrorCode ierr; 4080 4081 PetscFunctionBegin; 4082 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4083 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4084 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4085 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4086 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4087 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4088 PetscFunctionReturn(0); 4089 } 4090