#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
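/*
   Note on the colmap convention: entries are stored shifted by one (i*bs+1),
   so a lookup that returns 0 (hence -1 after the subtraction) means "this
   global block column has no off-diagonal entry on this process".  A typical
   lookup therefore reads

       col = baij->colmap[in[j]/bs] - 1;
       if (col < 0) { ... column not present locally ... }

   (or the PetscTableFind() equivalent when PETSC_USE_CTABLE is defined),
   as done in MatSetValues_MPIBAIJ() and MatGetValues_MPIBAIJ() below.
*/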
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
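/*
   Both macros above insert a single scalar entry (row,col) into the diagonal
   (A) or off-diagonal (B) sequential block: a short binary search narrows the
   block row down to a window of at most three block columns, a linear scan
   finds the block column (or the insertion point), and on a true insertion
   all later entries in the block row are shifted up by one bs x bs block.
*/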
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
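/*
   Minimal caller-side sketch of the path implemented above (error checking
   omitted).  Values destined for locally owned rows go straight into baij->A
   or baij->B; everything else is stashed and communicated during assembly:

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
     MatSetValues(A,1,&i,1,&j,&val,ADD_VALUES);   /* dispatches to MatSetValues_MPIBAIJ() */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/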
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
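/*
   HASH() is multiplicative hashing with the golden-ratio conjugate
   (HASH_KEY ~ 1/phi): the slot is the fractional part of key*HASH_KEY
   scaled to the table size.  For example, with size = 100 and key = 7:

       tmp  = 7*0.6180339887 = 4.3262...
       slot = (PetscInt)(100*(4.3262... - 4)) = 32

   Keys are stored shifted by +1 so that an empty slot (0) can never
   collide with a valid key.
*/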
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
/*
  Creates the hash table, and sets the table entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for the Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
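/*
   Collision resolution above is plain linear probing: on a full slot the scan
   walks forward (modulo ht_size) until a free slot is found, and the lookups
   in MatSetValues_MPIBAIJ_HT() retrace the same probe sequence.  Since the
   table is sized as factor*nz slots for nz nonzero blocks (factor is set via
   the composed MatSetHashTableFactor_C function, see baij->ht_fact), the
   expected load factor is roughly 1/factor.
*/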
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
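/*
   Assembly is two-phase: MatAssemblyBegin() above posts the sends for both the
   scalar stash and the block stash, and MatAssemblyEnd() below drains the
   incoming messages, replaying each one through MatSetValues_MPIBAIJ() or
   MatSetValuesBlocked_MPIBAIJ() on the receiving process before assembling
   the local A and B pieces.
*/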
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
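/*
   Note that once the hash table exists (MAT_USE_HASH_TABLE was set and a final
   assembly completed), the function-table entries installed above route all
   further MatSetValues()/MatSetValuesBlocked() calls through the _HT variants,
   which can only overwrite already-allocated nonzero locations.
*/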
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by
         PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
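/*
   The binary writer below follows the generic PETSc Mat file layout: a
   four-entry header (MAT_FILE_CLASSID, M, N, total nz), then the per-row
   nonzero counts, then all column indices, then all numerical values.  Each
   section is gathered to process 0 under flow control and written in
   expanded scalar (non-blocked) CSR form.
*/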
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices.  Include them for all rows, not just one per block row,
     since process 0 does not have the information needed to build them for each row from a
     block row.  This does require more communication, but still no more than the communication
     needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
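/*
   Both multiply routines above follow the standard PETSc pattern for hiding
   communication: start the scatter of the ghost values of x, apply the
   diagonal block to the locally owned part of x while messages are in flight,
   finish the scatter, then accumulate the off-diagonal contribution, i.e.

       y = A_diag * x_local + B_offdiag * x_ghost
*/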
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
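/*
   MatGetRow_MPIBAIJ above merges the row of B (off-diagonal, stored with local
   block column numbers that map through garray to global ones) with the row of
   A (diagonal part), so the caller sees a single row sorted by global column:
   the B entries left of the diagonal block, then all of A, then the remaining
   B entries.
*/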
info->memory; isend[4] += info->mallocs; 1544 if (flag == MAT_LOCAL) { 1545 info->nz_used = isend[0]; 1546 info->nz_allocated = isend[1]; 1547 info->nz_unneeded = isend[2]; 1548 info->memory = isend[3]; 1549 info->mallocs = isend[4]; 1550 } else if (flag == MAT_GLOBAL_MAX) { 1551 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1552 info->nz_used = irecv[0]; 1553 info->nz_allocated = irecv[1]; 1554 info->nz_unneeded = irecv[2]; 1555 info->memory = irecv[3]; 1556 info->mallocs = irecv[4]; 1557 } else if (flag == MAT_GLOBAL_SUM) { 1558 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1559 info->nz_used = irecv[0]; 1560 info->nz_allocated = irecv[1]; 1561 info->nz_unneeded = irecv[2]; 1562 info->memory = irecv[3]; 1563 info->mallocs = irecv[4]; 1564 } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1565 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1566 info->fill_ratio_needed = 0; 1567 info->factor_mallocs = 0; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1574 { 1575 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1576 PetscErrorCode ierr; 1577 1578 PetscFunctionBegin; 1579 switch (op) { 1580 case MAT_NEW_NONZERO_LOCATIONS: 1581 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1582 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1583 case MAT_KEEP_NONZERO_PATTERN: 1584 case MAT_NEW_NONZERO_LOCATION_ERR: 1585 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1586 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1587 break; 1588 case MAT_ROW_ORIENTED: 1589 a->roworiented = flg; 1590 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1591 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1592 break; 1593 case MAT_NEW_DIAGONALS: 1594 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1595 break; 1596 case MAT_IGNORE_OFF_PROC_ENTRIES: 1597 a->donotstash = flg; 1598 break; 1599 case MAT_USE_HASH_TABLE: 1600 a->ht_flag = flg; 1601 break; 1602 case MAT_SYMMETRIC: 1603 case MAT_STRUCTURALLY_SYMMETRIC: 1604 case MAT_HERMITIAN: 1605 case MAT_SYMMETRY_ETERNAL: 1606 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1607 break; 1608 default: 1609 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1616 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1617 { 1618 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1619 Mat_SeqBAIJ *Aloc; 1620 Mat B; 1621 PetscErrorCode ierr; 1622 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1623 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1624 MatScalar *a; 1625 1626 PetscFunctionBegin; 1627 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1628 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1629 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1630 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1631 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1632 /* Do not know preallocation information, but must set block size */ 1633 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1634 } else { 1635 B = *matout; 1636 } 1637 1638 /* copy over 
the A part */ 1639 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1640 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1641 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1642 1643 for (i=0; i<mbs; i++) { 1644 rvals[0] = bs*(baij->rstartbs + i); 1645 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1646 for (j=ai[i]; j<ai[i+1]; j++) { 1647 col = (baij->cstartbs+aj[j])*bs; 1648 for (k=0; k<bs; k++) { 1649 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1650 col++; a += bs; 1651 } 1652 } 1653 } 1654 /* copy over the B part */ 1655 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1656 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1657 for (i=0; i<mbs; i++) { 1658 rvals[0] = bs*(baij->rstartbs + i); 1659 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1660 for (j=ai[i]; j<ai[i+1]; j++) { 1661 col = baij->garray[aj[j]]*bs; 1662 for (k=0; k<bs; k++) { 1663 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1664 col++; a += bs; 1665 } 1666 } 1667 } 1668 ierr = PetscFree(rvals);CHKERRQ(ierr); 1669 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 1672 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1673 *matout = B; 1674 } else { 1675 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1676 } 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1682 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1683 { 1684 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1685 Mat a = baij->A,b = baij->B; 1686 PetscErrorCode ierr; 1687 PetscInt s1,s2,s3; 1688 1689 PetscFunctionBegin; 1690 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1691 if (rr) { 1692 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1693 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1694 /* Overlap communication with computation. 
*/ 1695 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 } 1697 if (ll) { 1698 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1699 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1700 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1701 } 1702 /* scale the diagonal block */ 1703 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1704 1705 if (rr) { 1706 /* Do a scatter end and then right scale the off-diagonal block */ 1707 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1708 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1709 } 1710 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1716 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1717 { 1718 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1719 PetscErrorCode ierr; 1720 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1721 PetscInt i,*owners = A->rmap->range; 1722 PetscInt *nprocs,j,idx,nsends,row; 1723 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1724 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1725 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1726 MPI_Comm comm = ((PetscObject)A)->comm; 1727 MPI_Request *send_waits,*recv_waits; 1728 MPI_Status recv_status,*send_status; 1729 const PetscScalar *xx; 1730 PetscScalar *bb; 1731 #if defined(PETSC_DEBUG) 1732 PetscBool found = PETSC_FALSE; 1733 #endif 1734 1735 PetscFunctionBegin; 1736 /* first count number of contributors to each processor */ 1737 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1738 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1739 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1740 j = 0; 1741 for (i=0; i<N; i++) { 1742 if (lastidx > (idx = rows[i])) j = 0; 1743 lastidx = idx; 1744 for (; j<size; j++) { 1745 if (idx >= owners[j] && idx < owners[j+1]) { 1746 nprocs[2*j]++; 1747 nprocs[2*j+1] = 1; 1748 owner[i] = j; 1749 #if defined(PETSC_DEBUG) 1750 found = PETSC_TRUE; 1751 #endif 1752 break; 1753 } 1754 } 1755 #if defined(PETSC_DEBUG) 1756 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1757 found = PETSC_FALSE; 1758 #endif 1759 } 1760 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1761 1762 if (A->nooffproczerorows) { 1763 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1764 nrecvs = nsends; 1765 nmax = N; 1766 } else { 1767 /* inform other processors of number of messages and max length*/ 1768 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1769 } 1770 1771 /* post receives: */ 1772 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1773 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1774 for (i=0; i<nrecvs; i++) { 1775 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1776 } 1777 1778 /* do sends: 1779 1) starts[i] gives the starting index in svalues for stuff going to 1780 the ith processor 1781 */ 1782 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1783 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1784 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex] = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zero the local rows */
  /*
    Zero the required rows. If the "diagonal block" of the matrix
    is square and the user wishes to set the diagonal, we use separate
    code so that MatSetValues() is not called once per diagonal entry;
    that would repeatedly allocate new memory (many mallocs) and slow
    things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i=0; i<slen; i++) {
      row = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }

  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
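/*
   Editor's sketch (added; not part of the original source): typical caller-side
   use of the routine above.  An application imposing Dirichlet boundary
   conditions lists the global rows to be cleared (here the hypothetical array
   bcrows of length nbc) and lets MatZeroRows() route any off-process rows
   through the send/receive phase implemented above.  Compiled out since it is
   illustrative only.
*/
#if 0
static PetscErrorCode ApplyDirichletRows(Mat A,PetscInt nbc,const PetscInt bcrows[],Vec x,Vec b)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* diag = 1.0 keeps a unit diagonal in the zeroed rows; passing both x and b
     (they may be PETSC_NULL) triggers the fixup b[row] = diag*x[row] */
  ierr = MatZeroRows(A,nbc,bcrows,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif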
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIBAIJ"
PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIBAIJ"
PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIBAIJ"
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIBAIJ"
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIBAIJ"
PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
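/*
   Editor's sketch (added; not part of the original source): the intended
   MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX protocol for the routine above.
   rowis and colis are assumed to be application-provided index sets whose
   pattern does not change between calls.  Compiled out since it is
   illustrative only.
*/
#if 0
static PetscErrorCode ExtractSubmatrixTwice(Mat A,IS rowis,IS colis,Mat *S)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* first call: creates *S and caches the gathered column IS ("ISAllGather")
     and the local submatrix ("SubMatrix") on it */
  ierr = MatGetSubMatrix(A,rowis,colis,MAT_INITIAL_MATRIX,S);CHKERRQ(ierr);
  /* ... numerical values of A change, nonzero pattern does not ... */
  /* second call: reuses *S via the objects queried with PetscObjectQuery() */
  ierr = MatGetSubMatrix(A,rowis,colis,MAT_REUSE_MATRIX,S);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif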
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a sequential
  SeqBAIJ on each process, and then the final result obtained by concatenating
  the local matrices. Writing it directly would look much like
  MatGetSubMatrices_MPIBAIJ()
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  IS             isrow_new, iscol_new;
  PetscBool      idflag,allrows, allcols;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided: it does not report
     errors and it may change the indices, hence it is buggy. */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (idflag && ncol == mat->cmap->N) {
    allcols = PETSC_TRUE;
  } else {
    allcols = PETSC_FALSE;
  }

  ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
  if (idflag && nrow == mat->rmap->N) {
    allrows = PETSC_TRUE;
  } else {
    allrows = PETSC_FALSE;
  }
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m = m/bs;
  n = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii = aij->i;
    jj = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens +
m; 2116 for (i=0; i<m; i++) { 2117 jend = ii[i+1] - ii[i]; 2118 olen = 0; 2119 dlen = 0; 2120 for (j=0; j<jend; j++) { 2121 if (*jj < rstart || *jj >= rend) olen++; 2122 else dlen++; 2123 jj++; 2124 } 2125 olens[i] = olen; 2126 dlens[i] = dlen; 2127 } 2128 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2129 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2130 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2131 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2132 ierr = PetscFree(dlens);CHKERRQ(ierr); 2133 } else { 2134 PetscInt ml,nl; 2135 2136 M = *newmat; 2137 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2138 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2139 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2140 /* 2141 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2142 rather than the slower MatSetValues(). 2143 */ 2144 M->was_assembled = PETSC_TRUE; 2145 M->assembled = PETSC_FALSE; 2146 } 2147 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2148 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2149 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2150 ii = aij->i; 2151 jj = aij->j; 2152 aa = aij->a; 2153 for (i=0; i<m; i++) { 2154 row = rstart/bs + i; 2155 nz = ii[i+1] - ii[i]; 2156 cwork = jj; jj += nz; 2157 vwork = aa; aa += nz*bs*bs; 2158 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2159 } 2160 2161 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2162 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2163 *newmat = M; 2164 2165 /* save submatrix used in processor for next request */ 2166 if (call == MAT_INITIAL_MATRIX) { 2167 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2168 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2169 } 2170 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatPermute_MPIBAIJ" 2176 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2177 { 2178 MPI_Comm comm,pcomm; 2179 PetscInt first,rlocal_size,clocal_size,nrows; 2180 const PetscInt *rows; 2181 PetscMPIInt size; 2182 IS crowp,growp,irowp,lrowp,lcolp; 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2187 /* make a collective version of 'rowp' */ 2188 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2189 if (pcomm==comm) { 2190 crowp = rowp; 2191 } else { 2192 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2193 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2194 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2195 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2196 } 2197 /* collect the global row permutation and invert it */ 2198 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2199 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2200 if (pcomm!=comm) { 2201 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2202 } 2203 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2204 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2205 /* get the local target indices */ 2206 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2207 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2208 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2209 ierr = 
ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is much simpler;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIBAIJ"
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) { *nghosts = B->nbs;}
  if (ghosts) {*ghosts = baij->garray;}
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
*/
PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
  const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj;
  PetscInt               nis = iscoloring->n,nctot,*cols;
  PetscInt               *rowhit,M,cstart,cend,colb;
  PetscInt               *columnsforrow,l;
  IS                     *isa;
  PetscBool              done,flg;
  ISLocalToGlobalMapping map = mat->cmap->bmapping;
  PetscInt               *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2266 2267 PetscFunctionBegin; 2268 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2269 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2270 2271 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2272 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2273 M = mat->rmap->n/bs; 2274 cstart = mat->cmap->rstart/bs; 2275 cend = mat->cmap->rend/bs; 2276 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2277 c->N = mat->cmap->N/bs; 2278 c->m = mat->rmap->n/bs; 2279 c->rstart = mat->rmap->rstart/bs; 2280 2281 c->ncolors = nis; 2282 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2283 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2284 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2285 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2286 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2287 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2288 2289 /* Allow access to data structures of local part of matrix */ 2290 if (!baij->colmap) { 2291 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2292 } 2293 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2294 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2295 2296 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2297 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2298 2299 for (i=0; i<nis; i++) { 2300 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2301 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2302 c->ncolumns[i] = n; 2303 if (n) { 2304 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2305 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2306 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2307 } else { 2308 c->columns[i] = 0; 2309 } 2310 2311 if (ctype == IS_COLORING_GLOBAL) { 2312 /* Determine the total (parallel) number of columns of this color */ 2313 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2314 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2315 2316 nn = PetscMPIIntCast(n); 2317 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2318 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2319 if (!nctot) { 2320 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2321 } 2322 2323 disp[0] = 0; 2324 for (j=1; j<size; j++) { 2325 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2326 } 2327 2328 /* Get complete list of columns for color on each processor */ 2329 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2330 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2331 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2332 } else if (ctype == IS_COLORING_GHOSTED) { 2333 /* Determine local number of columns of this color on this process, including ghost points */ 2334 nctot = n; 2335 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2336 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2337 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2338 2339 /* 2340 Mark all rows affect by these columns 2341 */ 2342 /* Temporary option to allow for debugging/testing */ 2343 flg = PETSC_FALSE; 2344 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2345 if (!flg) {/*-----------------------------------------------------------------------------*/ 2346 /* crude, fast version */ 2347 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2348 /* loop over columns*/ 2349 for (j=0; j<nctot; j++) { 2350 if (ctype == IS_COLORING_GHOSTED) { 2351 col = ltog[cols[j]]; 2352 } else { 2353 col = cols[j]; 2354 } 2355 if (col >= cstart && col < cend) { 2356 /* column is in diagonal block of matrix */ 2357 rows = A_cj + A_ci[col-cstart]; 2358 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2359 } else { 2360 #if defined (PETSC_USE_CTABLE) 2361 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2362 colb --; 2363 #else 2364 colb = baij->colmap[col] - 1; 2365 #endif 2366 if (colb == -1) { 2367 m = 0; 2368 } else { 2369 colb = colb/bs; 2370 rows = B_cj + B_ci[colb]; 2371 m = B_ci[colb+1] - B_ci[colb]; 2372 } 2373 } 2374 /* loop over columns marking them in rowhit */ 2375 for (k=0; k<m; k++) { 2376 rowhit[*rows++] = col + 1; 2377 } 2378 } 2379 2380 /* count the number of hits */ 2381 nrows = 0; 2382 for (j=0; j<M; j++) { 2383 if (rowhit[j]) nrows++; 2384 } 2385 c->nrows[i] = nrows; 2386 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2387 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2388 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2389 nrows = 0; 2390 for (j=0; j<M; j++) { 2391 if (rowhit[j]) { 2392 c->rows[i][nrows] = j; 2393 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2394 nrows++; 2395 } 2396 } 2397 } else {/*-------------------------------------------------------------------------------*/ 2398 /* slow version, using rowhit as a linked list */ 2399 PetscInt currentcol,fm,mfm; 2400 rowhit[M] = M; 2401 nrows = 0; 2402 /* loop over columns*/ 2403 for (j=0; j<nctot; j++) { 2404 if (ctype == IS_COLORING_GHOSTED) { 2405 col = ltog[cols[j]]; 2406 } else { 2407 col = cols[j]; 2408 } 2409 if (col >= cstart && col < cend) { 2410 /* column is in diagonal block of matrix */ 2411 rows = A_cj + A_ci[col-cstart]; 2412 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2413 } else { 2414 #if defined (PETSC_USE_CTABLE) 2415 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2416 colb --; 2417 #else 2418 colb = baij->colmap[col] - 1; 2419 #endif 2420 if (colb == -1) { 2421 m = 0; 2422 } else { 2423 colb = colb/bs; 2424 rows = B_cj + B_ci[colb]; 2425 m = B_ci[colb+1] - B_ci[colb]; 2426 } 2427 } 2428 2429 /* loop over columns marking them in rowhit */ 2430 fm = M; /* fm points to first entry in linked list */ 2431 for (k=0; k<m; k++) { 2432 currentcol = *rows++; 2433 /* is it already in the list? 
 */
        do {
          mfm = fm;
          fm = rowhit[fm];
        } while (fm < currentcol);
        /* not in list so add it */
        if (fm != currentcol) {
          nrows++;
          columnsforrow[currentcol] = col;
          /* next three lines insert new entry into linked list */
          rowhit[mfm] = currentcol;
          rowhit[currentcol] = fm;
          fm = currentcol;
          /* fm points to present position in list since we know the columns are sorted */
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
      }
    }
    c->nrows[i] = nrows;
    ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
    ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
    /* now store the linked list of rows into c->rows[i] */
    nrows = 0;
    fm = rowhit[M];
    do {
      c->rows[i][nrows] = fm;
      c->columnsforrow[i][nrows++] = columnsforrow[fm];
      fm = rowhit[fm];
    } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
       vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    CHKMEMQ;
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          colb = col - cstart;
        } else {
          /* column is in "off-processor" part */
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb --;
#else
          colb = baij->colmap[col] - 1;
#endif
          colb = colb/bs;
          colb += cend - cstart;
        }
        c->vscaleforrow[k][l] = colb;
      }
    }
  } else if (ctype == IS_COLORING_GHOSTED) {
    /* Get gtol mapping */
    PetscInt N = mat->cmap->N, *gtol;
    ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
    for (i=0; i<N; i++) gtol[i] = -1;
    for (i=0; i<map->n; i++) gtol[ltog[i]] = i;

    c->vscale = 0; /* will be created in MatFDColoringApply() */
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l]; /* global column index */
        c->vscaleforrow[k][l] = gtol[col]; /* local column index */
      }
    }
    ierr = PetscFree(gtol);CHKERRQ(ierr);
  }
  ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);

  ierr = PetscFree(rowhit);CHKERRQ(ierr);
  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
  ierr =
MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2526 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2527 CHKMEMQ; 2528 PetscFunctionReturn(0); 2529 } 2530 2531 #undef __FUNCT__ 2532 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2533 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2534 { 2535 Mat B; 2536 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2537 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2538 Mat_SeqAIJ *b; 2539 PetscErrorCode ierr; 2540 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2541 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2542 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2543 2544 PetscFunctionBegin; 2545 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2546 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2547 2548 /* ---------------------------------------------------------------- 2549 Tell every processor the number of nonzeros per row 2550 */ 2551 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2552 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2553 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2554 } 2555 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2556 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2557 displs = recvcounts + size; 2558 for (i=0; i<size; i++) { 2559 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2560 displs[i] = A->rmap->range[i]/bs; 2561 } 2562 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2563 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2564 #else 2565 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2566 #endif 2567 /* --------------------------------------------------------------- 2568 Create the sequential matrix of the same type as the local block diagonal 2569 */ 2570 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2571 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2572 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2573 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2574 b = (Mat_SeqAIJ *)B->data; 2575 2576 /*-------------------------------------------------------------------- 2577 Copy my part of matrix column indices over 2578 */ 2579 sendcount = ad->nz + bd->nz; 2580 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2581 a_jsendbuf = ad->j; 2582 b_jsendbuf = bd->j; 2583 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2584 cnt = 0; 2585 for (i=0; i<n; i++) { 2586 2587 /* put in lower diagonal portion */ 2588 m = bd->i[i+1] - bd->i[i]; 2589 while (m > 0) { 2590 /* is it above diagonal (in bd (compressed) numbering) */ 2591 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2592 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2593 m--; 2594 } 2595 2596 /* put in diagonal portion */ 2597 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2598 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2599 } 2600 2601 /* put in upper diagonal portion */ 2602 while (m-- > 0) { 2603 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2604 } 2605 } 2606 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual 
nz %D",sendcount,cnt); 2607 2608 /*-------------------------------------------------------------------- 2609 Gather all column indices to all processors 2610 */ 2611 for (i=0; i<size; i++) { 2612 recvcounts[i] = 0; 2613 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2614 recvcounts[i] += lens[j]; 2615 } 2616 } 2617 displs[0] = 0; 2618 for (i=1; i<size; i++) { 2619 displs[i] = displs[i-1] + recvcounts[i-1]; 2620 } 2621 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2622 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2623 #else 2624 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2625 #endif 2626 /*-------------------------------------------------------------------- 2627 Assemble the matrix into useable form (note numerical values not yet set) 2628 */ 2629 /* set the b->ilen (length of each row) values */ 2630 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2631 /* set the b->i indices */ 2632 b->i[0] = 0; 2633 for (i=1; i<=A->rmap->N/bs; i++) { 2634 b->i[i] = b->i[i-1] + lens[i-1]; 2635 } 2636 ierr = PetscFree(lens);CHKERRQ(ierr); 2637 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2638 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2639 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2640 2641 if (A->symmetric) { 2642 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2643 } else if (A->hermitian) { 2644 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2645 } else if (A->structurally_symmetric) { 2646 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2647 } 2648 *newmat = B; 2649 PetscFunctionReturn(0); 2650 } 2651 2652 #undef __FUNCT__ 2653 #define __FUNCT__ "MatSOR_MPIBAIJ" 2654 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2655 { 2656 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2657 PetscErrorCode ierr; 2658 Vec bb1 = 0; 2659 2660 PetscFunctionBegin; 2661 if (flag == SOR_APPLY_UPPER) { 2662 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2663 PetscFunctionReturn(0); 2664 } 2665 2666 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2667 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2668 } 2669 2670 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2671 if (flag & SOR_ZERO_INITIAL_GUESS) { 2672 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2673 its--; 2674 } 2675 2676 while (its--) { 2677 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2678 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2679 2680 /* update rhs: bb1 = bb - B*x */ 2681 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2682 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2683 2684 /* local sweep */ 2685 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2686 } 2687 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2688 if (flag & SOR_ZERO_INITIAL_GUESS) { 2689 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2690 its--; 2691 } 2692 while (its--) { 2693 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2694 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2695 2696 /* update rhs: bb1 = bb - B*x */ 2697 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2698 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2699 2700 /* local sweep */ 2701 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2702 } 2703 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2704 if (flag & SOR_ZERO_INITIAL_GUESS) { 2705 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2706 its--; 2707 } 2708 while (its--) { 2709 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2710 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2711 2712 /* update rhs: bb1 = bb - B*x */ 2713 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2714 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2715 2716 /* local sweep */ 2717 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2718 } 2719 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2720 2721 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2722 PetscFunctionReturn(0); 2723 } 2724 2725 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2726 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2729 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2730 { 2731 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2732 PetscErrorCode ierr; 2733 2734 PetscFunctionBegin; 2735 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 2740 /* -------------------------------------------------------------------*/ 2741 static struct _MatOps MatOps_Values = { 2742 MatSetValues_MPIBAIJ, 2743 MatGetRow_MPIBAIJ, 2744 MatRestoreRow_MPIBAIJ, 2745 MatMult_MPIBAIJ, 2746 /* 4*/ MatMultAdd_MPIBAIJ, 2747 MatMultTranspose_MPIBAIJ, 2748 MatMultTransposeAdd_MPIBAIJ, 2749 0, 2750 0, 2751 0, 2752 /*10*/ 0, 2753 0, 2754 0, 2755 MatSOR_MPIBAIJ, 2756 MatTranspose_MPIBAIJ, 2757 /*15*/ MatGetInfo_MPIBAIJ, 2758 MatEqual_MPIBAIJ, 2759 MatGetDiagonal_MPIBAIJ, 2760 MatDiagonalScale_MPIBAIJ, 2761 MatNorm_MPIBAIJ, 2762 /*20*/ MatAssemblyBegin_MPIBAIJ, 2763 MatAssemblyEnd_MPIBAIJ, 2764 MatSetOption_MPIBAIJ, 2765 MatZeroEntries_MPIBAIJ, 2766 /*24*/ MatZeroRows_MPIBAIJ, 2767 0, 2768 0, 2769 0, 2770 0, 2771 /*29*/ MatSetUp_MPIBAIJ, 2772 0, 2773 0, 2774 0, 2775 0, 2776 /*34*/ MatDuplicate_MPIBAIJ, 2777 0, 2778 0, 2779 0, 2780 0, 2781 /*39*/ MatAXPY_MPIBAIJ, 2782 MatGetSubMatrices_MPIBAIJ, 2783 MatIncreaseOverlap_MPIBAIJ, 2784 MatGetValues_MPIBAIJ, 2785 MatCopy_MPIBAIJ, 2786 /*44*/ 0, 2787 MatScale_MPIBAIJ, 2788 0, 2789 0, 2790 0, 2791 /*49*/ 0, 2792 0, 2793 0, 2794 0, 2795 0, 2796 /*54*/ MatFDColoringCreate_MPIBAIJ, 2797 0, 2798 MatSetUnfactored_MPIBAIJ, 2799 MatPermute_MPIBAIJ, 2800 MatSetValuesBlocked_MPIBAIJ, 2801 /*59*/ MatGetSubMatrix_MPIBAIJ, 2802 MatDestroy_MPIBAIJ, 2803 MatView_MPIBAIJ, 2804 0, 2805 0, 2806 /*64*/ 0, 2807 0, 2808 0, 2809 0, 2810 0, 2811 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2812 0, 2813 0, 2814 0, 2815 0, 2816 /*74*/ 0, 2817 MatFDColoringApply_BAIJ, 2818 0, 2819 0, 2820 0, 2821 /*79*/ 0, 2822 0, 2823 0, 2824 0, 2825 MatLoad_MPIBAIJ, 2826 /*84*/ 0, 2827 0, 2828 0, 2829 0, 2830 0, 2831 /*89*/ 0, 2832 0, 2833 0, 2834 0, 2835 0, 2836 /*94*/ 0, 2837 0, 2838 0, 2839 0, 2840 0, 2841 
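       /*
          Editor's note (added): MatOps_Values is the MATMPIBAIJ dispatch table;
          each slot fills a fixed position in struct _MatOps, and a 0 marks an
          operation this matrix type does not implement.  A generic call such as

              ierr = MatMult(A,x,y);CHKERRQ(ierr);

          therefore reaches MatMult_MPIBAIJ through the pointer A->ops->mult
          that this table installs (illustrative sketch only).
       */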
/*99*/ 0,
       0,
       0,
       0,
       0,
/*104*/0,
       MatRealPart_MPIBAIJ,
       MatImaginaryPart_MPIBAIJ,
       0,
       0,
/*109*/0,
       0,
       0,
       0,
       0,
/*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
       0,
       MatGetGhosts_MPIBAIJ,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       0,
/*124*/0,
       0,
       MatInvertBlockDiagonal_MPIBAIJ
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ=0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
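/*
   Editor's sketch (added; not part of the original source): driving the
   CSR-style preallocation routine above.  The block size and the ii/jj/vv
   arrays are hypothetical application data; note that ii and jj index
   blocks, not scalar entries, and each block supplies bs*bs values.
   Compiled out since it is illustrative only.
*/
#if 0
static PetscErrorCode PreallocateFromBlockCSR(Mat B)
{
  PetscErrorCode    ierr;
  const PetscInt    bs     = 2;                 /* 2x2 blocks */
  const PetscInt    ii[]   = {0,2,3};           /* local block row i owns jj[ii[i]..ii[i+1]) */
  const PetscInt    jj[]   = {0,1,1};           /* sorted block column indices */
  const PetscScalar vv[12] = {1,0,0,1, 2,0,0,2, 3,0,0,3}; /* bs*bs scalars per block */

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif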
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   from data given in block compressed row (CSR-by-block) form.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, baij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;

  PetscFunctionBegin;
  if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
  if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;

  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  b = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr =
MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3043 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3044 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3045 } 3046 3047 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3048 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3049 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3050 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3051 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3052 B->preallocated = PETSC_TRUE; 3053 PetscFunctionReturn(0); 3054 } 3055 EXTERN_C_END 3056 3057 EXTERN_C_BEGIN 3058 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3059 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3060 EXTERN_C_END 3061 3062 3063 EXTERN_C_BEGIN 3064 #undef __FUNCT__ 3065 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3066 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3067 { 3068 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3069 PetscErrorCode ierr; 3070 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3071 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3072 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3073 3074 PetscFunctionBegin; 3075 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3076 ii[0] = 0; 3077 CHKMEMQ; 3078 for (i=0; i<M; i++) { 3079 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3080 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3081 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3082 /* remove one from count of matrix has diagonal */ 3083 for (j=id[i]; j<id[i+1]; j++) { 3084 if (jd[j] == i) {ii[i+1]--;break;} 3085 } 3086 CHKMEMQ; 3087 } 3088 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3089 cnt = 0; 3090 for (i=0; i<M; i++) { 3091 for (j=io[i]; j<io[i+1]; j++) { 3092 if (garray[jo[j]] > rstart) break; 3093 jj[cnt++] = garray[jo[j]]; 3094 CHKMEMQ; 3095 } 3096 for (k=id[i]; k<id[i+1]; k++) { 3097 if (jd[k] != i) { 3098 jj[cnt++] = rstart + jd[k]; 3099 CHKMEMQ; 3100 } 3101 } 3102 for (;j<io[i+1]; j++) { 3103 jj[cnt++] = garray[jo[j]]; 3104 CHKMEMQ; 3105 } 3106 } 3107 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3108 PetscFunctionReturn(0); 3109 } 3110 EXTERN_C_END 3111 3112 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3113 EXTERN_C_BEGIN 3114 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3115 EXTERN_C_END 3116 3117 EXTERN_C_BEGIN 3118 #undef __FUNCT__ 3119 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3120 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3121 { 3122 PetscErrorCode ierr; 3123 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3124 Mat B; 3125 Mat_MPIAIJ *b; 3126 3127 PetscFunctionBegin; 3128 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3129 3130 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3131 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3132 ierr = 
ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3133 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3134 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3135 b = (Mat_MPIAIJ*) B->data; 3136 3137 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3138 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3139 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3140 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3141 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3142 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3143 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3144 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3145 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3146 if (reuse == MAT_REUSE_MATRIX) { 3147 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3148 } else { 3149 *newmat = B; 3150 } 3151 PetscFunctionReturn(0); 3152 } 3153 EXTERN_C_END 3154 3155 EXTERN_C_BEGIN 3156 #if defined(PETSC_HAVE_MUMPS) 3157 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3158 #endif 3159 EXTERN_C_END 3160 3161 /*MC 3162 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3163 3164 Options Database Keys: 3165 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3166 . -mat_block_size <bs> - set the blocksize used to store the matrix 3167 - -mat_use_hash_table <fact> - set the hash table factor used during assembly 3168 3169 Level: beginner 3170 3171 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATBAIJ 3172 M*/ 3173 3174 EXTERN_C_BEGIN 3175 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3176 EXTERN_C_END 3177 3178 EXTERN_C_BEGIN 3179 #undef __FUNCT__ 3180 #define __FUNCT__ "MatCreate_MPIBAIJ" 3181 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3182 { 3183 Mat_MPIBAIJ *b; 3184 PetscErrorCode ierr; 3185 PetscBool flg; 3186 3187 PetscFunctionBegin; 3188 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3189 B->data = (void*)b; 3190 3191 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3192 B->assembled = PETSC_FALSE; 3193 3194 B->insertmode = NOT_SET_VALUES; 3195 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3196 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3197 3198 /* build local table of row and column ownerships */ 3199 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3200 3201 /* build cache for off-processor entries formed during assembly */ 3202 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3203 b->donotstash = PETSC_FALSE; 3204 b->colmap = PETSC_NULL; 3205 b->garray = PETSC_NULL; 3206 b->roworiented = PETSC_TRUE; 3207 3208 /* stuff used in block assembly */ 3209 b->barray = 0; 3210 3211 /* stuff used for matrix vector multiply */ 3212 b->lvec = 0; 3213 b->Mvctx = 0; 3214 3215 /* stuff for MatGetRow() */ 3216 b->rowindices = 0; 3217 b->rowvalues = 0; 3218 b->getrowactive = PETSC_FALSE; 3219 3220 /* hash table stuff */ 3221 b->ht = 0; 3222 b->hd = 0; 3223 b->ht_size = 0; 3224 b->ht_flag = PETSC_FALSE; 3225 b->ht_fact = 0; 3226 b->ht_total_ct = 0; 3227 b->ht_insert_ct = 0; 3228 3229 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3230 b->ijonly = PETSC_FALSE; 3231 3232 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3233 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in 
constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3234 if (flg) { 3235 PetscReal fact = 1.39; 3236 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3237 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3238 if (fact <= 1.0) fact = 1.39; 3239 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3240 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3241 } 3242 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3243 3244 #if defined(PETSC_HAVE_MUMPS) 3245 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3246 #endif 3247 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3248 "MatConvert_MPIBAIJ_MPIAdj", 3249 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3250 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3251 "MatConvert_MPIBAIJ_MPIAIJ", 3252 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3253 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3254 "MatConvert_MPIBAIJ_MPISBAIJ", 3255 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3256 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3257 "MatStoreValues_MPIBAIJ", 3258 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3259 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3260 "MatRetrieveValues_MPIBAIJ", 3261 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3262 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3263 "MatGetDiagonalBlock_MPIBAIJ", 3264 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3265 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3266 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3267 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3268 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3269 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3270 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3271 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3272 "MatDiagonalScaleLocal_MPIBAIJ", 3273 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3274 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3275 "MatSetHashTableFactor_MPIBAIJ", 3276 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3277 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3278 "MatConvert_MPIBAIJ_MPIBSTRM", 3279 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3280 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3281 PetscFunctionReturn(0); 3282 } 3283 EXTERN_C_END 3284 3285 /*MC 3286 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3287 3288 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3289 and MATMPIBAIJ otherwise. 3290 3291 Options Database Keys: 3292 . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3293 3294 Level: beginner 3295 3296 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3297 M*/ 3298 3299 #undef __FUNCT__ 3300 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3301 /*@C 3302 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3303 (block compressed row). For good matrix assembly performance 3304 the user should preallocate the matrix storage by setting the parameters 3305 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3306 performance can be increased by more than a factor of 50. 3307 3308 Collective on Mat 3309 3310 Input Parameters: 3311 + A - the matrix 3312 . bs - size of block 3313 . d_nz - number of block nonzeros per block row in diagonal portion of local 3314 submatrix (same for all local rows) 3315 . d_nnz - array containing the number of block nonzeros in the various block rows 3316 in the diagonal portion of the local submatrix (possibly different for each block 3317 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3318 set it even if it is zero. 3319 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3320 submatrix (same for all local rows). 3321 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3322 off-diagonal portion of the local submatrix (possibly different for 3323 each block row) or PETSC_NULL. 3324 3325 If the *_nnz parameter is given then the *_nz parameter is ignored 3326 3327 Options Database Keys: 3328 + -mat_block_size - size of the blocks to use 3329 - -mat_use_hash_table <fact> - set the hash table factor used during assembly 3330 3331 Notes: 3332 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3333 then it must be used on all processors that share the object for that argument. 3334 3335 Storage Information: 3336 For a square global matrix we define each processor's diagonal portion 3337 to be its local rows and the corresponding columns (a square submatrix); 3338 each processor's off-diagonal portion encompasses the remainder of the 3339 local matrix (a rectangular submatrix). 3340 3341 The user can specify preallocated storage for the diagonal part of 3342 the local submatrix with either d_nz or d_nnz (not both). Set 3343 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3344 memory allocation. Likewise, specify preallocated storage for the 3345 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3346 3347 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3348 the figure below we depict these three local rows and all columns (0-11). 3349 3350 .vb 3351 0 1 2 3 4 5 6 7 8 9 10 11 3352 ------------------- 3353 row 3 | o o o d d d o o o o o o 3354 row 4 | o o o d d d o o o o o o 3355 row 5 | o o o d d d o o o o o o 3356 ------------------- 3357 .ve 3358 3359 Thus, any entries in the d locations are stored in the d (diagonal) 3360 submatrix, and any entries in the o locations are stored in the 3361 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3362 stored simply in the MATSEQBAIJ format for compressed row storage. 3363 3364 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3365 and o_nz should indicate the number of block nonzeros per row in the o matrix. 
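For example, with a block size of 3 the three rows above form one block row and the twelve columns form four block columns; the d locations occupy one block (columns 3-5) and the o locations the other three blocks, so here d_nz = 1 and o_nz = 3.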
3366 In general, for PDE problems in which most nonzeros are near the diagonal, 3367 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3368 or you will get TERRIBLE performance; see the users' manual chapter on 3369 matrices. 3370 3371 You can call MatGetInfo() to get information on how effective the preallocation was; 3372 for example, look at the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3373 You can also run with the option -info and look for messages with the string 3374 malloc in them to see if additional memory allocation was needed. 3375 3376 Level: intermediate 3377 3378 .keywords: matrix, block, aij, compressed row, sparse, parallel 3379 3380 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3381 @*/ 3382 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3383 { 3384 PetscErrorCode ierr; 3385 3386 PetscFunctionBegin; 3387 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3388 PetscValidType(B,1); 3389 PetscValidLogicalCollectiveInt(B,bs,2); 3390 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3391 PetscFunctionReturn(0); 3392 } 3393 3394 #undef __FUNCT__ 3395 #define __FUNCT__ "MatCreateBAIJ" 3396 /*@C 3397 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3398 (block compressed row). For good matrix assembly performance 3399 the user should preallocate the matrix storage by setting the parameters 3400 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3401 performance can be increased by more than a factor of 50. 3402 3403 Collective on MPI_Comm 3404 3405 Input Parameters: 3406 + comm - MPI communicator 3407 . bs - size of block 3408 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3409 This value should be the same as the local size used in creating the 3410 y vector for the matrix-vector product y = Ax. 3411 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3412 This value should be the same as the local size used in creating the 3413 x vector for the matrix-vector product y = Ax. 3414 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3415 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3416 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3417 submatrix (same for all local rows) 3418 . d_nnz - array containing the number of nonzero blocks in the various block rows 3419 in the diagonal portion of the local submatrix (possibly different for each block 3420 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3421 and set it even if it is zero. 3422 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3423 submatrix (same for all local rows). 3424 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3425 off-diagonal portion of the local submatrix (possibly different for 3426 each block row) or PETSC_NULL. 3427 3428 Output Parameter: 3429 . 
A - the matrix 3430 3431 Options Database Keys: 3432 + -mat_block_size - size of the blocks to use 3433 - -mat_use_hash_table <fact> - set the hash table factor used during assembly 3434 3435 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3436 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3437 [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()] 3438 3439 Notes: 3440 If the *_nnz parameter is given then the *_nz parameter is ignored 3441 3442 A nonzero block is any block that has 1 or more nonzeros in it 3443 3444 The user MUST specify either the local or global matrix dimensions 3445 (possibly both). 3446 3447 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3448 then it must be used on all processors that share the object for that argument. 3449 3450 Storage Information: 3451 For a square global matrix we define each processor's diagonal portion 3452 to be its local rows and the corresponding columns (a square submatrix); 3453 each processor's off-diagonal portion encompasses the remainder of the 3454 local matrix (a rectangular submatrix). 3455 3456 The user can specify preallocated storage for the diagonal part of 3457 the local submatrix with either d_nz or d_nnz (not both). Set 3458 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3459 memory allocation. Likewise, specify preallocated storage for the 3460 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3461 3462 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3463 the figure below we depict these three local rows and all columns (0-11). 3464 3465 .vb 3466 0 1 2 3 4 5 6 7 8 9 10 11 3467 ------------------- 3468 row 3 | o o o d d d o o o o o o 3469 row 4 | o o o d d d o o o o o o 3470 row 5 | o o o d d d o o o o o o 3471 ------------------- 3472 .ve 3473 3474 Thus, any entries in the d locations are stored in the d (diagonal) 3475 submatrix, and any entries in the o locations are stored in the 3476 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3477 stored simply in the MATSEQBAIJ format for compressed row storage. 3478 3479 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3480 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3481 In general, for PDE problems in which most nonzeros are near the diagonal, 3482 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3483 or you will get TERRIBLE performance; see the users' manual chapter on 3484 matrices. 
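Example usage, a minimal sketch of the recommended MatCreate()/MatSetType()/MatXXXXSetPreallocation() paradigm (bs, m, n, d_nnz and o_nnz are assumed to have been computed already; each preallocation call is ignored by the type it does not match):

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
   ierr = MatSeqBAIJSetPreallocation(A,bs,0,d_nnz);CHKERRQ(ierr);
.ve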
3485 3486 Level: intermediate 3487 3488 .keywords: matrix, block, aij, compressed row, sparse, parallel 3489 3490 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3491 @*/ 3492 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3493 { 3494 PetscErrorCode ierr; 3495 PetscMPIInt size; 3496 3497 PetscFunctionBegin; 3498 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3499 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3500 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3501 if (size > 1) { 3502 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3503 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3504 } else { 3505 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3506 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3507 } 3508 PetscFunctionReturn(0); 3509 } 3510 3511 #undef __FUNCT__ 3512 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3513 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3514 { 3515 Mat mat; 3516 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3517 PetscErrorCode ierr; 3518 PetscInt len=0; 3519 3520 PetscFunctionBegin; 3521 *newmat = 0; 3522 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3523 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3524 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3525 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3526 3527 mat->factortype = matin->factortype; 3528 mat->preallocated = PETSC_TRUE; 3529 mat->assembled = PETSC_TRUE; 3530 mat->insertmode = NOT_SET_VALUES; 3531 3532 a = (Mat_MPIBAIJ*)mat->data; 3533 mat->rmap->bs = matin->rmap->bs; 3534 a->bs2 = oldmat->bs2; 3535 a->mbs = oldmat->mbs; 3536 a->nbs = oldmat->nbs; 3537 a->Mbs = oldmat->Mbs; 3538 a->Nbs = oldmat->Nbs; 3539 3540 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3541 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3542 3543 a->size = oldmat->size; 3544 a->rank = oldmat->rank; 3545 a->donotstash = oldmat->donotstash; 3546 a->roworiented = oldmat->roworiented; 3547 a->rowindices = 0; 3548 a->rowvalues = 0; 3549 a->getrowactive = PETSC_FALSE; 3550 a->barray = 0; 3551 a->rstartbs = oldmat->rstartbs; 3552 a->rendbs = oldmat->rendbs; 3553 a->cstartbs = oldmat->cstartbs; 3554 a->cendbs = oldmat->cendbs; 3555 3556 /* hash table stuff */ 3557 a->ht = 0; 3558 a->hd = 0; 3559 a->ht_size = 0; 3560 a->ht_flag = oldmat->ht_flag; 3561 a->ht_fact = oldmat->ht_fact; 3562 a->ht_total_ct = 0; 3563 a->ht_insert_ct = 0; 3564 3565 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3566 if (oldmat->colmap) { 3567 #if defined (PETSC_USE_CTABLE) 3568 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3569 #else 3570 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3571 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3572 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3573 #endif 3574 } else a->colmap = 0; 3575 3576 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3577 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3578 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3579 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3580 } else a->garray = 0; 3581 3582 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3583 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3584 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3585 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3586 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3587 3588 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3589 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3590 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3591 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3592 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3593 *newmat = mat; 3594 3595 PetscFunctionReturn(0); 3596 } 3597 3598 #undef __FUNCT__ 3599 #define __FUNCT__ "MatLoad_MPIBAIJ" 3600 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3601 { 3602 PetscErrorCode ierr; 3603 int fd; 3604 PetscInt i,nz,j,rstart,rend; 3605 PetscScalar *vals,*buf; 3606 MPI_Comm comm = ((PetscObject)viewer)->comm; 3607 MPI_Status status; 3608 PetscMPIInt rank,size,maxnz; 3609 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3610 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3611 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3612 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3613 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3614 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3615 3616 PetscFunctionBegin; 3617 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3618 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3619 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3620 3621 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3622 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3623 if (!rank) { 3624 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3625 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3626 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object"); 3627 } 3628 3629 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3630 3631 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3632 M = header[1]; N = header[2]; 3633 3634 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3635 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3636 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3637 3638 /* If global sizes are set, check if they are consistent with that given in the file */ 3639 if (sizesset) { 3640 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3641 } 3642 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows); 3643 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols); 3644 3645 if (M != N) 
SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3646 3647 /* 3648 This code adds extra rows to make sure the number of rows is 3649 divisible by the blocksize: e.g. M=10 with bs=3 gives Mbs=3 and 3650 extra_rows=2, after which Mbs becomes 4 */ 3651 Mbs = M/bs; 3652 extra_rows = bs - M + bs*Mbs; 3653 if (extra_rows == bs) extra_rows = 0; 3654 else Mbs++; 3655 if (extra_rows && !rank) { 3656 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3657 } 3658 3659 /* determine ownership of all rows */ 3660 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3661 mbs = Mbs/size + ((Mbs % size) > rank); 3662 m = mbs*bs; 3663 } else { /* User set */ 3664 m = newmat->rmap->n; 3665 mbs = m/bs; 3666 } 3667 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3668 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3669 3670 /* process 0 needs enough room for process with most rows */ 3671 if (!rank) { 3672 mmax = rowners[1]; 3673 for (i=2; i<=size; i++) { 3674 mmax = PetscMax(mmax,rowners[i]); 3675 } 3676 mmax*=bs; 3677 } else mmax = m; 3678 3679 rowners[0] = 0; 3680 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3681 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3682 rstart = rowners[rank]; 3683 rend = rowners[rank+1]; 3684 3685 /* distribute row lengths to all processors */ 3686 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3687 if (!rank) { 3688 mend = m; 3689 if (size == 1) mend = mend - extra_rows; 3690 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3691 for (j=mend; j<m; j++) locrowlens[j] = 1; 3692 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3693 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3694 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3695 for (j=0; j<m; j++) { 3696 procsnz[0] += locrowlens[j]; 3697 } 3698 for (i=1; i<size; i++) { 3699 mend = browners[i+1] - browners[i]; 3700 if (i == size-1) mend = mend - extra_rows; 3701 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3702 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3703 /* calculate the number of nonzeros on each processor */ 3704 for (j=0; j<browners[i+1]-browners[i]; j++) { 3705 procsnz[i] += rowlengths[j]; 3706 } 3707 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3708 } 3709 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3710 } else { 3711 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3712 } 3713 3714 if (!rank) { 3715 /* determine max buffer needed and allocate it */ 3716 maxnz = procsnz[0]; 3717 for (i=1; i<size; i++) { 3718 maxnz = PetscMax(maxnz,procsnz[i]); 3719 } 3720 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3721 3722 /* read in my part of the matrix column indices */ 3723 nz = procsnz[0]; 3724 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3725 mycols = ibuf; 3726 if (size == 1) nz -= extra_rows; 3727 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3728 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3729 3730 /* read in everyone else's part (except the last proc's) and ship it off */ 3731 for (i=1; i<size-1; i++) { 3732 nz = procsnz[i]; 3733 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3734 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3735 } 3736 /* read in the stuff for the last proc */ 3737 if (size != 1) { 3738 nz = procsnz[size-1] - extra_rows; /* the extra rows 
are not on the disk */ 3739 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3740 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3741 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3742 } 3743 ierr = PetscFree(cols);CHKERRQ(ierr); 3744 } else { 3745 /* determine buffer space needed for message */ 3746 nz = 0; 3747 for (i=0; i<m; i++) { 3748 nz += locrowlens[i]; 3749 } 3750 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3751 mycols = ibuf; 3752 /* receive message of column indices*/ 3753 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3754 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3755 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3756 } 3757 3758 /* loop over local rows, determining number of off diagonal entries */ 3759 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3760 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3761 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3762 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3763 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3764 rowcount = 0; nzcount = 0; 3765 for (i=0; i<mbs; i++) { 3766 dcount = 0; 3767 odcount = 0; 3768 for (j=0; j<bs; j++) { 3769 kmax = locrowlens[rowcount]; 3770 for (k=0; k<kmax; k++) { 3771 tmp = mycols[nzcount++]/bs; 3772 if (!mask[tmp]) { 3773 mask[tmp] = 1; 3774 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3775 else masked1[dcount++] = tmp; 3776 } 3777 } 3778 rowcount++; 3779 } 3780 3781 dlens[i] = dcount; 3782 odlens[i] = odcount; 3783 3784 /* zero out the mask elements we set */ 3785 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3786 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3787 } 3788 3789 3790 if (!sizesset) { 3791 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3792 } 3793 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3794 3795 if (!rank) { 3796 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3797 /* read in my part of the matrix numerical values */ 3798 nz = procsnz[0]; 3799 vals = buf; 3800 mycols = ibuf; 3801 if (size == 1) nz -= extra_rows; 3802 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3803 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3804 3805 /* insert into matrix */ 3806 jj = rstart*bs; 3807 for (i=0; i<m; i++) { 3808 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3809 mycols += locrowlens[i]; 3810 vals += locrowlens[i]; 3811 jj++; 3812 } 3813 /* read in other processors (except the last one) and ship out */ 3814 for (i=1; i<size-1; i++) { 3815 nz = procsnz[i]; 3816 vals = buf; 3817 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3818 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3819 } 3820 /* the last proc */ 3821 if (size != 1) { 3822 nz = procsnz[i] - extra_rows; 3823 vals = buf; 3824 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3825 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3826 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3827 } 3828 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3829 } else { 3830 /* receive numeric values */ 3831 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3832 3833 /* receive message of values */ 3834 vals = buf; 3835 mycols = ibuf; 3836 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3837 3838 /* insert into matrix */ 3839 jj = rstart*bs; 3840 for (i=0; i<m; i++) { 3841 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3842 mycols += locrowlens[i]; 3843 vals += locrowlens[i]; 3844 jj++; 3845 } 3846 } 3847 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3848 ierr = PetscFree(buf);CHKERRQ(ierr); 3849 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3850 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3851 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3852 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3853 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3854 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3855 3856 PetscFunctionReturn(0); 3857 } 3858 3859 #undef __FUNCT__ 3860 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3861 /*@ 3862 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3863 3864 Input Parameters: 3865 + mat - the matrix 3866 - fact - the hash table factor 3867 3868 Not Collective, each process can use a different factor 3869 3870 Level: advanced 3871 3872 Notes: 3873 This can also be set by the command line option: -mat_use_hash_table <fact> 3874 3875 .keywords: matrix, hashtable, factor, HT 3876 3877 .seealso: MatSetOption() 3878 @*/ 3879 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3880 { 3881 PetscErrorCode ierr; 3882 3883 PetscFunctionBegin; 3884 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3885 PetscFunctionReturn(0); 3886 } 3887 3888 EXTERN_C_BEGIN 3889 #undef __FUNCT__ 3890 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3891 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3892 { 3893 Mat_MPIBAIJ *baij; 3894 3895 PetscFunctionBegin; 3896 baij = (Mat_MPIBAIJ*)mat->data; 3897 baij->ht_fact = fact; 3898 PetscFunctionReturn(0); 3899 } 3900 EXTERN_C_END 3901 3902 #undef __FUNCT__ 3903 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3904 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3905 { 3906 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3907 PetscFunctionBegin; 3908 *Ad = a->A; 3909 *Ao = a->B; 3910 *colmap = a->garray; 3911 PetscFunctionReturn(0); 3912 } 3913 3914 /* 3915 Special version for direct calls from Fortran (to eliminate two function call overheads) 3916 */ 3917 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3918 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3919 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3920 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3921 #endif 3922 3923 #undef __FUNCT__ 3924 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3925 /*@C 3926 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3927 3928 Collective on Mat 3929 3930 Input Parameters: 3931 + mat - the matrix 3932 . min - number of input rows 3933 . im - input rows 3934 . nin - number of input columns 3935 . in - input columns 3936 . v - numerical values input 3937 - addvin - INSERT_VALUES or ADD_VALUES 3938 3939 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
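It performs the same insertion as

.vb
      call MatSetValuesBlocked(mat,min,im,nin,in,v,addvin,ierr)
.ve

would from Fortran, but skips the argument checking and function-table dispatch of that path (the two function call overheads mentioned above).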
3940 3941 Level: advanced 3942 3943 .seealso: MatSetValuesBlocked() 3944 @*/ 3945 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3946 { 3947 /* convert input arguments to C version */ 3948 Mat mat = *matin; 3949 PetscInt m = *min, n = *nin; 3950 InsertMode addv = *addvin; 3951 3952 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3953 const MatScalar *value; 3954 MatScalar *barray=baij->barray; 3955 PetscBool roworiented = baij->roworiented; 3956 PetscErrorCode ierr; 3957 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3958 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3959 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3960 3961 PetscFunctionBegin; 3962 /* tasks normally handled by MatSetValuesBlocked() */ 3963 if (mat->insertmode == NOT_SET_VALUES) { 3964 mat->insertmode = addv; 3965 } 3966 #if defined(PETSC_USE_DEBUG) 3967 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3968 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3969 #endif 3970 if (mat->assembled) { 3971 mat->was_assembled = PETSC_TRUE; 3972 mat->assembled = PETSC_FALSE; 3973 } 3974 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3975 3976 3977 if (!barray) { 3978 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3979 baij->barray = barray; 3980 } 3981 3982 if (roworiented) { 3983 stepval = (n-1)*bs; 3984 } else { 3985 stepval = (m-1)*bs; 3986 } 3987 for (i=0; i<m; i++) { 3988 if (im[i] < 0) continue; 3989 #if defined(PETSC_USE_DEBUG) 3990 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3991 #endif 3992 if (im[i] >= rstart && im[i] < rend) { 3993 row = im[i] - rstart; 3994 for (j=0; j<n; j++) { 3995 /* If NumCol = 1 then a copy is not required */ 3996 if ((roworiented) && (n == 1)) { 3997 barray = (MatScalar*)v + i*bs2; 3998 } else if ((!roworiented) && (m == 1)) { 3999 barray = (MatScalar*)v + j*bs2; 4000 } else { /* Here a copy is required */ 4001 if (roworiented) { 4002 value = v + i*(stepval+bs)*bs + j*bs; 4003 } else { 4004 value = v + j*(stepval+bs)*bs + i*bs; 4005 } 4006 for (ii=0; ii<bs; ii++,value+=stepval) { 4007 for (jj=0; jj<bs; jj++) { 4008 *barray++ = *value++; 4009 } 4010 } 4011 barray -=bs2; 4012 } 4013 4014 if (in[j] >= cstart && in[j] < cend) { 4015 col = in[j] - cstart; 4016 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4017 } 4018 else if (in[j] < 0) continue; 4019 #if defined(PETSC_USE_DEBUG) 4020 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4021 #endif 4022 else { 4023 if (mat->was_assembled) { 4024 if (!baij->colmap) { 4025 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4026 } 4027 4028 #if defined(PETSC_USE_DEBUG) 4029 #if defined (PETSC_USE_CTABLE) 4030 { PetscInt data; 4031 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4032 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4033 } 4034 #else 4035 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4036 #endif 4037 #endif 4038 #if defined (PETSC_USE_CTABLE) 4039 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4040 col = (col - 1)/bs; 4041 
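/* colmap is 1-based (0 means the block column is not present in B); undo the shift and convert to a local block column, as in the #else branch below */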
#else 4042 col = (baij->colmap[in[j]] - 1)/bs; 4043 #endif 4044 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4045 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4046 col = in[j]; 4047 } 4048 } 4049 else col = in[j]; 4050 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4051 } 4052 } 4053 } else { 4054 if (!baij->donotstash) { 4055 if (roworiented) { 4056 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4057 } else { 4058 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4059 } 4060 } 4061 } 4062 } 4063 4064 /* task normally handled by MatSetValuesBlocked() */ 4065 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4066 PetscFunctionReturn(0); 4067 } 4068 4069 #undef __FUNCT__ 4070 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4071 /*@ 4072 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local 4073 rows in standard CSR format. 4074 4075 Collective on MPI_Comm 4076 4077 Input Parameters: 4078 + comm - MPI communicator 4079 . bs - the block size, only a block size of 1 is supported 4080 . m - number of local rows (Cannot be PETSC_DECIDE) 4081 . n - This value should be the same as the local size used in creating the 4082 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4083 calculated if N is given) For square matrices n is almost always m. 4084 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4085 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4086 . i - row indices 4087 . j - column indices 4088 - a - matrix values 4089 4090 Output Parameter: 4091 . mat - the matrix 4092 4093 Level: intermediate 4094 4095 Notes: 4096 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4097 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4098 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4099 4100 The i and j indices are 0 based, and the i array holds the offsets into the j array for the start of each local row. 4101 4102 .keywords: matrix, aij, compressed row, sparse, parallel 4103 4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4105 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4106 @*/ 4107 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4108 { 4109 PetscErrorCode ierr; 4110 4111 4112 PetscFunctionBegin; 4113 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4114 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4115 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4116 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4117 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4118 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4119 PetscFunctionReturn(0); 4120 } 4121
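/*
   A minimal usage sketch for MatCreateMPIBAIJWithArrays(); the data below is hypothetical,
   and bs must be 1 as noted in the manual page above. Each process passes only its own
   rows in CSR form:

      PetscInt    ii[] = {0,2,3};           two local rows, with 2 and 1 nonzeros
      PetscInt    jj[] = {0,5,3};           global column indices, sorted within each row
      PetscScalar aa[] = {1.0,2.0,3.0};
      Mat         A;

      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,6,ii,jj,aa,&A);CHKERRQ(ierr);
*/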