1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 9 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 10 { 11 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 12 PetscErrorCode ierr; 13 PetscInt i,*idxb = 0; 14 PetscScalar *va,*vb; 15 Vec vtmp; 16 17 PetscFunctionBegin; 18 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 19 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 20 if (idx) { 21 for (i=0; i<A->rmap->n; i++) { 22 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 23 } 24 } 25 26 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 27 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 28 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 29 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 30 31 for (i=0; i<A->rmap->n; i++) { 32 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 33 va[i] = vb[i]; 34 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 35 } 36 } 37 38 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 39 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 40 ierr = PetscFree(idxb);CHKERRQ(ierr); 41 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 47 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 48 { 49 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 60 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 61 { 62 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 67 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 68 PetscFunctionReturn(0); 69 } 70 71 /* 72 Local utility routine that creates a mapping from the global column 73 number to the local number in the off-diagonal part of the local 74 storage of the matrix. This is done in a non scalable way since the 75 length of colmap equals the global matrix length. 
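   For example (illustrative, not taken from this file): with bs=2 and two off-diagonal
   block columns garray[] = {3,7}, the colmap built below stores colmap[3] = 1 and
   colmap[7] = 3, i.e. the local starting column plus one.  A stored value of 0 (or a
   failed PetscTable lookup) therefore means that the global block column does not occur
   in the off-diagonal part B, which is how MatSetValues_MPIBAIJ() detects a new
   off-diagonal nonzero block.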
76 */ 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 79 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 80 { 81 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 82 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 83 PetscErrorCode ierr; 84 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 85 86 PetscFunctionBegin; 87 #if defined(PETSC_USE_CTABLE) 88 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 89 for (i=0; i<nbs; i++) { 90 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 91 } 92 #else 93 ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 94 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 95 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 96 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 102 { \ 103 \ 104 brow = row/bs; \ 105 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 106 rmax = aimax[brow]; nrow = ailen[brow]; \ 107 bcol = col/bs; \ 108 ridx = row % bs; cidx = col % bs; \ 109 low = 0; high = nrow; \ 110 while (high-low > 3) { \ 111 t = (low+high)/2; \ 112 if (rp[t] > bcol) high = t; \ 113 else low = t; \ 114 } \ 115 for (_i=low; _i<high; _i++) { \ 116 if (rp[_i] > bcol) break; \ 117 if (rp[_i] == bcol) { \ 118 bap = ap + bs2*_i + bs*cidx + ridx; \ 119 if (addv == ADD_VALUES) *bap += value; \ 120 else *bap = value; \ 121 goto a_noinsert; \ 122 } \ 123 } \ 124 if (a->nonew == 1) goto a_noinsert; \ 125 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 126 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 127 N = nrow++ - 1; \ 128 /* shift up all the later entries in this row */ \ 129 for (ii=N; ii>=_i; ii--) { \ 130 rp[ii+1] = rp[ii]; \ 131 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 132 } \ 133 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 134 rp[_i] = bcol; \ 135 ap[bs2*_i + bs*cidx + ridx] = value; \ 136 a_noinsert:; \ 137 ailen[brow] = nrow; \ 138 } 139 140 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 141 { \ 142 brow = row/bs; \ 143 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 144 rmax = bimax[brow]; nrow = bilen[brow]; \ 145 bcol = col/bs; \ 146 ridx = row % bs; cidx = col % bs; \ 147 low = 0; high = nrow; \ 148 while (high-low > 3) { \ 149 t = (low+high)/2; \ 150 if (rp[t] > bcol) high = t; \ 151 else low = t; \ 152 } \ 153 for (_i=low; _i<high; _i++) { \ 154 if (rp[_i] > bcol) break; \ 155 if (rp[_i] == bcol) { \ 156 bap = ap + bs2*_i + bs*cidx + ridx; \ 157 if (addv == ADD_VALUES) *bap += value; \ 158 else *bap = value; \ 159 goto b_noinsert; \ 160 } \ 161 } \ 162 if (b->nonew == 1) goto b_noinsert; \ 163 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 164 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 165 N = nrow++ - 1; \ 166 /* shift up all the later entries in this row */ \ 167 for (ii=N; ii>=_i; ii--) { \ 168 rp[ii+1] = rp[ii]; \ 169 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 170 } \ 171 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 172 rp[_i] = 
bcol; \ 173 ap[bs2*_i + bs*cidx + ridx] = value; \ 174 b_noinsert:; \ 175 bilen[brow] = nrow; \ 176 } 177 178 #undef __FUNCT__ 179 #define __FUNCT__ "MatSetValues_MPIBAIJ" 180 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 181 { 182 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 183 MatScalar value; 184 PetscBool roworiented = baij->roworiented; 185 PetscErrorCode ierr; 186 PetscInt i,j,row,col; 187 PetscInt rstart_orig=mat->rmap->rstart; 188 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 189 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 190 191 /* Some Variables required in the macro */ 192 Mat A = baij->A; 193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 194 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 195 MatScalar *aa =a->a; 196 197 Mat B = baij->B; 198 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 199 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 200 MatScalar *ba =b->a; 201 202 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 203 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 204 MatScalar *ap,*bap; 205 206 PetscFunctionBegin; 207 for (i=0; i<m; i++) { 208 if (im[i] < 0) continue; 209 #if defined(PETSC_USE_DEBUG) 210 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 211 #endif 212 if (im[i] >= rstart_orig && im[i] < rend_orig) { 213 row = im[i] - rstart_orig; 214 for (j=0; j<n; j++) { 215 if (in[j] >= cstart_orig && in[j] < cend_orig) { 216 col = in[j] - cstart_orig; 217 if (roworiented) value = v[i*n+j]; 218 else value = v[i+j*m]; 219 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 220 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 221 } else if (in[j] < 0) continue; 222 #if defined(PETSC_USE_DEBUG) 223 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 224 #endif 225 else { 226 if (mat->was_assembled) { 227 if (!baij->colmap) { 228 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 229 } 230 #if defined(PETSC_USE_CTABLE) 231 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 232 col = col - 1; 233 #else 234 col = baij->colmap[in[j]/bs] - 1; 235 #endif 236 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 237 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 238 col = in[j]; 239 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 240 B = baij->B; 241 b = (Mat_SeqBAIJ*)(B)->data; 242 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 243 ba =b->a; 244 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 245 else col += in[j]%bs; 246 } else col = in[j]; 247 if (roworiented) value = v[i*n+j]; 248 else value = v[i+j*m]; 249 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 250 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 251 } 252 } 253 } else { 254 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 255 if (!baij->donotstash) { 256 mat->assembled = PETSC_FALSE; 257 if (roworiented) { 258 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 259 } else { 260 ierr = 
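/* Off-process rows are not inserted directly; they are copied into mat->stash here and
   communicated to the owning rank during assembly.  A minimal caller-side sketch
   (illustrative only, standard PETSc calls):

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);   (grow may be off-process)
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); */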
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 261 } 262 } 263 } 264 } 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 270 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 271 { 272 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 273 const PetscScalar *value; 274 MatScalar *barray = baij->barray; 275 PetscBool roworiented = baij->roworiented; 276 PetscErrorCode ierr; 277 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 278 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 279 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 280 281 PetscFunctionBegin; 282 if (!barray) { 283 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 284 baij->barray = barray; 285 } 286 287 if (roworiented) stepval = (n-1)*bs; 288 else stepval = (m-1)*bs; 289 290 for (i=0; i<m; i++) { 291 if (im[i] < 0) continue; 292 #if defined(PETSC_USE_DEBUG) 293 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 294 #endif 295 if (im[i] >= rstart && im[i] < rend) { 296 row = im[i] - rstart; 297 for (j=0; j<n; j++) { 298 /* If NumCol = 1 then a copy is not required */ 299 if ((roworiented) && (n == 1)) { 300 barray = (MatScalar*)v + i*bs2; 301 } else if ((!roworiented) && (m == 1)) { 302 barray = (MatScalar*)v + j*bs2; 303 } else { /* Here a copy is required */ 304 if (roworiented) { 305 value = v + (i*(stepval+bs) + j)*bs; 306 } else { 307 value = v + (j*(stepval+bs) + i)*bs; 308 } 309 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 310 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 311 barray += bs; 312 } 313 barray -= bs2; 314 } 315 316 if (in[j] >= cstart && in[j] < cend) { 317 col = in[j] - cstart; 318 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 319 } else if (in[j] < 0) continue; 320 #if defined(PETSC_USE_DEBUG) 321 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 322 #endif 323 else { 324 if (mat->was_assembled) { 325 if (!baij->colmap) { 326 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 327 } 328 329 #if defined(PETSC_USE_DEBUG) 330 #if defined(PETSC_USE_CTABLE) 331 { PetscInt data; 332 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 333 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 334 } 335 #else 336 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 337 #endif 338 #endif 339 #if defined(PETSC_USE_CTABLE) 340 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 341 col = (col - 1)/bs; 342 #else 343 col = (baij->colmap[in[j]] - 1)/bs; 344 #endif 345 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 346 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 347 col = in[j]; 348 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 349 } else col = in[j]; 350 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 351 } 352 } 353 } else { 354 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 355 if (!baij->donotstash) { 356 if (roworiented) { 357 ierr = 
MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool   roworiented =
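/* The HASH() macro above implements multiplicative (golden-ratio) hashing: for key k and
   table size s it returns (PetscInt)(s*frac(k*0.6180339887)).  As a rough worked example
   (illustrative): k = 1000 gives k*HASH_KEY = 618.034, fractional part 0.034, so with
   s = 89 the starting slot is about 3.  Collisions are resolved by the linear probe in
   the routines above and below, scanning forward from h1 and wrapping around to the
   start of the table. */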
baij->roworiented; 450 PetscErrorCode ierr; 451 PetscInt i,j,ii,jj,row,col; 452 PetscInt rstart=baij->rstartbs; 453 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 454 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 455 PetscReal tmp; 456 MatScalar **HD = baij->hd,*baij_a; 457 const PetscScalar *v_t,*value; 458 #if defined(PETSC_USE_DEBUG) 459 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 460 #endif 461 462 PetscFunctionBegin; 463 if (roworiented) stepval = (n-1)*bs; 464 else stepval = (m-1)*bs; 465 466 for (i=0; i<m; i++) { 467 #if defined(PETSC_USE_DEBUG) 468 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 469 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 470 #endif 471 row = im[i]; 472 v_t = v + i*nbs2; 473 if (row >= rstart && row < rend) { 474 for (j=0; j<n; j++) { 475 col = in[j]; 476 477 /* Look up into the Hash Table */ 478 key = row*Nbs+col+1; 479 h1 = HASH(size,key,tmp); 480 481 idx = h1; 482 #if defined(PETSC_USE_DEBUG) 483 total_ct++; 484 insert_ct++; 485 if (HT[idx] != key) { 486 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 487 if (idx == size) { 488 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 489 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 490 } 491 } 492 #else 493 if (HT[idx] != key) { 494 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 495 if (idx == size) { 496 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 497 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 498 } 499 } 500 #endif 501 baij_a = HD[idx]; 502 if (roworiented) { 503 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 504 /* value = v + (i*(stepval+bs)+j)*bs; */ 505 value = v_t; 506 v_t += bs; 507 if (addv == ADD_VALUES) { 508 for (ii=0; ii<bs; ii++,value+=stepval) { 509 for (jj=ii; jj<bs2; jj+=bs) { 510 baij_a[jj] += *value++; 511 } 512 } 513 } else { 514 for (ii=0; ii<bs; ii++,value+=stepval) { 515 for (jj=ii; jj<bs2; jj+=bs) { 516 baij_a[jj] = *value++; 517 } 518 } 519 } 520 } else { 521 value = v + j*(stepval+bs)*bs + i*bs; 522 if (addv == ADD_VALUES) { 523 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 524 for (jj=0; jj<bs; jj++) { 525 baij_a[jj] += *value++; 526 } 527 } 528 } else { 529 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 530 for (jj=0; jj<bs; jj++) { 531 baij_a[jj] = *value++; 532 } 533 } 534 } 535 } 536 } 537 } else { 538 if (!baij->donotstash) { 539 if (roworiented) { 540 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 541 } else { 542 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 543 } 544 } 545 } 546 } 547 #if defined(PETSC_USE_DEBUG) 548 baij->ht_total_ct = total_ct; 549 baij->ht_insert_ct = insert_ct; 550 #endif 551 PetscFunctionReturn(0); 552 } 553 554 #undef __FUNCT__ 555 #define __FUNCT__ "MatGetValues_MPIBAIJ" 556 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 557 { 558 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 559 PetscErrorCode ierr; 560 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 561 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 562 563 PetscFunctionBegin; 564 for (i=0; 
i<m; i++) { 565 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 566 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 567 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 568 row = idxm[i] - bsrstart; 569 for (j=0; j<n; j++) { 570 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 571 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 572 if (idxn[j] >= bscstart && idxn[j] < bscend) { 573 col = idxn[j] - bscstart; 574 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 575 } else { 576 if (!baij->colmap) { 577 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 578 } 579 #if defined(PETSC_USE_CTABLE) 580 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 581 data--; 582 #else 583 data = baij->colmap[idxn[j]/bs]-1; 584 #endif 585 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 586 else { 587 col = data + idxn[j]%bs; 588 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } 590 } 591 } 592 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 593 } 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatNorm_MPIBAIJ" 599 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 600 { 601 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 602 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 603 PetscErrorCode ierr; 604 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 605 PetscReal sum = 0.0; 606 MatScalar *v; 607 608 PetscFunctionBegin; 609 if (baij->size == 1) { 610 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 611 } else { 612 if (type == NORM_FROBENIUS) { 613 v = amat->a; 614 nz = amat->nz*bs2; 615 for (i=0; i<nz; i++) { 616 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 617 } 618 v = bmat->a; 619 nz = bmat->nz*bs2; 620 for (i=0; i<nz; i++) { 621 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 622 } 623 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 624 *nrm = PetscSqrtReal(*nrm); 625 } else if (type == NORM_1) { /* max column sum */ 626 PetscReal *tmp,*tmp2; 627 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 628 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 629 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 630 v = amat->a; jj = amat->j; 631 for (i=0; i<amat->nz; i++) { 632 for (j=0; j<bs; j++) { 633 col = bs*(cstart + *jj) + j; /* column index */ 634 for (row=0; row<bs; row++) { 635 tmp[col] += PetscAbsScalar(*v); v++; 636 } 637 } 638 jj++; 639 } 640 v = bmat->a; jj = bmat->j; 641 for (i=0; i<bmat->nz; i++) { 642 for (j=0; j<bs; j++) { 643 col = bs*garray[*jj] + j; 644 for (row=0; row<bs; row++) { 645 tmp[col] += PetscAbsScalar(*v); v++; 646 } 647 } 648 jj++; 649 } 650 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 651 *nrm = 0.0; 652 for (j=0; j<mat->cmap->N; j++) { 653 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 654 } 655 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 656 } else if (type == NORM_INFINITY) { /* max row sum */ 657 PetscReal *sums; 658 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 659 sum = 0.0; 660 for 
(j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)?
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 781 #endif 782 PetscFunctionReturn(0); 783 } 784 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 787 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 788 { 789 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 790 PetscErrorCode ierr; 791 PetscInt nstash,reallocs; 792 InsertMode addv; 793 794 PetscFunctionBegin; 795 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 796 797 /* make sure all processors are either in INSERTMODE or ADDMODE */ 798 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 799 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 800 mat->insertmode = addv; /* in case this processor had no cache */ 801 802 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 803 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 804 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 805 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 806 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 807 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 813 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 814 { 815 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 816 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 817 PetscErrorCode ierr; 818 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 819 PetscInt *row,*col; 820 PetscBool r1,r2,r3,other_disassembled; 821 MatScalar *val; 822 InsertMode addv = mat->insertmode; 823 PetscMPIInt n; 824 825 PetscFunctionBegin; 826 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 827 if (!baij->donotstash && !mat->nooffprocentries) { 828 while (1) { 829 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 830 if (!flg) break; 831 832 for (i=0; i<n;) { 833 /* Now identify the consecutive vals belonging to the same row */ 834 for (j=i,rstart=row[j]; j<n; j++) { 835 if (row[j] != rstart) break; 836 } 837 if (j < n) ncols = j-i; 838 else ncols = n-i; 839 /* Now assemble all these values with a single function call */ 840 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 841 i = j; 842 } 843 } 844 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 845 /* Now process the block-stash. 
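     Each stashed block entry carries a full bs x bs block of values, so the val array
     retrieved below is advanced by bs2 scalars per column index (val+i*bs2).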
     Since the values are stashed column-oriented, temporarily set the row-oriented flags
     to PETSC_FALSE, and restore the original flags after MatSetValuesBlocked() has been
     called. */
  r1 = baij->roworiented;
  r2 = a->roworiented;
  r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

  baij->roworiented = PETSC_FALSE;
  a->roworiented    = PETSC_FALSE;

  (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

  baij->roworiented = r1;
  a->roworiented    = r2;

  ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank
= baij->rank; 937 PetscInt bs = mat->rmap->bs; 938 PetscBool iascii,isdraw; 939 PetscViewer sviewer; 940 PetscViewerFormat format; 941 942 PetscFunctionBegin; 943 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 944 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 945 if (iascii) { 946 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 947 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 948 MatInfo info; 949 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 950 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 952 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 953 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 954 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 956 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 958 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 960 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 961 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } else if (format == PETSC_VIEWER_ASCII_INFO) { 964 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 967 PetscFunctionReturn(0); 968 } 969 } 970 971 if (isdraw) { 972 PetscDraw draw; 973 PetscBool isnull; 974 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 975 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 976 } 977 978 { 979 /* assemble the entire matrix onto first processor. */ 980 Mat A; 981 Mat_SeqBAIJ *Aloc; 982 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 983 MatScalar *a; 984 const char *matname; 985 986 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 987 /* Perhaps this should be the type of mat? 
*/ 988 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 989 if (!rank) { 990 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 991 } else { 992 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 993 } 994 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 995 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 996 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 997 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 998 999 /* copy over the A part */ 1000 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1001 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1002 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1003 1004 for (i=0; i<mbs; i++) { 1005 rvals[0] = bs*(baij->rstartbs + i); 1006 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1007 for (j=ai[i]; j<ai[i+1]; j++) { 1008 col = (baij->cstartbs+aj[j])*bs; 1009 for (k=0; k<bs; k++) { 1010 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1011 col++; a += bs; 1012 } 1013 } 1014 } 1015 /* copy over the B part */ 1016 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1017 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1018 for (i=0; i<mbs; i++) { 1019 rvals[0] = bs*(baij->rstartbs + i); 1020 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1021 for (j=ai[i]; j<ai[i+1]; j++) { 1022 col = baij->garray[aj[j]]*bs; 1023 for (k=0; k<bs; k++) { 1024 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1025 col++; a += bs; 1026 } 1027 } 1028 } 1029 ierr = PetscFree(rvals);CHKERRQ(ierr); 1030 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1031 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1032 /* 1033 Everyone has to call to draw the matrix since the graphics waits are 1034 synchronized across all processors that share the PetscDraw object 1035 */ 1036 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1037 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1038 if (!rank) { 1039 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1040 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1041 } 1042 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1043 ierr = MatDestroy(&A);CHKERRQ(ierr); 1044 } 1045 PetscFunctionReturn(0); 1046 } 1047 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1050 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1051 { 1052 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1053 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1054 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1055 PetscErrorCode ierr; 1056 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1057 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1058 int fd; 1059 PetscScalar *column_values; 1060 FILE *file; 1061 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1062 PetscInt message_count,flowcontrolcount; 1063 1064 PetscFunctionBegin; 1065 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1066 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1067 nz = bs2*(A->nz + B->nz); 1068 rlen = mat->rmap->n; 1069 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1070 if (!rank) { 1071 header[0] = MAT_FILE_CLASSID; 1072 header[1] = mat->rmap->N; 1073 header[2] = mat->cmap->N; 1074 1075 ierr = 
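/* The binary file written by this routine contains, in order: a 4-entry header
   {MAT_FILE_CLASSID, M, N, global nz}, the per-row nonzero counts (expanded from block
   rows to scalar rows), all column indices, and finally all numerical values.  A sketch
   of reading it back (illustrative only, standard PETSc calls; the file name is made up):

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_READ,&v);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,v);CHKERRQ(ierr); */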
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1076 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1077 /* get largest number of rows any processor has */ 1078 range = mat->rmap->range; 1079 for (i=1; i<size; i++) { 1080 rlen = PetscMax(rlen,range[i+1] - range[i]); 1081 } 1082 } else { 1083 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1084 } 1085 1086 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1087 /* compute lengths of each row */ 1088 for (i=0; i<a->mbs; i++) { 1089 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1090 } 1091 /* store the row lengths to the file */ 1092 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1093 if (!rank) { 1094 MPI_Status status; 1095 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1096 rlen = (range[1] - range[0])/bs; 1097 for (i=0; i<rlen; i++) { 1098 for (j=0; j<bs; j++) { 1099 row_lens[i*bs+j] = bs*crow_lens[i]; 1100 } 1101 } 1102 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1103 for (i=1; i<size; i++) { 1104 rlen = (range[i+1] - range[i])/bs; 1105 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1106 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1107 for (k=0; k<rlen; k++) { 1108 for (j=0; j<bs; j++) { 1109 row_lens[k*bs+j] = bs*crow_lens[k]; 1110 } 1111 } 1112 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1113 } 1114 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1115 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1116 } else { 1117 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1118 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1119 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1120 } 1121 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1122 1123 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1124 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1125 the communication needed for the nonzero values */ 1126 nzmax = nz; /* space a largest processor needs */ 1127 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1128 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1129 cnt = 0; 1130 for (i=0; i<a->mbs; i++) { 1131 pcnt = cnt; 1132 for (j=B->i[i]; j<B->i[i+1]; j++) { 1133 if ((col = garray[B->j[j]]) > cstart) break; 1134 for (l=0; l<bs; l++) { 1135 column_indices[cnt++] = bs*col+l; 1136 } 1137 } 1138 for (k=A->i[i]; k<A->i[i+1]; k++) { 1139 for (l=0; l<bs; l++) { 1140 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1141 } 1142 } 1143 for (; j<B->i[i+1]; j++) { 1144 for (l=0; l<bs; l++) { 1145 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1146 } 1147 } 1148 len = cnt - pcnt; 1149 for (k=1; k<bs; k++) { 1150 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1151 cnt += len; 1152 } 1153 } 1154 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1155 1156 /* store the columns to the file */ 1157 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1158 if (!rank) { 1159 MPI_Status status; 1160 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1161 for (i=1; i<size; i++) { 1162 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1163 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1164 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1165 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1166 } 1167 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1168 } else { 1169 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1170 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1171 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1172 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1173 } 1174 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1175 1176 /* load up the numerical values */ 1177 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1178 cnt = 0; 1179 for (i=0; i<a->mbs; i++) { 1180 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1181 for (j=B->i[i]; j<B->i[i+1]; j++) { 1182 if (garray[B->j[j]] > cstart) break; 1183 for (l=0; l<bs; l++) { 1184 for (ll=0; ll<bs; ll++) { 1185 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1186 } 1187 } 1188 cnt += bs; 1189 } 1190 for (k=A->i[i]; k<A->i[i+1]; k++) { 1191 for (l=0; l<bs; l++) { 1192 for (ll=0; ll<bs; ll++) { 1193 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1194 } 1195 } 1196 cnt += bs; 1197 } 1198 for (; j<B->i[i+1]; j++) { 1199 for (l=0; l<bs; l++) { 1200 for (ll=0; ll<bs; ll++) { 1201 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1202 } 1203 } 1204 cnt += bs; 1205 } 1206 cnt += (bs-1)*rlen; 1207 } 1208 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1209 1210 /* store the column values to the file */ 1211 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1212 if (!rank) { 1213 MPI_Status status; 1214 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1215 for (i=1; i<size; i++) { 1216 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1217 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1218 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1219 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1220 } 1221 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1222 } else { 1223 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1224 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1227 } 1228 ierr = PetscFree(column_values);CHKERRQ(ierr); 1229 1230 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1231 if (file) { 1232 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1233 } 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatView_MPIBAIJ" 1239 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1240 { 1241 PetscErrorCode ierr; 1242 PetscBool iascii,isdraw,issocket,isbinary; 1243 1244 PetscFunctionBegin; 1245 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1246 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1247 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1248 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1249 if (iascii || isdraw || issocket) { 1250 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1251 } else if (isbinary) { 1252 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1253 } 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1259 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1260 { 1261 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1262 PetscErrorCode ierr; 1263 1264 PetscFunctionBegin; 1265 #if defined(PETSC_USE_LOG) 1266 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1267 #endif 1268 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1269 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1270 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1271 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1272 #if defined(PETSC_USE_CTABLE) 1273 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1274 #else 1275 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1276 #endif 1277 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1278 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1279 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1280 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1281 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1282 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1283 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1284 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1285 1286 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1288 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin()
but we need to call this nop */ 1360 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1361 } 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1367 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1368 { 1369 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 /* do nondiagonal part */ 1374 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1375 /* send it on its way */ 1376 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1377 /* do local part */ 1378 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1379 /* receive remote parts: note this assumes the values are not actually */ 1380 /* inserted in yy until the next line, which is true for my implementation*/ 1381 /* but is not perhaps always true. */ 1382 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 /* 1387 This only works correctly for square matrices where the subblock A->A is the 1388 diagonal block 1389 */ 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1392 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1393 { 1394 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1395 PetscErrorCode ierr; 1396 1397 PetscFunctionBegin; 1398 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1399 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatScale_MPIBAIJ" 1405 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1406 { 1407 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1408 PetscErrorCode ierr; 1409 1410 PetscFunctionBegin; 1411 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1412 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1418 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1419 { 1420 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1421 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1422 PetscErrorCode ierr; 1423 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1424 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1425 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1426 1427 PetscFunctionBegin; 1428 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1429 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1430 mat->getrowactive = PETSC_TRUE; 1431 1432 if (!mat->rowvalues && (idx || v)) { 1433 /* 1434 allocate enough space to hold information from the longest row. 
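      (the work arrays rowvalues/rowindices are sized from the fullest block row of A and B
      combined, i.e. max*bs2 entries each, and are reused by every subsequent MatGetRow call)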
1435 */ 1436 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1437 PetscInt max = 1,mbs = mat->mbs,tmp; 1438 for (i=0; i<mbs; i++) { 1439 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1440 if (max < tmp) max = tmp; 1441 } 1442 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1443 } 1444 lrow = row - brstart; 1445 1446 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1447 if (!v) {pvA = 0; pvB = 0;} 1448 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1449 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1451 nztot = nzA + nzB; 1452 1453 cmap = mat->garray; 1454 if (v || idx) { 1455 if (nztot) { 1456 /* Sort by increasing column numbers, assuming A and B already sorted */ 1457 PetscInt imark = -1; 1458 if (v) { 1459 *v = v_p = mat->rowvalues; 1460 for (i=0; i<nzB; i++) { 1461 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1462 else break; 1463 } 1464 imark = i; 1465 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1466 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1467 } 1468 if (idx) { 1469 *idx = idx_p = mat->rowindices; 1470 if (imark > -1) { 1471 for (i=0; i<imark; i++) { 1472 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1473 } 1474 } else { 1475 for (i=0; i<nzB; i++) { 1476 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1477 else break; 1478 } 1479 imark = i; 1480 } 1481 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1482 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1483 } 1484 } else { 1485 if (idx) *idx = 0; 1486 if (v) *v = 0; 1487 } 1488 } 1489 *nz = nztot; 1490 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1491 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1497 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498 { 1499 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1500 1501 PetscFunctionBegin; 1502 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1503 baij->getrowactive = PETSC_FALSE; 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1509 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1510 { 1511 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1512 PetscErrorCode ierr; 1513 1514 PetscFunctionBegin; 1515 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1516 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1522 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1523 { 1524 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1525 Mat A = a->A,B = a->B; 1526 PetscErrorCode ierr; 1527 PetscReal isend[5],irecv[5]; 1528 1529 PetscFunctionBegin; 1530 info->block_size = (PetscReal)matin->rmap->bs; 1531 1532 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1533 1534 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1535 isend[3] = info->memory; isend[4] = info->mallocs; 1536 1537 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1538 1539 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1540 isend[3] += info->memory; 
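  /* isend[] accumulates {nz_used, nz_allocated, nz_unneeded, memory, mallocs} over the
     diagonal (A) and off-diagonal (B) blocks, so a single 5-entry MPI_Allreduce with
     MPIU_MAX or MPIU_SUM below serves both MAT_GLOBAL_MAX and MAT_GLOBAL_SUM. */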
isend[4] += info->mallocs; 1541 1542 if (flag == MAT_LOCAL) { 1543 info->nz_used = isend[0]; 1544 info->nz_allocated = isend[1]; 1545 info->nz_unneeded = isend[2]; 1546 info->memory = isend[3]; 1547 info->mallocs = isend[4]; 1548 } else if (flag == MAT_GLOBAL_MAX) { 1549 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1550 1551 info->nz_used = irecv[0]; 1552 info->nz_allocated = irecv[1]; 1553 info->nz_unneeded = irecv[2]; 1554 info->memory = irecv[3]; 1555 info->mallocs = irecv[4]; 1556 } else if (flag == MAT_GLOBAL_SUM) { 1557 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1558 1559 info->nz_used = irecv[0]; 1560 info->nz_allocated = irecv[1]; 1561 info->nz_unneeded = irecv[2]; 1562 info->memory = irecv[3]; 1563 info->mallocs = irecv[4]; 1564 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1565 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1566 info->fill_ratio_needed = 0; 1567 info->factor_mallocs = 0; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1574 { 1575 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1576 PetscErrorCode ierr; 1577 1578 PetscFunctionBegin; 1579 switch (op) { 1580 case MAT_NEW_NONZERO_LOCATIONS: 1581 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1582 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1583 case MAT_KEEP_NONZERO_PATTERN: 1584 case MAT_NEW_NONZERO_LOCATION_ERR: 1585 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1586 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1587 break; 1588 case MAT_ROW_ORIENTED: 1589 a->roworiented = flg; 1590 1591 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1592 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1593 break; 1594 case MAT_NEW_DIAGONALS: 1595 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1596 break; 1597 case MAT_IGNORE_OFF_PROC_ENTRIES: 1598 a->donotstash = flg; 1599 break; 1600 case MAT_USE_HASH_TABLE: 1601 a->ht_flag = flg; 1602 break; 1603 case MAT_SYMMETRIC: 1604 case MAT_STRUCTURALLY_SYMMETRIC: 1605 case MAT_HERMITIAN: 1606 case MAT_SYMMETRY_ETERNAL: 1607 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1608 break; 1609 default: 1610 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1617 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1618 { 1619 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1620 Mat_SeqBAIJ *Aloc; 1621 Mat B; 1622 PetscErrorCode ierr; 1623 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1624 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1625 MatScalar *a; 1626 1627 PetscFunctionBegin; 1628 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1629 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1630 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1631 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1632 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1633 /* Do not know preallocation information, but must set block size */ 1634 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1635 } else { 
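/* MAT_REUSE_MATRIX with a distinct output matrix: reuse the matrix supplied by the caller */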
1636 B = *matout; 1637 } 1638 1639 /* copy over the A part */ 1640 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1641 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1642 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1643 1644 for (i=0; i<mbs; i++) { 1645 rvals[0] = bs*(baij->rstartbs + i); 1646 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1647 for (j=ai[i]; j<ai[i+1]; j++) { 1648 col = (baij->cstartbs+aj[j])*bs; 1649 for (k=0; k<bs; k++) { 1650 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1651 1652 col++; a += bs; 1653 } 1654 } 1655 } 1656 /* copy over the B part */ 1657 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1658 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1659 for (i=0; i<mbs; i++) { 1660 rvals[0] = bs*(baij->rstartbs + i); 1661 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1662 for (j=ai[i]; j<ai[i+1]; j++) { 1663 col = baij->garray[aj[j]]*bs; 1664 for (k=0; k<bs; k++) { 1665 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1666 col++; 1667 a += bs; 1668 } 1669 } 1670 } 1671 ierr = PetscFree(rvals);CHKERRQ(ierr); 1672 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1673 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1674 1675 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1676 else { 1677 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1678 } 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1684 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1685 { 1686 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1687 Mat a = baij->A,b = baij->B; 1688 PetscErrorCode ierr; 1689 PetscInt s1,s2,s3; 1690 1691 PetscFunctionBegin; 1692 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1693 if (rr) { 1694 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1695 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1696 /* Overlap communication with computation. 
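The forward scatter of rr into lvec is started here and completed only after the diagonal block has been scaled below.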
*/ 1697 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1698 } 1699 if (ll) { 1700 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1701 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1702 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1703 } 1704 /* scale the diagonal block */ 1705 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1706 1707 if (rr) { 1708 /* Do a scatter end and then right scale the off-diagonal block */ 1709 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1710 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1711 } 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1717 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1718 { 1719 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1720 PetscInt *owners = A->rmap->range; 1721 PetscInt n = A->rmap->n; 1722 PetscSF sf; 1723 PetscInt *lrows; 1724 PetscSFNode *rrows; 1725 PetscInt r, p = 0, len = 0; 1726 PetscErrorCode ierr; 1727 1728 PetscFunctionBegin; 1729 /* Create SF where leaves are input rows and roots are owned rows */ 1730 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1731 for (r = 0; r < n; ++r) lrows[r] = -1; 1732 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 1733 for (r = 0; r < N; ++r) { 1734 const PetscInt idx = rows[r]; 1735 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1736 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1737 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1738 } 1739 if (A->nooffproczerorows) { 1740 if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1741 lrows[len++] = idx - owners[p]; 1742 } else { 1743 rrows[r].rank = p; 1744 rrows[r].index = rows[r] - owners[p]; 1745 } 1746 } 1747 if (!A->nooffproczerorows) { 1748 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1749 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1750 /* Collect flags for rows to be zeroed */ 1751 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1752 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1753 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1754 /* Compress and put in row numbers */ 1755 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1756 } 1757 /* fix right hand side if needed */ 1758 if (x && b) { 1759 const PetscScalar *xx; 1760 PetscScalar *bb; 1761 1762 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1763 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1764 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1765 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1766 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1767 } 1768 1769 /* actually zap the local rows */ 1770 /* 1771 Zero the required rows. If the "diagonal block" of the matrix 1772 is square and the user wishes to set the diagonal we use separate 1773 code so that MatSetValues() is not called for each diagonal allocating 1774 new memory, thus calling lots of mallocs and slowing things down. 
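In the rectangular case the diagonal entries are instead inserted one at a time with MatSetValues() below, which requires that new nonzero locations be allowed.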
1775 1776 */ 1777 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1778 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1779 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1780 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1781 } else if (diag != 0.0) { 1782 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1783 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1784 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1785 for (r = 0; r < len; ++r) { 1786 const PetscInt row = lrows[r] + A->rmap->rstart; 1787 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1788 } 1789 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1790 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1791 } else { 1792 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1793 } 1794 ierr = PetscFree(lrows);CHKERRQ(ierr); 1795 1796 /* only change matrix nonzero state if pattern was allowed to be changed */ 1797 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1798 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1799 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1806 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1807 { 1808 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1809 PetscErrorCode ierr; 1810 PetscMPIInt n = A->rmap->n; 1811 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1812 PetscInt *lrows,*owners = A->rmap->range; 1813 PetscSFNode *rrows; 1814 PetscSF sf; 1815 const PetscScalar *xx; 1816 PetscScalar *bb,*mask; 1817 Vec xmask,lmask; 1818 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1819 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1820 PetscScalar *aa; 1821 1822 PetscFunctionBegin; 1823 /* Create SF where leaves are input rows and roots are owned rows */ 1824 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1825 for (r = 0; r < n; ++r) lrows[r] = -1; 1826 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1827 for (r = 0; r < N; ++r) { 1828 const PetscInt idx = rows[r]; 1829 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1830 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1831 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1832 } 1833 rrows[r].rank = p; 1834 rrows[r].index = rows[r] - owners[p]; 1835 } 1836 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1837 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1838 /* Collect flags for rows to be zeroed */ 1839 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1840 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1841 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1842 /* Compress and put in row numbers */ 1843 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1844 /* zero diagonal part of matrix */ 1845 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1846 /* 
handle off diagonal part of matrix */ 1847 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1848 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1849 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1850 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1851 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1852 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1853 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1854 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1855 if (x) { 1856 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1858 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1859 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1860 } 1861 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1862 /* remove zeroed rows of off diagonal matrix */ 1863 for (i = 0; i < len; ++i) { 1864 row = lrows[i]; 1865 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1866 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1867 for (k = 0; k < count; ++k) { 1868 aa[0] = 0.0; 1869 aa += bs; 1870 } 1871 } 1872 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1873 for (i = 0; i < l->B->rmap->N; ++i) { 1874 row = i/bs; 1875 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1876 for (k = 0; k < bs; ++k) { 1877 col = bs*baij->j[j] + k; 1878 if (PetscAbsScalar(mask[col])) { 1879 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1880 if (b) bb[i] -= aa[0]*xx[col]; 1881 aa[0] = 0.0; 1882 } 1883 } 1884 } 1885 } 1886 if (x) { 1887 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1888 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1889 } 1890 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1891 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1892 ierr = PetscFree(lrows);CHKERRQ(ierr); 1893 1894 /* only change matrix nonzero state if pattern was allowed to be changed */ 1895 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1896 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1897 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1904 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1905 { 1906 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1907 PetscErrorCode ierr; 1908 1909 PetscFunctionBegin; 1910 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatEqual_MPIBAIJ" 1918 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1919 { 1920 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1921 Mat a,b,c,d; 1922 PetscBool flg; 1923 PetscErrorCode ierr; 1924 1925 PetscFunctionBegin; 1926 a = matA->A; b = matA->B; 1927 c = matB->A; d = matB->B; 1928 1929 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1930 if (flg) { 1931 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1932 } 1933 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatCopy_MPIBAIJ" 1939 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1940 { 1941 PetscErrorCode ierr; 1942 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 
1943 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1944 1945 PetscFunctionBegin; 1946 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1947 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1948 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1949 } else { 1950 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1951 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1952 } 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1958 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1959 { 1960 PetscErrorCode ierr; 1961 1962 PetscFunctionBegin; 1963 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1964 PetscFunctionReturn(0); 1965 } 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 1969 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1970 { 1971 PetscErrorCode ierr; 1972 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1973 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1974 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1975 1976 PetscFunctionBegin; 1977 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1978 PetscFunctionReturn(0); 1979 } 1980 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1983 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1984 { 1985 PetscErrorCode ierr; 1986 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1987 PetscBLASInt bnz,one=1; 1988 Mat_SeqBAIJ *x,*y; 1989 1990 PetscFunctionBegin; 1991 if (str == SAME_NONZERO_PATTERN) { 1992 PetscScalar alpha = a; 1993 x = (Mat_SeqBAIJ*)xx->A->data; 1994 y = (Mat_SeqBAIJ*)yy->A->data; 1995 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1996 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1997 x = (Mat_SeqBAIJ*)xx->B->data; 1998 y = (Mat_SeqBAIJ*)yy->B->data; 1999 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2000 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2001 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2002 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2003 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2004 } else { 2005 Mat B; 2006 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2007 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2008 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2009 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2010 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2011 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2012 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2013 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2014 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2015 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2016 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2017 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
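Here a new matrix B with merged preallocation is filled and then swapped into Y with MatHeaderReplace().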
*/ 2018 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2019 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2020 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2021 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2022 } 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2028 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2029 { 2030 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2031 PetscErrorCode ierr; 2032 2033 PetscFunctionBegin; 2034 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2035 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2041 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2042 { 2043 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2044 PetscErrorCode ierr; 2045 2046 PetscFunctionBegin; 2047 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2048 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2049 PetscFunctionReturn(0); 2050 } 2051 2052 #undef __FUNCT__ 2053 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2054 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2055 { 2056 PetscErrorCode ierr; 2057 IS iscol_local; 2058 PetscInt csize; 2059 2060 PetscFunctionBegin; 2061 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2062 if (call == MAT_REUSE_MATRIX) { 2063 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2064 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2065 } else { 2066 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2067 } 2068 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2069 if (call == MAT_INITIAL_MATRIX) { 2070 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2071 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2078 /* 2079 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2080 in local and then by concatenating the local matrices the end result. 2081 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ(). 2082 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 2083 */ 2084 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2085 { 2086 PetscErrorCode ierr; 2087 PetscMPIInt rank,size; 2088 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2089 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2090 Mat M,Mreuse; 2091 MatScalar *vwork,*aa; 2092 MPI_Comm comm; 2093 IS isrow_new, iscol_new; 2094 PetscBool idflag,allrows, allcols; 2095 Mat_SeqBAIJ *aij; 2096 2097 PetscFunctionBegin; 2098 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 /* The compression and expansion should be avoided. 
Doesn't point 2102 out errors, might change the indices, hence buggey */ 2103 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2104 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2105 2106 /* Check for special case: each processor gets entire matrix columns */ 2107 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2108 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2109 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2110 else allcols = PETSC_FALSE; 2111 2112 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2113 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2114 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2115 else allrows = PETSC_FALSE; 2116 2117 if (call == MAT_REUSE_MATRIX) { 2118 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2119 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2120 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2121 } else { 2122 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2123 } 2124 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2125 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2126 /* 2127 m - number of local rows 2128 n - number of columns (same on all processors) 2129 rstart - first row in new global matrix generated 2130 */ 2131 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2132 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2133 m = m/bs; 2134 n = n/bs; 2135 2136 if (call == MAT_INITIAL_MATRIX) { 2137 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2138 ii = aij->i; 2139 jj = aij->j; 2140 2141 /* 2142 Determine the number of non-zeros in the diagonal and off-diagonal 2143 portions of the matrix in order to do correct preallocation 2144 */ 2145 2146 /* first get start and end of "diagonal" columns */ 2147 if (csize == PETSC_DECIDE) { 2148 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2149 if (mglobal == n*bs) { /* square matrix */ 2150 nlocal = m; 2151 } else { 2152 nlocal = n/size + ((n % size) > rank); 2153 } 2154 } else { 2155 nlocal = csize/bs; 2156 } 2157 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2158 rstart = rend - nlocal; 2159 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2160 2161 /* next, compute all the lengths */ 2162 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2163 for (i=0; i<m; i++) { 2164 jend = ii[i+1] - ii[i]; 2165 olen = 0; 2166 dlen = 0; 2167 for (j=0; j<jend; j++) { 2168 if (*jj < rstart || *jj >= rend) olen++; 2169 else dlen++; 2170 jj++; 2171 } 2172 olens[i] = olen; 2173 dlens[i] = dlen; 2174 } 2175 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2176 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2177 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2178 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2179 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2180 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2181 } else { 2182 PetscInt ml,nl; 2183 2184 M = *newmat; 2185 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2186 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 2187 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2188 /* 2189 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2190 rather than the slower MatSetValues(). 2191 */ 2192 M->was_assembled = PETSC_TRUE; 2193 M->assembled = PETSC_FALSE; 2194 } 2195 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2196 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2197 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2198 ii = aij->i; 2199 jj = aij->j; 2200 aa = aij->a; 2201 for (i=0; i<m; i++) { 2202 row = rstart/bs + i; 2203 nz = ii[i+1] - ii[i]; 2204 cwork = jj; jj += nz; 2205 vwork = aa; aa += nz*bs*bs; 2206 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2207 } 2208 2209 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2210 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2211 *newmat = M; 2212 2213 /* save submatrix used in processor for next request */ 2214 if (call == MAT_INITIAL_MATRIX) { 2215 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2216 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatPermute_MPIBAIJ" 2223 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2224 { 2225 MPI_Comm comm,pcomm; 2226 PetscInt clocal_size,nrows; 2227 const PetscInt *rows; 2228 PetscMPIInt size; 2229 IS crowp,lcolp; 2230 PetscErrorCode ierr; 2231 2232 PetscFunctionBegin; 2233 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2234 /* make a collective version of 'rowp' */ 2235 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2236 if (pcomm==comm) { 2237 crowp = rowp; 2238 } else { 2239 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2240 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2241 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2242 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2243 } 2244 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2245 /* make a local version of 'colp' */ 2246 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2247 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2248 if (size==1) { 2249 lcolp = colp; 2250 } else { 2251 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2252 } 2253 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2254 /* now we just get the submatrix */ 2255 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2256 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2257 /* clean up */ 2258 if (pcomm!=comm) { 2259 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2260 } 2261 if (size>1) { 2262 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2263 } 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2269 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2270 { 2271 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2272 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2273 2274 PetscFunctionBegin; 2275 if (nghosts) *nghosts = B->nbs; 2276 if (ghosts) *ghosts = baij->garray; 2277 PetscFunctionReturn(0); 2278 } 2279 2280 #undef __FUNCT__ 2281 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2282 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2283 { 2284 Mat B; 2285 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2286 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = 
(Mat_SeqBAIJ*)a->B->data; 2287 Mat_SeqAIJ *b; 2288 PetscErrorCode ierr; 2289 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2290 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2291 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2292 2293 PetscFunctionBegin; 2294 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2295 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2296 2297 /* ---------------------------------------------------------------- 2298 Tell every processor the number of nonzeros per row 2299 */ 2300 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2301 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2302 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2303 } 2304 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2305 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2306 displs = recvcounts + size; 2307 for (i=0; i<size; i++) { 2308 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2309 displs[i] = A->rmap->range[i]/bs; 2310 } 2311 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2312 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2313 #else 2314 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2315 #endif 2316 /* --------------------------------------------------------------- 2317 Create the sequential matrix of the same type as the local block diagonal 2318 */ 2319 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2320 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2321 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2322 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2323 b = (Mat_SeqAIJ*)B->data; 2324 2325 /*-------------------------------------------------------------------- 2326 Copy my part of matrix column indices over 2327 */ 2328 sendcount = ad->nz + bd->nz; 2329 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2330 a_jsendbuf = ad->j; 2331 b_jsendbuf = bd->j; 2332 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2333 cnt = 0; 2334 for (i=0; i<n; i++) { 2335 2336 /* put in lower diagonal portion */ 2337 m = bd->i[i+1] - bd->i[i]; 2338 while (m > 0) { 2339 /* is it above diagonal (in bd (compressed) numbering) */ 2340 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2341 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2342 m--; 2343 } 2344 2345 /* put in diagonal portion */ 2346 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2347 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2348 } 2349 2350 /* put in upper diagonal portion */ 2351 while (m-- > 0) { 2352 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2353 } 2354 } 2355 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2356 2357 /*-------------------------------------------------------------------- 2358 Gather all column indices to all processors 2359 */ 2360 for (i=0; i<size; i++) { 2361 recvcounts[i] = 0; 2362 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2363 recvcounts[i] += lens[j]; 2364 } 2365 } 2366 displs[0] = 0; 2367 for (i=1; i<size; i++) { 2368 displs[i] = displs[i-1] + recvcounts[i-1]; 2369 } 2370 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2371 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2372 #else 2373 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2374 #endif 2375 /*-------------------------------------------------------------------- 2376 Assemble the matrix into useable form (note numerical values not yet set) 2377 */ 2378 /* set the b->ilen (length of each row) values */ 2379 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2380 /* set the b->i indices */ 2381 b->i[0] = 0; 2382 for (i=1; i<=A->rmap->N/bs; i++) { 2383 b->i[i] = b->i[i-1] + lens[i-1]; 2384 } 2385 ierr = PetscFree(lens);CHKERRQ(ierr); 2386 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2389 2390 if (A->symmetric) { 2391 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2392 } else if (A->hermitian) { 2393 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2394 } else if (A->structurally_symmetric) { 2395 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2396 } 2397 *newmat = B; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatSOR_MPIBAIJ" 2403 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2404 { 2405 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2406 PetscErrorCode ierr; 2407 Vec bb1 = 0; 2408 2409 PetscFunctionBegin; 2410 if (flag == SOR_APPLY_UPPER) { 2411 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2416 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2417 } 2418 2419 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2420 if (flag & SOR_ZERO_INITIAL_GUESS) { 2421 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2422 its--; 2423 } 2424 2425 while (its--) { 2426 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2427 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2428 2429 /* update rhs: bb1 = bb - B*x */ 2430 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2431 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2432 2433 /* local sweep */ 2434 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2435 } 2436 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2437 if (flag & SOR_ZERO_INITIAL_GUESS) { 2438 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2439 its--; 2440 } 2441 while (its--) { 2442 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2443 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2444 2445 /* update rhs: bb1 = bb - B*x */ 2446 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2447 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2448 2449 /* local sweep */ 2450 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2451 } 2452 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2453 if (flag & SOR_ZERO_INITIAL_GUESS) { 2454 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2455 its--; 2456 } 2457 while (its--) { 2458 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2459 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2460 2461 /* update rhs: bb1 = bb - B*x */ 2462 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2463 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2464 2465 /* local sweep */ 2466 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2467 } 2468 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2469 2470 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2471 PetscFunctionReturn(0); 2472 } 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2476 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2477 { 2478 PetscErrorCode ierr; 2479 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2480 PetscInt N,i,*garray = aij->garray; 2481 PetscInt ib,jb,bs = A->rmap->bs; 2482 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2483 MatScalar *a_val = a_aij->a; 2484 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2485 MatScalar *b_val = b_aij->a; 2486 PetscReal *work; 2487 2488 PetscFunctionBegin; 2489 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2490 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2491 if (type == NORM_2) { 2492 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2493 for (jb=0; jb<bs; jb++) { 2494 for (ib=0; ib<bs; ib++) { 2495 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2496 a_val++; 2497 } 2498 } 2499 } 2500 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2501 for (jb=0; jb<bs; jb++) { 2502 for (ib=0; ib<bs; ib++) { 2503 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2504 b_val++; 2505 } 2506 } 2507 } 2508 } else if (type == NORM_1) { 2509 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2510 for (jb=0; jb<bs; jb++) { 2511 for (ib=0; ib<bs; ib++) { 2512 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2513 a_val++; 2514 } 2515 } 2516 } 2517 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2518 for (jb=0; jb<bs; jb++) { 2519 for (ib=0; ib<bs; ib++) { 2520 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2521 b_val++; 2522 } 2523 } 2524 } 2525 } else if (type == NORM_INFINITY) { 2526 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2527 for (jb=0; jb<bs; jb++) { 2528 for (ib=0; ib<bs; ib++) { 2529 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2530 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2531 a_val++; 2532 } 2533 } 2534 } 2535 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2536 for (jb=0; jb<bs; jb++) { 2537 for (ib=0; ib<bs; ib++) { 2538 int col = garray[b_aij->j[i]] * bs + jb; 2539 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2540 b_val++; 2541 } 2542 } 2543 } 2544 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2545 if (type == NORM_INFINITY) { 2546 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2547 } else { 2548 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2549 } 2550 ierr = PetscFree(work);CHKERRQ(ierr); 2551 if (type == NORM_2) { 2552 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2553 } 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2559 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2560 { 2561 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2562 PetscErrorCode ierr; 2563 2564 PetscFunctionBegin; 2565 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 2570 /* -------------------------------------------------------------------*/ 2571 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2572 MatGetRow_MPIBAIJ, 2573 MatRestoreRow_MPIBAIJ, 2574 MatMult_MPIBAIJ, 2575 /* 4*/ MatMultAdd_MPIBAIJ, 2576 MatMultTranspose_MPIBAIJ, 2577 MatMultTransposeAdd_MPIBAIJ, 2578 0, 2579 0, 2580 0, 2581 /*10*/ 0, 2582 0, 2583 0, 2584 MatSOR_MPIBAIJ, 2585 MatTranspose_MPIBAIJ, 2586 /*15*/ MatGetInfo_MPIBAIJ, 2587 MatEqual_MPIBAIJ, 2588 MatGetDiagonal_MPIBAIJ, 2589 MatDiagonalScale_MPIBAIJ, 2590 MatNorm_MPIBAIJ, 2591 /*20*/ MatAssemblyBegin_MPIBAIJ, 2592 MatAssemblyEnd_MPIBAIJ, 2593 MatSetOption_MPIBAIJ, 2594 MatZeroEntries_MPIBAIJ, 2595 /*24*/ MatZeroRows_MPIBAIJ, 2596 0, 2597 0, 2598 0, 2599 0, 2600 /*29*/ MatSetUp_MPIBAIJ, 2601 0, 2602 0, 2603 0, 2604 0, 2605 /*34*/ MatDuplicate_MPIBAIJ, 2606 0, 2607 0, 2608 0, 2609 0, 2610 /*39*/ MatAXPY_MPIBAIJ, 2611 MatGetSubMatrices_MPIBAIJ, 2612 MatIncreaseOverlap_MPIBAIJ, 2613 MatGetValues_MPIBAIJ, 2614 MatCopy_MPIBAIJ, 2615 /*44*/ 0, 2616 MatScale_MPIBAIJ, 2617 0, 2618 0, 2619 MatZeroRowsColumns_MPIBAIJ, 2620 /*49*/ 0, 2621 0, 2622 0, 2623 0, 2624 0, 2625 /*54*/ MatFDColoringCreate_MPIXAIJ, 2626 0, 2627 MatSetUnfactored_MPIBAIJ, 2628 MatPermute_MPIBAIJ, 2629 MatSetValuesBlocked_MPIBAIJ, 2630 /*59*/ MatGetSubMatrix_MPIBAIJ, 2631 MatDestroy_MPIBAIJ, 2632 MatView_MPIBAIJ, 2633 0, 2634 0, 2635 /*64*/ 0, 2636 0, 2637 0, 2638 0, 2639 0, 2640 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2641 0, 2642 0, 2643 0, 2644 0, 2645 /*74*/ 0, 2646 MatFDColoringApply_BAIJ, 2647 0, 2648 0, 2649 0, 2650 /*79*/ 0, 2651 0, 2652 0, 2653 0, 2654 MatLoad_MPIBAIJ, 2655 /*84*/ 0, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*89*/ 0, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*94*/ 0, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /*99*/ 0, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*104*/0, 2676 MatRealPart_MPIBAIJ, 2677 MatImaginaryPart_MPIBAIJ, 2678 0, 2679 0, 2680 /*109*/0, 2681 0, 2682 0, 2683 0, 2684 0, 2685 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2686 0, 2687 MatGetGhosts_MPIBAIJ, 2688 0, 2689 0, 2690 /*119*/0, 2691 0, 2692 0, 2693 0, 2694 MatGetMultiProcBlock_MPIBAIJ, 2695 /*124*/0, 2696 MatGetColumnNorms_MPIBAIJ, 2697 MatInvertBlockDiagonal_MPIBAIJ, 2698 0, 2699 0, 2700 /*129*/ 0, 2701 0, 2702 0, 2703 0, 2704 0, 2705 /*134*/ 0, 2706 0, 2707 0, 2708 0, 2709 0, 2710 /*139*/ 0, 2711 0, 2712 0, 2713 MatFDColoringSetUp_MPIXAIJ, 2714 0, 2715 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2716 }; 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2720 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2721 { 2722 PetscFunctionBegin; 2723 *a = ((Mat_MPIBAIJ*)A->data)->A; 2724 PetscFunctionReturn(0); 2725 } 2726 2727 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2728 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2731 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2732 { 2733 PetscInt m,rstart,cstart,cend; 2734 PetscInt 
i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2735 const PetscInt *JJ =0; 2736 PetscScalar *values=0; 2737 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2742 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2743 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2744 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2745 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2746 m = B->rmap->n/bs; 2747 rstart = B->rmap->rstart/bs; 2748 cstart = B->cmap->rstart/bs; 2749 cend = B->cmap->rend/bs; 2750 2751 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2752 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2753 for (i=0; i<m; i++) { 2754 nz = ii[i+1] - ii[i]; 2755 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2756 nz_max = PetscMax(nz_max,nz); 2757 JJ = jj + ii[i]; 2758 for (j=0; j<nz; j++) { 2759 if (*JJ >= cstart) break; 2760 JJ++; 2761 } 2762 d = 0; 2763 for (; j<nz; j++) { 2764 if (*JJ++ >= cend) break; 2765 d++; 2766 } 2767 d_nnz[i] = d; 2768 o_nnz[i] = nz - d; 2769 } 2770 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2771 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2772 2773 values = (PetscScalar*)V; 2774 if (!values) { 2775 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2776 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2777 } 2778 for (i=0; i<m; i++) { 2779 PetscInt row = i + rstart; 2780 PetscInt ncols = ii[i+1] - ii[i]; 2781 const PetscInt *icols = jj + ii[i]; 2782 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2783 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2784 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2785 } else { /* block ordering does not match so we can only insert one block at a time. */ 2786 PetscInt j; 2787 for (j=0; j<ncols; j++) { 2788 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2789 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2790 } 2791 } 2792 } 2793 2794 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2795 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2796 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2797 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2798 PetscFunctionReturn(0); 2799 } 2800 2801 #undef __FUNCT__ 2802 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2803 /*@C 2804 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2805 (the default parallel PETSc format). 2806 2807 Collective on MPI_Comm 2808 2809 Input Parameters: 2810 + B - the matrix 2811 . bs - the block size 2812 . i - the indices into j for the start of each local row (starts with zero) 2813 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2814 - v - optional values in the matrix 2815 2816 Level: developer 2817 2818 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. 
For example, C programs 2819 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2820 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2821 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2822 block column and the second index is over columns within a block. 2823 2824 .keywords: matrix, aij, compressed row, sparse, parallel 2825 2826 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2827 @*/ 2828 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2829 { 2830 PetscErrorCode ierr; 2831 2832 PetscFunctionBegin; 2833 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2834 PetscValidType(B,1); 2835 PetscValidLogicalCollectiveInt(B,bs,2); 2836 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2837 PetscFunctionReturn(0); 2838 } 2839 2840 #undef __FUNCT__ 2841 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2842 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2843 { 2844 Mat_MPIBAIJ *b; 2845 PetscErrorCode ierr; 2846 PetscInt i; 2847 2848 PetscFunctionBegin; 2849 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2850 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2851 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2852 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2853 2854 if (d_nnz) { 2855 for (i=0; i<B->rmap->n/bs; i++) { 2856 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2857 } 2858 } 2859 if (o_nnz) { 2860 for (i=0; i<B->rmap->n/bs; i++) { 2861 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2862 } 2863 } 2864 2865 b = (Mat_MPIBAIJ*)B->data; 2866 b->bs2 = bs*bs; 2867 b->mbs = B->rmap->n/bs; 2868 b->nbs = B->cmap->n/bs; 2869 b->Mbs = B->rmap->N/bs; 2870 b->Nbs = B->cmap->N/bs; 2871 2872 for (i=0; i<=b->size; i++) { 2873 b->rangebs[i] = B->rmap->range[i]/bs; 2874 } 2875 b->rstartbs = B->rmap->rstart/bs; 2876 b->rendbs = B->rmap->rend/bs; 2877 b->cstartbs = B->cmap->rstart/bs; 2878 b->cendbs = B->cmap->rend/bs; 2879 2880 if (!B->preallocated) { 2881 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2882 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2883 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2884 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2885 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2886 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2887 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2888 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2889 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2890 } 2891 2892 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2893 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2894 B->preallocated = PETSC_TRUE; 2895 PetscFunctionReturn(0); 2896 } 2897 2898 extern 
PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2899 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2900 2901 #undef __FUNCT__ 2902 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2903 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2904 { 2905 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2906 PetscErrorCode ierr; 2907 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2908 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2909 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2910 2911 PetscFunctionBegin; 2912 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2913 ii[0] = 0; 2914 for (i=0; i<M; i++) { 2915 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2916 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2917 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2918 /* remove one from count of matrix has diagonal */ 2919 for (j=id[i]; j<id[i+1]; j++) { 2920 if (jd[j] == i) {ii[i+1]--;break;} 2921 } 2922 } 2923 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2924 cnt = 0; 2925 for (i=0; i<M; i++) { 2926 for (j=io[i]; j<io[i+1]; j++) { 2927 if (garray[jo[j]] > rstart) break; 2928 jj[cnt++] = garray[jo[j]]; 2929 } 2930 for (k=id[i]; k<id[i+1]; k++) { 2931 if (jd[k] != i) { 2932 jj[cnt++] = rstart + jd[k]; 2933 } 2934 } 2935 for (; j<io[i+1]; j++) { 2936 jj[cnt++] = garray[jo[j]]; 2937 } 2938 } 2939 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2940 PetscFunctionReturn(0); 2941 } 2942 2943 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2944 2945 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2946 2947 #undef __FUNCT__ 2948 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2949 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2950 { 2951 PetscErrorCode ierr; 2952 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2953 Mat B; 2954 Mat_MPIAIJ *b; 2955 2956 PetscFunctionBegin; 2957 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2958 2959 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2960 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2961 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2962 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2963 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2964 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2965 b = (Mat_MPIAIJ*) B->data; 2966 2967 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2968 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2969 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2970 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2971 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2972 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2973 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2974 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2975 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2976 if (reuse == MAT_REUSE_MATRIX) { 2977 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2978 } else { 2979 *newmat = B; 2980 } 2981 PetscFunctionReturn(0); 2982 } 
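/*
   Illustrative usage sketch (not part of the library source): an assembled
   MPIBAIJ matrix can be converted to MPIAIJ through MatConvert(), which
   dispatches to MatConvert_MPIBAIJ_MPIAIJ() above via the composed
   "MatConvert_mpibaij_mpiaij_C" method.  The sizes and block size below are
   arbitrary example values.

       Mat A,Aaij;
       ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,8,8,1,NULL,1,NULL,&A);CHKERRQ(ierr);
       ... insert values with MatSetValuesBlocked() and assemble A ...
       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aaij);CHKERRQ(ierr);
       ierr = MatDestroy(&Aaij);CHKERRQ(ierr);
       ierr = MatDestroy(&A);CHKERRQ(ierr);
*/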
2983 2984 /*MC 2985 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2986 2987 Options Database Keys: 2988 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2989 . -mat_block_size <bs> - set the blocksize used to store the matrix 2990 - -mat_use_hash_table <fact> 2991 2992 Level: beginner 2993 2994 .seealso: MatCreateMPIBAIJ 2995 M*/ 2996 2997 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "MatCreate_MPIBAIJ" 3001 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3002 { 3003 Mat_MPIBAIJ *b; 3004 PetscErrorCode ierr; 3005 PetscBool flg = PETSC_FALSE; 3006 3007 PetscFunctionBegin; 3008 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3009 B->data = (void*)b; 3010 3011 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3012 B->assembled = PETSC_FALSE; 3013 3014 B->insertmode = NOT_SET_VALUES; 3015 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3016 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3017 3018 /* build local table of row and column ownerships */ 3019 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3020 3021 /* build cache for off array entries formed */ 3022 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3023 3024 b->donotstash = PETSC_FALSE; 3025 b->colmap = NULL; 3026 b->garray = NULL; 3027 b->roworiented = PETSC_TRUE; 3028 3029 /* stuff used in block assembly */ 3030 b->barray = 0; 3031 3032 /* stuff used for matrix vector multiply */ 3033 b->lvec = 0; 3034 b->Mvctx = 0; 3035 3036 /* stuff for MatGetRow() */ 3037 b->rowindices = 0; 3038 b->rowvalues = 0; 3039 b->getrowactive = PETSC_FALSE; 3040 3041 /* hash table stuff */ 3042 b->ht = 0; 3043 b->hd = 0; 3044 b->ht_size = 0; 3045 b->ht_flag = PETSC_FALSE; 3046 b->ht_fact = 0; 3047 b->ht_total_ct = 0; 3048 b->ht_insert_ct = 0; 3049 3050 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3051 b->ijonly = PETSC_FALSE; 3052 3053 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3065 ierr = 
PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
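
   Example (an illustrative sketch, not taken from this file; comm, m, n, M and N stand for values the
   caller provides and must be divisible by the blocksize):
.vb
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(B,2,5,NULL,2,NULL);CHKERRQ(ierr);  /* bs = 2, at most 5 diagonal and 2 off-diagonal blocks per block row */
.ve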
3131 3132 Storage Information: 3133 For a square global matrix we define each processor's diagonal portion 3134 to be its local rows and the corresponding columns (a square submatrix); 3135 each processor's off-diagonal portion encompasses the remainder of the 3136 local matrix (a rectangular submatrix). 3137 3138 The user can specify preallocated storage for the diagonal part of 3139 the local submatrix with either d_nz or d_nnz (not both). Set 3140 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3141 memory allocation. Likewise, specify preallocated storage for the 3142 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3143 3144 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3145 the figure below we depict these three local rows and all columns (0-11). 3146 3147 .vb 3148 0 1 2 3 4 5 6 7 8 9 10 11 3149 -------------------------- 3150 row 3 |o o o d d d o o o o o o 3151 row 4 |o o o d d d o o o o o o 3152 row 5 |o o o d d d o o o o o o 3153 -------------------------- 3154 .ve 3155 3156 Thus, any entries in the d locations are stored in the d (diagonal) 3157 submatrix, and any entries in the o locations are stored in the 3158 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3159 stored simply in the MATSEQBAIJ format for compressed row storage. 3160 3161 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3162 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3163 In general, for PDE problems in which most nonzeros are near the diagonal, 3164 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3165 or you will get TERRIBLE performance; see the users' manual chapter on 3166 matrices. 3167 3168 You can call MatGetInfo() to get information on how effective the preallocation was; 3169 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3170 You can also run with the option -info and look for messages with the string 3171 malloc in them to see if additional memory allocation was needed. 3172 3173 Level: intermediate 3174 3175 .keywords: matrix, block, aij, compressed row, sparse, parallel 3176 3177 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3178 @*/ 3179 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3180 { 3181 PetscErrorCode ierr; 3182 3183 PetscFunctionBegin; 3184 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3185 PetscValidType(B,1); 3186 PetscValidLogicalCollectiveInt(B,bs,2); 3187 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3188 PetscFunctionReturn(0); 3189 } 3190 3191 #undef __FUNCT__ 3192 #define __FUNCT__ "MatCreateBAIJ" 3193 /*@C 3194 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3195 (block compressed row). For good matrix assembly performance 3196 the user should preallocate the matrix storage by setting the parameters 3197 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3198 performance can be increased by more than a factor of 50. 3199 3200 Collective on MPI_Comm 3201 3202 Input Parameters: 3203 + comm - MPI communicator 3204 . bs - size of block, the blocks are ALWAYS square. 
One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).
3262 3263 .vb 3264 0 1 2 3 4 5 6 7 8 9 10 11 3265 -------------------------- 3266 row 3 |o o o d d d o o o o o o 3267 row 4 |o o o d d d o o o o o o 3268 row 5 |o o o d d d o o o o o o 3269 -------------------------- 3270 .ve 3271 3272 Thus, any entries in the d locations are stored in the d (diagonal) 3273 submatrix, and any entries in the o locations are stored in the 3274 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3275 stored simply in the MATSEQBAIJ format for compressed row storage. 3276 3277 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3278 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3279 In general, for PDE problems in which most nonzeros are near the diagonal, 3280 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3281 or you will get TERRIBLE performance; see the users' manual chapter on 3282 matrices. 3283 3284 Level: intermediate 3285 3286 .keywords: matrix, block, aij, compressed row, sparse, parallel 3287 3288 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3289 @*/ 3290 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3291 { 3292 PetscErrorCode ierr; 3293 PetscMPIInt size; 3294 3295 PetscFunctionBegin; 3296 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3297 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3298 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3299 if (size > 1) { 3300 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3301 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3302 } else { 3303 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3304 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3305 } 3306 PetscFunctionReturn(0); 3307 } 3308 3309 #undef __FUNCT__ 3310 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3311 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3312 { 3313 Mat mat; 3314 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3315 PetscErrorCode ierr; 3316 PetscInt len=0; 3317 3318 PetscFunctionBegin; 3319 *newmat = 0; 3320 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3321 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3322 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3323 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3324 3325 mat->factortype = matin->factortype; 3326 mat->preallocated = PETSC_TRUE; 3327 mat->assembled = PETSC_TRUE; 3328 mat->insertmode = NOT_SET_VALUES; 3329 3330 a = (Mat_MPIBAIJ*)mat->data; 3331 mat->rmap->bs = matin->rmap->bs; 3332 a->bs2 = oldmat->bs2; 3333 a->mbs = oldmat->mbs; 3334 a->nbs = oldmat->nbs; 3335 a->Mbs = oldmat->Mbs; 3336 a->Nbs = oldmat->Nbs; 3337 3338 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3339 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3340 3341 a->size = oldmat->size; 3342 a->rank = oldmat->rank; 3343 a->donotstash = oldmat->donotstash; 3344 a->roworiented = oldmat->roworiented; 3345 a->rowindices = 0; 3346 a->rowvalues = 0; 3347 a->getrowactive = PETSC_FALSE; 3348 a->barray = 0; 3349 a->rstartbs = oldmat->rstartbs; 3350 a->rendbs = oldmat->rendbs; 3351 a->cstartbs = oldmat->cstartbs; 3352 a->cendbs = 
oldmat->cendbs; 3353 3354 /* hash table stuff */ 3355 a->ht = 0; 3356 a->hd = 0; 3357 a->ht_size = 0; 3358 a->ht_flag = oldmat->ht_flag; 3359 a->ht_fact = oldmat->ht_fact; 3360 a->ht_total_ct = 0; 3361 a->ht_insert_ct = 0; 3362 3363 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3364 if (oldmat->colmap) { 3365 #if defined(PETSC_USE_CTABLE) 3366 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3367 #else 3368 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3369 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3370 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3371 #endif 3372 } else a->colmap = 0; 3373 3374 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3375 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3376 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3377 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3378 } else a->garray = 0; 3379 3380 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3381 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3382 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3383 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3384 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3385 3386 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3387 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3388 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3389 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3390 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3391 *newmat = mat; 3392 PetscFunctionReturn(0); 3393 } 3394 3395 #undef __FUNCT__ 3396 #define __FUNCT__ "MatLoad_MPIBAIJ" 3397 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3398 { 3399 PetscErrorCode ierr; 3400 int fd; 3401 PetscInt i,nz,j,rstart,rend; 3402 PetscScalar *vals,*buf; 3403 MPI_Comm comm; 3404 MPI_Status status; 3405 PetscMPIInt rank,size,maxnz; 3406 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3407 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3408 PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 3409 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3410 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3411 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3412 3413 PetscFunctionBegin; 3414 /* force binary viewer to load .info file if it has not yet done so */ 3415 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3416 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3417 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3418 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3419 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3420 if (bs < 0) bs = 1; 3421 3422 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3423 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3424 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3425 if (!rank) { 3426 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3427 
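    /* header layout in a PETSc binary matrix file: header[0] = classid, header[1] = number of rows, header[2] = number of columns, header[3] = total number of nonzeros (not used here) */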
if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3428 } 3429 3430 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3431 3432 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3433 M = header[1]; N = header[2]; 3434 3435 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3436 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3437 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3438 3439 /* If global sizes are set, check if they are consistent with that given in the file */ 3440 if (sizesset) { 3441 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3442 } 3443 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3444 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3445 3446 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3447 3448 /* 3449 This code adds extra rows to make sure the number of rows is 3450 divisible by the blocksize 3451 */ 3452 Mbs = M/bs; 3453 extra_rows = bs - M + bs*Mbs; 3454 if (extra_rows == bs) extra_rows = 0; 3455 else Mbs++; 3456 if (extra_rows && !rank) { 3457 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3458 } 3459 3460 /* determine ownership of all rows */ 3461 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3462 mbs = Mbs/size + ((Mbs % size) > rank); 3463 m = mbs*bs; 3464 } else { /* User set */ 3465 m = newmat->rmap->n; 3466 mbs = m/bs; 3467 } 3468 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3469 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3470 3471 /* process 0 needs enough room for process with most rows */ 3472 if (!rank) { 3473 mmax = rowners[1]; 3474 for (i=2; i<=size; i++) { 3475 mmax = PetscMax(mmax,rowners[i]); 3476 } 3477 mmax*=bs; 3478 } else mmax = -1; /* unused, but compiler warns anyway */ 3479 3480 rowners[0] = 0; 3481 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3482 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3483 rstart = rowners[rank]; 3484 rend = rowners[rank+1]; 3485 3486 /* distribute row lengths to all processors */ 3487 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3488 if (!rank) { 3489 mend = m; 3490 if (size == 1) mend = mend - extra_rows; 3491 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3492 for (j=mend; j<m; j++) locrowlens[j] = 1; 3493 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3494 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3495 for (j=0; j<m; j++) { 3496 procsnz[0] += locrowlens[j]; 3497 } 3498 for (i=1; i<size; i++) { 3499 mend = browners[i+1] - browners[i]; 3500 if (i == size-1) mend = mend - extra_rows; 3501 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3502 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3503 /* calculate the number of nonzeros on each processor */ 3504 for (j=0; j<browners[i+1]-browners[i]; j++) { 3505 procsnz[i] += rowlengths[j]; 3506 } 3507 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3508 } 3509 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3510 } else { 3511 ierr = 
MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3512 } 3513 3514 if (!rank) { 3515 /* determine max buffer needed and allocate it */ 3516 maxnz = procsnz[0]; 3517 for (i=1; i<size; i++) { 3518 maxnz = PetscMax(maxnz,procsnz[i]); 3519 } 3520 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3521 3522 /* read in my part of the matrix column indices */ 3523 nz = procsnz[0]; 3524 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3525 mycols = ibuf; 3526 if (size == 1) nz -= extra_rows; 3527 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3528 if (size == 1) { 3529 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3530 } 3531 3532 /* read in every ones (except the last) and ship off */ 3533 for (i=1; i<size-1; i++) { 3534 nz = procsnz[i]; 3535 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3536 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3537 } 3538 /* read in the stuff for the last proc */ 3539 if (size != 1) { 3540 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3541 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3542 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3543 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3544 } 3545 ierr = PetscFree(cols);CHKERRQ(ierr); 3546 } else { 3547 /* determine buffer space needed for message */ 3548 nz = 0; 3549 for (i=0; i<m; i++) { 3550 nz += locrowlens[i]; 3551 } 3552 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3553 mycols = ibuf; 3554 /* receive message of column indices*/ 3555 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3556 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3557 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3558 } 3559 3560 /* loop over local rows, determining number of off diagonal entries */ 3561 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3562 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3563 rowcount = 0; nzcount = 0; 3564 for (i=0; i<mbs; i++) { 3565 dcount = 0; 3566 odcount = 0; 3567 for (j=0; j<bs; j++) { 3568 kmax = locrowlens[rowcount]; 3569 for (k=0; k<kmax; k++) { 3570 tmp = mycols[nzcount++]/bs; 3571 if (!mask[tmp]) { 3572 mask[tmp] = 1; 3573 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3574 else masked1[dcount++] = tmp; 3575 } 3576 } 3577 rowcount++; 3578 } 3579 3580 dlens[i] = dcount; 3581 odlens[i] = odcount; 3582 3583 /* zero out the mask elements we set */ 3584 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3585 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3586 } 3587 3588 3589 if (!sizesset) { 3590 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3591 } 3592 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3593 3594 if (!rank) { 3595 ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr); 3596 /* read in my part of the matrix numerical values */ 3597 nz = procsnz[0]; 3598 vals = buf; 3599 mycols = ibuf; 3600 if (size == 1) nz -= extra_rows; 3601 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3602 if (size == 1) { 3603 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3604 } 3605 3606 /* insert into matrix */ 3607 jj = rstart*bs; 3608 for (i=0; i<m; i++) { 3609 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3610 mycols += locrowlens[i]; 3611 vals += locrowlens[i]; 3612 jj++; 3613 } 3614 /* read in other processors (except 
the last one) and ship out */ 3615 for (i=1; i<size-1; i++) { 3616 nz = procsnz[i]; 3617 vals = buf; 3618 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3619 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3620 } 3621 /* the last proc */ 3622 if (size != 1) { 3623 nz = procsnz[i] - extra_rows; 3624 vals = buf; 3625 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3626 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3627 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3628 } 3629 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3630 } else { 3631 /* receive numeric values */ 3632 ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr); 3633 3634 /* receive message of values*/ 3635 vals = buf; 3636 mycols = ibuf; 3637 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3638 3639 /* insert into matrix */ 3640 jj = rstart*bs; 3641 for (i=0; i<m; i++) { 3642 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3643 mycols += locrowlens[i]; 3644 vals += locrowlens[i]; 3645 jj++; 3646 } 3647 } 3648 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3649 ierr = PetscFree(buf);CHKERRQ(ierr); 3650 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3651 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3652 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3653 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3654 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 PetscFunctionReturn(0); 3657 } 3658 3659 #undef __FUNCT__ 3660 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3661 /*@ 3662 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3663 3664 Input Parameters: 3665 . mat - the matrix 3666 . 
fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  if (Ad)     *Ad     = a->A;
  if (Ao)     *Ao     = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3739 3740 Level: advanced 3741 3742 .seealso: MatSetValuesBlocked() 3743 @*/ 3744 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3745 { 3746 /* convert input arguments to C version */ 3747 Mat mat = *matin; 3748 PetscInt m = *min, n = *nin; 3749 InsertMode addv = *addvin; 3750 3751 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3752 const MatScalar *value; 3753 MatScalar *barray = baij->barray; 3754 PetscBool roworiented = baij->roworiented; 3755 PetscErrorCode ierr; 3756 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3757 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3758 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3759 3760 PetscFunctionBegin; 3761 /* tasks normally handled by MatSetValuesBlocked() */ 3762 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3763 #if defined(PETSC_USE_DEBUG) 3764 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3765 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3766 #endif 3767 if (mat->assembled) { 3768 mat->was_assembled = PETSC_TRUE; 3769 mat->assembled = PETSC_FALSE; 3770 } 3771 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3772 3773 3774 if (!barray) { 3775 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3776 baij->barray = barray; 3777 } 3778 3779 if (roworiented) stepval = (n-1)*bs; 3780 else stepval = (m-1)*bs; 3781 3782 for (i=0; i<m; i++) { 3783 if (im[i] < 0) continue; 3784 #if defined(PETSC_USE_DEBUG) 3785 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3786 #endif 3787 if (im[i] >= rstart && im[i] < rend) { 3788 row = im[i] - rstart; 3789 for (j=0; j<n; j++) { 3790 /* If NumCol = 1 then a copy is not required */ 3791 if ((roworiented) && (n == 1)) { 3792 barray = (MatScalar*)v + i*bs2; 3793 } else if ((!roworiented) && (m == 1)) { 3794 barray = (MatScalar*)v + j*bs2; 3795 } else { /* Here a copy is required */ 3796 if (roworiented) { 3797 value = v + i*(stepval+bs)*bs + j*bs; 3798 } else { 3799 value = v + j*(stepval+bs)*bs + i*bs; 3800 } 3801 for (ii=0; ii<bs; ii++,value+=stepval) { 3802 for (jj=0; jj<bs; jj++) { 3803 *barray++ = *value++; 3804 } 3805 } 3806 barray -=bs2; 3807 } 3808 3809 if (in[j] >= cstart && in[j] < cend) { 3810 col = in[j] - cstart; 3811 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3812 } else if (in[j] < 0) continue; 3813 #if defined(PETSC_USE_DEBUG) 3814 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3815 #endif 3816 else { 3817 if (mat->was_assembled) { 3818 if (!baij->colmap) { 3819 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3820 } 3821 3822 #if defined(PETSC_USE_DEBUG) 3823 #if defined(PETSC_USE_CTABLE) 3824 { PetscInt data; 3825 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3826 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3827 } 3828 #else 3829 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3830 #endif 3831 #endif 3832 #if defined(PETSC_USE_CTABLE) 3833 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3834 col = (col - 1)/bs; 3835 #else 3836 col = (baij->colmap[in[j]] - 1)/bs; 3837 
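          /* colmap[] (built in MatCreateColmap_MPIBAIJ_Private()) stores 1 + bs*(local block column) for each global block column present in B, and 0 otherwise */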
#endif 3838 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3839 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3840 col = in[j]; 3841 } 3842 } else col = in[j]; 3843 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3844 } 3845 } 3846 } else { 3847 if (!baij->donotstash) { 3848 if (roworiented) { 3849 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3850 } else { 3851 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3852 } 3853 } 3854 } 3855 } 3856 3857 /* task normally handled by MatSetValuesBlocked() */ 3858 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3859 PetscFunctionReturn(0); 3860 } 3861 3862 #undef __FUNCT__ 3863 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3864 /*@ 3865 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3866 CSR format the local rows. 3867 3868 Collective on MPI_Comm 3869 3870 Input Parameters: 3871 + comm - MPI communicator 3872 . bs - the block size, only a block size of 1 is supported 3873 . m - number of local rows (Cannot be PETSC_DECIDE) 3874 . n - This value should be the same as the local size used in creating the 3875 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3876 calculated if N is given) For square matrices n is almost always m. 3877 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3878 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3879 . i - row indices 3880 . j - column indices 3881 - a - matrix values 3882 3883 Output Parameter: 3884 . mat - the matrix 3885 3886 Level: intermediate 3887 3888 Notes: 3889 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3890 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3891 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3892 3893 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3894 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3895 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3896 with column-major ordering within blocks. 3897 3898 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
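
   Example (an illustrative sketch, not from this file; i, j and a are placeholder CSR arrays describing the local rows, with bs = 1):
.vb
   ierr = MatCreateMPIBAIJWithArrays(comm,1,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
.ve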

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,sum,mbs,Nbs;
    PetscInt    *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
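
/*
   Usage sketch (an assumption about the calling convention, not code from this file): the routine
   above is the MPIBAIJ implementation behind the generic MatCreateMPIMatConcatenateSeqMat()
   interface, which concatenates one sequential BAIJ matrix per process into a parallel matrix:

     Mat seqmat;    (a MATSEQBAIJ matrix holding this process's rows)
     Mat mpimat;
     ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
*/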