#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
    N = nrow1++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
    N = nrow2++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar
v[],InsertMode addv) 108 { 109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 110 PetscScalar value; 111 PetscErrorCode ierr; 112 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 113 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 114 PetscTruth roworiented = aij->roworiented; 115 116 /* Some Variables required in the macro */ 117 Mat A = aij->A; 118 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 119 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 120 PetscScalar *aa = a->a; 121 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 122 Mat B = aij->B; 123 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 124 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 125 PetscScalar *ba = b->a; 126 127 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 128 PetscInt nonew = a->nonew; 129 PetscScalar *ap1,*ap2; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (im[i] < 0) continue; 134 #if defined(PETSC_USE_DEBUG) 135 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 136 #endif 137 if (im[i] >= rstart && im[i] < rend) { 138 row = im[i] - rstart; 139 lastcol1 = -1; 140 rp1 = aj + ai[row]; 141 ap1 = aa + ai[row]; 142 rmax1 = aimax[row]; 143 nrow1 = ailen[row]; 144 low1 = 0; 145 high1 = nrow1; 146 lastcol2 = -1; 147 rp2 = bj + bi[row]; 148 ap2 = ba + bi[row]; 149 rmax2 = bimax[row]; 150 nrow2 = bilen[row]; 151 low2 = 0; 152 high2 = nrow2; 153 154 for (j=0; j<n; j++) { 155 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 156 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 157 if (in[j] >= cstart && in[j] < cend){ 158 col = in[j] - cstart; 159 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 160 } else if (in[j] < 0) continue; 161 #if defined(PETSC_USE_DEBUG) 162 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 163 #endif 164 else { 165 if (mat->was_assembled) { 166 if (!aij->colmap) { 167 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 168 } 169 #if defined (PETSC_USE_CTABLE) 170 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 171 col--; 172 #else 173 col = aij->colmap[in[j]] - 1; 174 #endif 175 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 176 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 177 col = in[j]; 178 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 179 B = aij->B; 180 b = (Mat_SeqAIJ*)B->data; 181 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 182 rp2 = bj + bi[row]; 183 ap2 = ba + bi[row]; 184 rmax2 = bimax[row]; 185 nrow2 = bilen[row]; 186 low2 = 0; 187 high2 = nrow2; 188 bm = aij->B->m; 189 ba = b->a; 190 } 191 } else col = in[j]; 192 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 193 } 194 } 195 } else { 196 if (!aij->donotstash) { 197 if (roworiented) { 198 if (ignorezeroentries && v[i*n] == 0.0) continue; 199 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 200 } else { 201 if (ignorezeroentries && v[i] == 0.0) continue; 202 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 203 } 204 } 205 } 206 } 207 PetscFunctionReturn(0); 208 } 209 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatGetValues_MPIAIJ" 213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 214 { 215 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 216 PetscErrorCode ierr; 217 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 218 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 219 220 PetscFunctionBegin; 221 for (i=0; i<m; i++) { 222 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 223 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 224 if (idxm[i] >= rstart && idxm[i] < rend) { 225 row = idxm[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 228 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 229 if (idxn[j] >= cstart && idxn[j] < cend){ 230 col = idxn[j] - cstart; 231 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 232 } else { 233 if (!aij->colmap) { 234 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 235 } 236 #if defined (PETSC_USE_CTABLE) 237 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 238 col --; 239 #else 240 col = aij->colmap[idxn[j]] - 1; 241 #endif 242 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 243 else { 244 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 245 } 246 } 247 } 248 } else { 249 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 250 } 251 } 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 258 { 259 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 260 PetscErrorCode ierr; 261 PetscInt nstash,reallocs; 262 InsertMode addv; 263 264 PetscFunctionBegin; 265 if (aij->donotstash) { 266 PetscFunctionReturn(0); 267 } 268 269 /* make sure all processors are either in INSERTMODE or ADDMODE */ 270 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 271 if (addv == (ADD_VALUES|INSERT_VALUES)) { 272 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 273 } 274 mat->insertmode = addv; /* in case this processor had no cache */ 275 276 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 277 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 278 ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 287 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 288 PetscErrorCode ierr; 289 PetscMPIInt n; 290 PetscInt i,j,rstart,ncols,flg; 291 PetscInt *row,*col,other_disassembled; 292 PetscScalar *val; 293 InsertMode addv = mat->insertmode; 294 295 PetscFunctionBegin; 296 if (!aij->donotstash) { 297 while (1) { 298 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 299 if (!flg) break; 300 301 for (i=0; i<n;) { 302 /* Now identify the consecutive vals belonging to the same row */ 303 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 304 if (j < n) ncols = j-i; 305 else ncols = n-i; 306 /* Now assemble all these values with a single function call */ 307 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 308 i = j; 309 } 310 } 311 ierr = 
MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory and thus calling lots of mallocs and slowing things down.
481 482 Contributed by: Matthew Knepley 483 */ 484 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 485 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 486 if ((diag != 0.0) && (l->A->M == l->A->N)) { 487 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 488 } else if (diag != 0.0) { 489 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 490 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 491 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 492 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 493 } 494 for (i = 0; i < slen; i++) { 495 row = lrows[i] + rstart; 496 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 497 } 498 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 499 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 500 } else { 501 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 502 } 503 ierr = PetscFree(lrows);CHKERRQ(ierr); 504 505 /* wait on sends */ 506 if (nsends) { 507 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 508 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 509 ierr = PetscFree(send_status);CHKERRQ(ierr); 510 } 511 ierr = PetscFree(send_waits);CHKERRQ(ierr); 512 ierr = PetscFree(svalues);CHKERRQ(ierr); 513 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "MatMult_MPIAIJ" 519 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 520 { 521 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 522 PetscErrorCode ierr; 523 PetscInt nt; 524 525 PetscFunctionBegin; 526 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 527 if (nt != A->n) { 528 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 529 } 530 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 531 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 532 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 533 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 534 PetscFunctionReturn(0); 535 } 536 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatMultAdd_MPIAIJ" 539 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 540 { 541 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 542 PetscErrorCode ierr; 543 544 PetscFunctionBegin; 545 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 546 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 547 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 548 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 554 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 555 { 556 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 557 PetscErrorCode ierr; 558 PetscTruth merged; 559 560 PetscFunctionBegin; 561 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 562 /* do nondiagonal part */ 563 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 564 if (!merged) { 565 /* send it on its way */ 566 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 567 /* do local part */ 568 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 569 /* receive remote parts: note this assumes the values are not actually */ 570 /* added in yy until the next line, */ 571 ierr = 
VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 572 } else { 573 /* do local part */ 574 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 575 /* send it on its way */ 576 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 577 /* values actually were received in the Begin() but we need to call this nop */ 578 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 579 } 580 PetscFunctionReturn(0); 581 } 582 583 EXTERN_C_BEGIN 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 586 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 587 { 588 MPI_Comm comm; 589 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 590 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 591 IS Me,Notme; 592 PetscErrorCode ierr; 593 PetscInt M,N,first,last,*notme,i; 594 PetscMPIInt size; 595 596 PetscFunctionBegin; 597 598 /* Easy test: symmetric diagonal block */ 599 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 600 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 601 if (!*f) PetscFunctionReturn(0); 602 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 603 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 604 if (size == 1) PetscFunctionReturn(0); 605 606 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 607 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 608 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 609 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 610 for (i=0; i<first; i++) notme[i] = i; 611 for (i=last; i<M; i++) notme[i-last+first] = i; 612 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 613 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 614 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 615 Aoff = Aoffs[0]; 616 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 617 Boff = Boffs[0]; 618 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 619 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 620 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 621 ierr = ISDestroy(Me);CHKERRQ(ierr); 622 ierr = ISDestroy(Notme);CHKERRQ(ierr); 623 624 PetscFunctionReturn(0); 625 } 626 EXTERN_C_END 627 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 630 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 631 { 632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 633 PetscErrorCode ierr; 634 635 PetscFunctionBegin; 636 /* do nondiagonal part */ 637 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 638 /* send it on its way */ 639 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 640 /* do local part */ 641 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 642 /* receive remote parts */ 643 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 /* 648 This only works correctly for square matrices where the subblock A->A is the 649 diagonal block 650 */ 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 653 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 654 { 655 PetscErrorCode ierr; 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 658 PetscFunctionBegin; 659 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 660 if 
(a->rstart != a->cstart || a->rend != a->cend) { 661 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 662 } 663 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatScale_MPIAIJ" 669 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 670 { 671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 672 PetscErrorCode ierr; 673 674 PetscFunctionBegin; 675 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 676 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatDestroy_MPIAIJ" 682 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 #if defined(PETSC_USE_LOG) 689 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 690 #endif 691 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 692 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 693 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 694 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 695 #if defined (PETSC_USE_CTABLE) 696 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 697 #else 698 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 699 #endif 700 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 701 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 702 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 703 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 704 ierr = PetscFree(aij);CHKERRQ(ierr); 705 706 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 707 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 708 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 709 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 710 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 711 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 712 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatView_MPIAIJ_Binary" 718 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 722 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 723 PetscErrorCode ierr; 724 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 725 int fd; 726 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 727 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 728 PetscScalar *column_values; 729 730 PetscFunctionBegin; 731 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 732 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 733 nz = A->nz + B->nz; 734 if (!rank) { 735 header[0] = MAT_FILE_COOKIE; 736 header[1] = mat->M; 737 header[2] = mat->N; 738 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 739 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 740 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 741 /* get largest number of rows any processor has */ 742 rlen = mat->m; 
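    /* rank 0 funnels all of the output through its own buffers: determine the largest
       number of local rows on any process so that a single buffer can hold each
       incoming row-length message in turn */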
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr =
MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 832 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 833 } 834 } else { 835 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 836 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 837 } 838 ierr = PetscFree(column_values);CHKERRQ(ierr); 839 PetscFunctionReturn(0); 840 } 841 842 #undef __FUNCT__ 843 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 844 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 845 { 846 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 847 PetscErrorCode ierr; 848 PetscMPIInt rank = aij->rank,size = aij->size; 849 PetscTruth isdraw,iascii,isbinary; 850 PetscViewer sviewer; 851 PetscViewerFormat format; 852 853 PetscFunctionBegin; 854 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 855 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 856 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 857 if (iascii) { 858 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 859 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 860 MatInfo info; 861 PetscTruth inodes; 862 863 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 864 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 865 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 866 if (!inodes) { 867 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 868 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 869 } else { 870 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 871 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 872 } 873 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 874 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 875 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 876 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 877 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 878 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } else if (format == PETSC_VIEWER_ASCII_INFO) { 881 PetscInt inodecount,inodelimit,*inodes; 882 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 883 if (inodes) { 884 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 885 } else { 886 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 887 } 888 PetscFunctionReturn(0); 889 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 890 PetscFunctionReturn(0); 891 } 892 } else if (isbinary) { 893 if (size == 1) { 894 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 895 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 896 } else { 897 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 898 } 899 PetscFunctionReturn(0); 900 } else if (isdraw) { 901 PetscDraw draw; 902 PetscTruth isnull; 903 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 904 
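    /* a "null" draw window means graphics output is disabled, so there is nothing to render and we return early */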
ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 905 } 906 907 if (size == 1) { 908 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 909 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 910 } else { 911 /* assemble the entire matrix onto first processor. */ 912 Mat A; 913 Mat_SeqAIJ *Aloc; 914 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 915 PetscScalar *a; 916 917 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 918 if (!rank) { 919 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 920 } else { 921 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 922 } 923 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 924 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 925 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 926 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 927 928 /* copy over the A part */ 929 Aloc = (Mat_SeqAIJ*)aij->A->data; 930 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 931 row = aij->rstart; 932 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 933 for (i=0; i<m; i++) { 934 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 935 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 936 } 937 aj = Aloc->j; 938 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 939 940 /* copy over the B part */ 941 Aloc = (Mat_SeqAIJ*)aij->B->data; 942 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 943 row = aij->rstart; 944 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 945 ct = cols; 946 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 947 for (i=0; i<m; i++) { 948 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 949 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 950 } 951 ierr = PetscFree(ct);CHKERRQ(ierr); 952 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 953 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 954 /* 955 Everyone has to call to draw the matrix since the graphics waits are 956 synchronized across all processors that share the PetscDraw object 957 */ 958 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 959 if (!rank) { 960 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 961 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 962 } 963 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 964 ierr = MatDestroy(A);CHKERRQ(ierr); 965 } 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatView_MPIAIJ" 971 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 972 { 973 PetscErrorCode ierr; 974 PetscTruth iascii,isdraw,issocket,isbinary; 975 976 PetscFunctionBegin; 977 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 978 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 979 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 980 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 981 if (iascii || isdraw || isbinary || issocket) { 982 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 983 } else { 984 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 985 } 986 PetscFunctionReturn(0); 987 } 988 989 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatRelax_MPIAIJ" 993 PetscErrorCode MatRelax_MPIAIJ(Mat 
matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 994 { 995 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 996 PetscErrorCode ierr; 997 Vec bb1; 998 PetscScalar mone=-1.0; 999 1000 PetscFunctionBegin; 1001 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1002 1003 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1004 1005 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1006 if (flag & SOR_ZERO_INITIAL_GUESS) { 1007 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1008 its--; 1009 } 1010 1011 while (its--) { 1012 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1013 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1014 1015 /* update rhs: bb1 = bb - B*x */ 1016 ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr); 1017 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1018 1019 /* local sweep */ 1020 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1021 CHKERRQ(ierr); 1022 } 1023 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1024 if (flag & SOR_ZERO_INITIAL_GUESS) { 1025 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1026 its--; 1027 } 1028 while (its--) { 1029 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1030 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1031 1032 /* update rhs: bb1 = bb - B*x */ 1033 ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr); 1034 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1035 1036 /* local sweep */ 1037 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1038 CHKERRQ(ierr); 1039 } 1040 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1041 if (flag & SOR_ZERO_INITIAL_GUESS) { 1042 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1043 its--; 1044 } 1045 while (its--) { 1046 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1047 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1048 1049 /* update rhs: bb1 = bb - B*x */ 1050 ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr); 1051 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1052 1053 /* local sweep */ 1054 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1055 CHKERRQ(ierr); 1056 } 1057 } else { 1058 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1059 } 1060 1061 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1067 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1068 { 1069 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1070 Mat A = mat->A,B = mat->B; 1071 PetscErrorCode ierr; 1072 PetscReal isend[5],irecv[5]; 1073 1074 PetscFunctionBegin; 1075 info->block_size = 1.0; 1076 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1077 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1078 isend[3] = info->memory; isend[4] = info->mallocs; 1079 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1080 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] 
+= info->nz_unneeded; 1081 isend[3] += info->memory; isend[4] += info->mallocs; 1082 if (flag == MAT_LOCAL) { 1083 info->nz_used = isend[0]; 1084 info->nz_allocated = isend[1]; 1085 info->nz_unneeded = isend[2]; 1086 info->memory = isend[3]; 1087 info->mallocs = isend[4]; 1088 } else if (flag == MAT_GLOBAL_MAX) { 1089 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1090 info->nz_used = irecv[0]; 1091 info->nz_allocated = irecv[1]; 1092 info->nz_unneeded = irecv[2]; 1093 info->memory = irecv[3]; 1094 info->mallocs = irecv[4]; 1095 } else if (flag == MAT_GLOBAL_SUM) { 1096 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1097 info->nz_used = irecv[0]; 1098 info->nz_allocated = irecv[1]; 1099 info->nz_unneeded = irecv[2]; 1100 info->memory = irecv[3]; 1101 info->mallocs = irecv[4]; 1102 } 1103 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1104 info->fill_ratio_needed = 0; 1105 info->factor_mallocs = 0; 1106 info->rows_global = (double)matin->M; 1107 info->columns_global = (double)matin->N; 1108 info->rows_local = (double)matin->m; 1109 info->columns_local = (double)matin->N; 1110 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatSetOption_MPIAIJ" 1116 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1117 { 1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 switch (op) { 1123 case MAT_NO_NEW_NONZERO_LOCATIONS: 1124 case MAT_YES_NEW_NONZERO_LOCATIONS: 1125 case MAT_COLUMNS_UNSORTED: 1126 case MAT_COLUMNS_SORTED: 1127 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1128 case MAT_KEEP_ZEROED_ROWS: 1129 case MAT_NEW_NONZERO_LOCATION_ERR: 1130 case MAT_USE_INODES: 1131 case MAT_DO_NOT_USE_INODES: 1132 case MAT_IGNORE_ZERO_ENTRIES: 1133 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1134 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1135 break; 1136 case MAT_ROW_ORIENTED: 1137 a->roworiented = PETSC_TRUE; 1138 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1139 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1140 break; 1141 case MAT_ROWS_SORTED: 1142 case MAT_ROWS_UNSORTED: 1143 case MAT_YES_NEW_DIAGONALS: 1144 ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr); 1145 break; 1146 case MAT_COLUMN_ORIENTED: 1147 a->roworiented = PETSC_FALSE; 1148 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1149 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1150 break; 1151 case MAT_IGNORE_OFF_PROC_ENTRIES: 1152 a->donotstash = PETSC_TRUE; 1153 break; 1154 case MAT_NO_NEW_DIAGONALS: 1155 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1156 case MAT_SYMMETRIC: 1157 case MAT_STRUCTURALLY_SYMMETRIC: 1158 case MAT_HERMITIAN: 1159 case MAT_SYMMETRY_ETERNAL: 1160 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1161 break; 1162 case MAT_NOT_SYMMETRIC: 1163 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1164 case MAT_NOT_HERMITIAN: 1165 case MAT_NOT_SYMMETRY_ETERNAL: 1166 break; 1167 default: 1168 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatGetRow_MPIAIJ" 1175 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1176 { 1177 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1178 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1179 PetscErrorCode ierr; 1180 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1181 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1182 PetscInt *cmap,*idx_p; 1183 1184 
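  /* A row of the parallel matrix is produced by merging the corresponding row of the
     diagonal block A (local column indices shifted by cstart) with the row of the
     off-diagonal block B (local column indices mapped to global ones through garray),
     keeping the result ordered by global column number in the rowindices/rowvalues
     work arrays allocated below. */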
PetscFunctionBegin; 1185 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1186 mat->getrowactive = PETSC_TRUE; 1187 1188 if (!mat->rowvalues && (idx || v)) { 1189 /* 1190 allocate enough space to hold information from the longest row. 1191 */ 1192 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1193 PetscInt max = 1,tmp; 1194 for (i=0; i<matin->m; i++) { 1195 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1196 if (max < tmp) { max = tmp; } 1197 } 1198 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1199 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1200 } 1201 1202 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1203 lrow = row - rstart; 1204 1205 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1206 if (!v) {pvA = 0; pvB = 0;} 1207 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1208 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1209 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1210 nztot = nzA + nzB; 1211 1212 cmap = mat->garray; 1213 if (v || idx) { 1214 if (nztot) { 1215 /* Sort by increasing column numbers, assuming A and B already sorted */ 1216 PetscInt imark = -1; 1217 if (v) { 1218 *v = v_p = mat->rowvalues; 1219 for (i=0; i<nzB; i++) { 1220 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1221 else break; 1222 } 1223 imark = i; 1224 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1225 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1226 } 1227 if (idx) { 1228 *idx = idx_p = mat->rowindices; 1229 if (imark > -1) { 1230 for (i=0; i<imark; i++) { 1231 idx_p[i] = cmap[cworkB[i]]; 1232 } 1233 } else { 1234 for (i=0; i<nzB; i++) { 1235 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1236 else break; 1237 } 1238 imark = i; 1239 } 1240 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1241 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1242 } 1243 } else { 1244 if (idx) *idx = 0; 1245 if (v) *v = 0; 1246 } 1247 } 1248 *nz = nztot; 1249 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1250 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1256 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1257 { 1258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1259 1260 PetscFunctionBegin; 1261 if (!aij->getrowactive) { 1262 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1263 } 1264 aij->getrowactive = PETSC_FALSE; 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatNorm_MPIAIJ" 1270 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1274 PetscErrorCode ierr; 1275 PetscInt i,j,cstart = aij->cstart; 1276 PetscReal sum = 0.0; 1277 PetscScalar *v; 1278 1279 PetscFunctionBegin; 1280 if (aij->size == 1) { 1281 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1282 } else { 1283 if (type == NORM_FROBENIUS) { 1284 v = amat->a; 1285 for (i=0; i<amat->nz; i++) { 1286 #if defined(PETSC_USE_COMPLEX) 1287 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1288 #else 1289 sum += (*v)*(*v); v++; 1290 #endif 1291 } 1292 v = bmat->a; 1293 for (i=0; i<bmat->nz; i++) { 1294 
#if defined(PETSC_USE_COMPLEX) 1295 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1296 #else 1297 sum += (*v)*(*v); v++; 1298 #endif 1299 } 1300 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1301 *norm = sqrt(*norm); 1302 } else if (type == NORM_1) { /* max column norm */ 1303 PetscReal *tmp,*tmp2; 1304 PetscInt *jj,*garray = aij->garray; 1305 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1306 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1307 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1308 *norm = 0.0; 1309 v = amat->a; jj = amat->j; 1310 for (j=0; j<amat->nz; j++) { 1311 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1312 } 1313 v = bmat->a; jj = bmat->j; 1314 for (j=0; j<bmat->nz; j++) { 1315 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1316 } 1317 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1318 for (j=0; j<mat->N; j++) { 1319 if (tmp2[j] > *norm) *norm = tmp2[j]; 1320 } 1321 ierr = PetscFree(tmp);CHKERRQ(ierr); 1322 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1323 } else if (type == NORM_INFINITY) { /* max row norm */ 1324 PetscReal ntemp = 0.0; 1325 for (j=0; j<aij->A->m; j++) { 1326 v = amat->a + amat->i[j]; 1327 sum = 0.0; 1328 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1329 sum += PetscAbsScalar(*v); v++; 1330 } 1331 v = bmat->a + bmat->i[j]; 1332 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1333 sum += PetscAbsScalar(*v); v++; 1334 } 1335 if (sum > ntemp) ntemp = sum; 1336 } 1337 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1338 } else { 1339 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1340 } 1341 } 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNCT__ 1346 #define __FUNCT__ "MatTranspose_MPIAIJ" 1347 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1348 { 1349 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1350 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1351 PetscErrorCode ierr; 1352 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1353 Mat B; 1354 PetscScalar *array; 1355 1356 PetscFunctionBegin; 1357 if (!matout && M != N) { 1358 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1359 } 1360 1361 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1362 ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr); 1363 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1364 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1365 1366 /* copy over the A part */ 1367 Aloc = (Mat_SeqAIJ*)a->A->data; 1368 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1369 row = a->rstart; 1370 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1371 for (i=0; i<m; i++) { 1372 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1373 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1374 } 1375 aj = Aloc->j; 1376 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1377 1378 /* copy over the B part */ 1379 Aloc = (Mat_SeqAIJ*)a->B->data; 1380 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1381 row = a->rstart; 1382 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1383 ct = cols; 1384 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1385 for (i=0; i<m; i++) { 1386 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1387 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1388 } 1389 ierr = PetscFree(ct);CHKERRQ(ierr); 1390 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 ierr = 
MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1392 if (matout) { 1393 *matout = B; 1394 } else { 1395 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1396 } 1397 PetscFunctionReturn(0); 1398 } 1399 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1402 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1403 { 1404 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1405 Mat a = aij->A,b = aij->B; 1406 PetscErrorCode ierr; 1407 PetscInt s1,s2,s3; 1408 1409 PetscFunctionBegin; 1410 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1411 if (rr) { 1412 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1413 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1414 /* Overlap communication with computation. */ 1415 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1416 } 1417 if (ll) { 1418 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1419 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1420 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1421 } 1422 /* scale the diagonal block */ 1423 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1424 1425 if (rr) { 1426 /* Do a scatter end and then right scale the off-diagonal block */ 1427 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1428 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1429 } 1430 1431 PetscFunctionReturn(0); 1432 } 1433 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1437 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1438 { 1439 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1440 PetscErrorCode ierr; 1441 1442 PetscFunctionBegin; 1443 if (!a->rank) { 1444 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1445 } 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1451 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1452 { 1453 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1454 PetscErrorCode ierr; 1455 1456 PetscFunctionBegin; 1457 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1458 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1463 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1464 { 1465 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1466 PetscErrorCode ierr; 1467 1468 PetscFunctionBegin; 1469 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatEqual_MPIAIJ" 1475 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1476 { 1477 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1478 Mat a,b,c,d; 1479 PetscTruth flg; 1480 PetscErrorCode ierr; 1481 1482 PetscFunctionBegin; 1483 a = matA->A; b = matA->B; 1484 c = matB->A; d = matB->B; 1485 1486 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1487 if (flg) { 1488 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1489 } 1490 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatCopy_MPIAIJ" 1496 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1497 { 1498 PetscErrorCode ierr; 1499 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1500 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1501 1502 PetscFunctionBegin; 1503 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
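     In that case we fall back on MatCopy_Basic(), which copies the values row by row
     through MatGetRow()/MatSetValues().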
*/ 1504 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1505 /* because of the column compression in the off-processor part of the matrix a->B, 1506 the number of columns in a->B and b->B may be different, hence we cannot call 1507 the MatCopy() directly on the two parts. If need be, we can provide a more 1508 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1509 then copying the submatrices */ 1510 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1511 } else { 1512 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1513 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1520 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1521 { 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #include "petscblaslapack.h" 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatAXPY_MPIAIJ" 1532 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1533 { 1534 PetscErrorCode ierr; 1535 PetscInt i; 1536 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1537 PetscBLASInt bnz,one=1; 1538 Mat_SeqAIJ *x,*y; 1539 1540 PetscFunctionBegin; 1541 if (str == SAME_NONZERO_PATTERN) { 1542 PetscScalar alpha = a; 1543 x = (Mat_SeqAIJ *)xx->A->data; 1544 y = (Mat_SeqAIJ *)yy->A->data; 1545 bnz = (PetscBLASInt)x->nz; 1546 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1547 x = (Mat_SeqAIJ *)xx->B->data; 1548 y = (Mat_SeqAIJ *)yy->B->data; 1549 bnz = (PetscBLASInt)x->nz; 1550 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1551 } else if (str == SUBSET_NONZERO_PATTERN) { 1552 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1553 1554 x = (Mat_SeqAIJ *)xx->B->data; 1555 y = (Mat_SeqAIJ *)yy->B->data; 1556 if (y->xtoy && y->XtoY != xx->B) { 1557 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1558 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1559 } 1560 if (!y->xtoy) { /* get xtoy */ 1561 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1562 y->XtoY = xx->B; 1563 } 1564 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1565 } else { 1566 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatConjugate_MPIAIJ" 1575 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1576 { 1577 #if defined(PETSC_USE_COMPLEX) 1578 PetscErrorCode ierr; 1579 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1580 1581 PetscFunctionBegin; 1582 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1583 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1584 #else 1585 PetscFunctionBegin; 1586 #endif 1587 PetscFunctionReturn(0); 1588 } 1589 1590 /* -------------------------------------------------------------------*/ 1591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1592 MatGetRow_MPIAIJ, 1593 MatRestoreRow_MPIAIJ, 1594 MatMult_MPIAIJ, 1595 /* 4*/ MatMultAdd_MPIAIJ, 1596 MatMultTranspose_MPIAIJ, 1597 MatMultTransposeAdd_MPIAIJ, 1598 0, 1599 0, 1600 0, 1601 /*10*/ 0, 1602 0, 1603 0, 1604 MatRelax_MPIAIJ, 1605 MatTranspose_MPIAIJ, 1606 /*15*/ MatGetInfo_MPIAIJ, 1607 MatEqual_MPIAIJ, 1608 MatGetDiagonal_MPIAIJ, 1609 MatDiagonalScale_MPIAIJ, 1610 MatNorm_MPIAIJ, 1611 /*20*/ 
MatAssemblyBegin_MPIAIJ, 1612 MatAssemblyEnd_MPIAIJ, 1613 0, 1614 MatSetOption_MPIAIJ, 1615 MatZeroEntries_MPIAIJ, 1616 /*25*/ MatZeroRows_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 0, 1621 /*30*/ MatSetUpPreallocation_MPIAIJ, 1622 0, 1623 0, 1624 0, 1625 0, 1626 /*35*/ MatDuplicate_MPIAIJ, 1627 0, 1628 0, 1629 0, 1630 0, 1631 /*40*/ MatAXPY_MPIAIJ, 1632 MatGetSubMatrices_MPIAIJ, 1633 MatIncreaseOverlap_MPIAIJ, 1634 MatGetValues_MPIAIJ, 1635 MatCopy_MPIAIJ, 1636 /*45*/ MatPrintHelp_MPIAIJ, 1637 MatScale_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 /*50*/ MatSetBlockSize_MPIAIJ, 1642 0, 1643 0, 1644 0, 1645 0, 1646 /*55*/ MatFDColoringCreate_MPIAIJ, 1647 0, 1648 MatSetUnfactored_MPIAIJ, 1649 0, 1650 0, 1651 /*60*/ MatGetSubMatrix_MPIAIJ, 1652 MatDestroy_MPIAIJ, 1653 MatView_MPIAIJ, 1654 MatGetPetscMaps_Petsc, 1655 0, 1656 /*65*/ 0, 1657 0, 1658 0, 1659 0, 1660 0, 1661 /*70*/ 0, 1662 0, 1663 MatSetColoring_MPIAIJ, 1664 #if defined(PETSC_HAVE_ADIC) 1665 MatSetValuesAdic_MPIAIJ, 1666 #else 1667 0, 1668 #endif 1669 MatSetValuesAdifor_MPIAIJ, 1670 /*75*/ 0, 1671 0, 1672 0, 1673 0, 1674 0, 1675 /*80*/ 0, 1676 0, 1677 0, 1678 0, 1679 /*84*/ MatLoad_MPIAIJ, 1680 0, 1681 0, 1682 0, 1683 0, 1684 0, 1685 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1686 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1687 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1688 MatPtAP_Basic, 1689 MatPtAPSymbolic_MPIAIJ, 1690 /*95*/ MatPtAPNumeric_MPIAIJ, 1691 0, 1692 0, 1693 0, 1694 0, 1695 /*100*/0, 1696 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1697 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1698 MatConjugate_MPIAIJ 1699 }; 1700 1701 /* ----------------------------------------------------------------------------------------*/ 1702 1703 EXTERN_C_BEGIN 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1706 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1707 { 1708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1713 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 EXTERN_C_END 1717 1718 EXTERN_C_BEGIN 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1721 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1722 { 1723 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1728 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 EXTERN_C_END 1732 1733 #include "petscpc.h" 1734 EXTERN_C_BEGIN 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1737 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1738 { 1739 Mat_MPIAIJ *b; 1740 PetscErrorCode ierr; 1741 PetscInt i; 1742 1743 PetscFunctionBegin; 1744 B->preallocated = PETSC_TRUE; 1745 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1746 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1747 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1748 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1749 if (d_nnz) { 1750 for (i=0; i<B->m; i++) { 1751 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1752 } 1753 } 1754 if (o_nnz) { 1755 for (i=0; i<B->m; i++) { 1756 if (o_nnz[i] < 0) 
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1757 } 1758 } 1759 b = (Mat_MPIAIJ*)B->data; 1760 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1761 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1762 1763 PetscFunctionReturn(0); 1764 } 1765 EXTERN_C_END 1766 1767 #undef __FUNCT__ 1768 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1769 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1770 { 1771 Mat mat; 1772 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1773 PetscErrorCode ierr; 1774 1775 PetscFunctionBegin; 1776 *newmat = 0; 1777 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1778 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1779 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1780 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1781 a = (Mat_MPIAIJ*)mat->data; 1782 1783 mat->factor = matin->factor; 1784 mat->bs = matin->bs; 1785 mat->assembled = PETSC_TRUE; 1786 mat->insertmode = NOT_SET_VALUES; 1787 mat->preallocated = PETSC_TRUE; 1788 1789 a->rstart = oldmat->rstart; 1790 a->rend = oldmat->rend; 1791 a->cstart = oldmat->cstart; 1792 a->cend = oldmat->cend; 1793 a->size = oldmat->size; 1794 a->rank = oldmat->rank; 1795 a->donotstash = oldmat->donotstash; 1796 a->roworiented = oldmat->roworiented; 1797 a->rowindices = 0; 1798 a->rowvalues = 0; 1799 a->getrowactive = PETSC_FALSE; 1800 1801 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1802 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1803 if (oldmat->colmap) { 1804 #if defined (PETSC_USE_CTABLE) 1805 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1806 #else 1807 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1808 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1809 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1810 #endif 1811 } else a->colmap = 0; 1812 if (oldmat->garray) { 1813 PetscInt len; 1814 len = oldmat->B->n; 1815 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1816 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1817 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1818 } else a->garray = 0; 1819 1820 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1821 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1822 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1823 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1824 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1825 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1826 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1827 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1828 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1829 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1830 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1831 *newmat = mat; 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #include "petscsys.h" 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatLoad_MPIAIJ" 1839 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1840 { 1841 Mat A; 1842 PetscScalar *vals,*svals; 1843 MPI_Comm comm = ((PetscObject)viewer)->comm; 1844 MPI_Status status; 1845 PetscErrorCode ierr; 1846 PetscMPIInt rank,size,tag = 
((PetscObject)viewer)->tag,maxnz; 1847 PetscInt i,nz,j,rstart,rend,mmax; 1848 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1849 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1850 PetscInt cend,cstart,n,*rowners; 1851 int fd; 1852 1853 PetscFunctionBegin; 1854 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1855 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1856 if (!rank) { 1857 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1858 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1859 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1860 } 1861 1862 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1863 M = header[1]; N = header[2]; 1864 /* determine ownership of all rows */ 1865 m = M/size + ((M % size) > rank); 1866 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1867 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1868 1869 /* First process needs enough room for process with most rows */ 1870 if (!rank) { 1871 mmax = rowners[1]; 1872 for (i=2; i<size; i++) { 1873 mmax = PetscMax(mmax,rowners[i]); 1874 } 1875 } else mmax = m; 1876 1877 rowners[0] = 0; 1878 for (i=2; i<=size; i++) { 1879 mmax = PetscMax(mmax,rowners[i]); 1880 rowners[i] += rowners[i-1]; 1881 } 1882 rstart = rowners[rank]; 1883 rend = rowners[rank+1]; 1884 1885 /* distribute row lengths to all processors */ 1886 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1887 if (!rank) { 1888 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1889 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1890 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1891 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1892 for (j=0; j<m; j++) { 1893 procsnz[0] += ourlens[j]; 1894 } 1895 for (i=1; i<size; i++) { 1896 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1897 /* calculate the number of nonzeros on each processor */ 1898 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1899 procsnz[i] += rowlengths[j]; 1900 } 1901 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1902 } 1903 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1904 } else { 1905 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1906 } 1907 1908 if (!rank) { 1909 /* determine max buffer needed and allocate it */ 1910 maxnz = 0; 1911 for (i=0; i<size; i++) { 1912 maxnz = PetscMax(maxnz,procsnz[i]); 1913 } 1914 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1915 1916 /* read in my part of the matrix column indices */ 1917 nz = procsnz[0]; 1918 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1919 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1920 1921 /* read in every one elses and ship off */ 1922 for (i=1; i<size; i++) { 1923 nz = procsnz[i]; 1924 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1925 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1926 } 1927 ierr = PetscFree(cols);CHKERRQ(ierr); 1928 } else { 1929 /* determine buffer space needed for message */ 1930 nz = 0; 1931 for (i=0; i<m; i++) { 1932 nz += ourlens[i]; 1933 } 1934 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1935 1936 /* receive message of column indices*/ 1937 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1938 ierr = 
MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1939 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1940 } 1941 1942 /* determine column ownership if matrix is not square */ 1943 if (N != M) { 1944 n = N/size + ((N % size) > rank); 1945 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1946 cstart = cend - n; 1947 } else { 1948 cstart = rstart; 1949 cend = rend; 1950 n = cend - cstart; 1951 } 1952 1953 /* loop over local rows, determining number of off diagonal entries */ 1954 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1955 jj = 0; 1956 for (i=0; i<m; i++) { 1957 for (j=0; j<ourlens[i]; j++) { 1958 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1959 jj++; 1960 } 1961 } 1962 1963 /* create our matrix */ 1964 for (i=0; i<m; i++) { 1965 ourlens[i] -= offlens[i]; 1966 } 1967 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 1968 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 1969 ierr = MatSetType(A,type);CHKERRQ(ierr); 1970 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 1971 1972 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1973 for (i=0; i<m; i++) { 1974 ourlens[i] += offlens[i]; 1975 } 1976 1977 if (!rank) { 1978 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1979 1980 /* read in my part of the matrix numerical values */ 1981 nz = procsnz[0]; 1982 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1983 1984 /* insert into matrix */ 1985 jj = rstart; 1986 smycols = mycols; 1987 svals = vals; 1988 for (i=0; i<m; i++) { 1989 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1990 smycols += ourlens[i]; 1991 svals += ourlens[i]; 1992 jj++; 1993 } 1994 1995 /* read in other processors and ship out */ 1996 for (i=1; i<size; i++) { 1997 nz = procsnz[i]; 1998 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1999 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2000 } 2001 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2002 } else { 2003 /* receive numeric values */ 2004 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2005 2006 /* receive message of values*/ 2007 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2008 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2009 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2010 2011 /* insert into matrix */ 2012 jj = rstart; 2013 smycols = mycols; 2014 svals = vals; 2015 for (i=0; i<m; i++) { 2016 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2017 smycols += ourlens[i]; 2018 svals += ourlens[i]; 2019 jj++; 2020 } 2021 } 2022 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2023 ierr = PetscFree(vals);CHKERRQ(ierr); 2024 ierr = PetscFree(mycols);CHKERRQ(ierr); 2025 ierr = PetscFree(rowners);CHKERRQ(ierr); 2026 2027 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2028 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2029 *newmat = A; 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2035 /* 2036 Not great since it makes two copies of the submatrix, first an SeqAIJ 2037 in local and then by concatenating the local matrices the end result. 
2038 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2039 */ 2040 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2041 { 2042 PetscErrorCode ierr; 2043 PetscMPIInt rank,size; 2044 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2045 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2046 Mat *local,M,Mreuse; 2047 PetscScalar *vwork,*aa; 2048 MPI_Comm comm = mat->comm; 2049 Mat_SeqAIJ *aij; 2050 2051 2052 PetscFunctionBegin; 2053 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2054 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2055 2056 if (call == MAT_REUSE_MATRIX) { 2057 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2058 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2059 local = &Mreuse; 2060 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2061 } else { 2062 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2063 Mreuse = *local; 2064 ierr = PetscFree(local);CHKERRQ(ierr); 2065 } 2066 2067 /* 2068 m - number of local rows 2069 n - number of columns (same on all processors) 2070 rstart - first row in new global matrix generated 2071 */ 2072 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2073 if (call == MAT_INITIAL_MATRIX) { 2074 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2075 ii = aij->i; 2076 jj = aij->j; 2077 2078 /* 2079 Determine the number of non-zeros in the diagonal and off-diagonal 2080 portions of the matrix in order to do correct preallocation 2081 */ 2082 2083 /* first get start and end of "diagonal" columns */ 2084 if (csize == PETSC_DECIDE) { 2085 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2086 if (mglobal == n) { /* square matrix */ 2087 nlocal = m; 2088 } else { 2089 nlocal = n/size + ((n % size) > rank); 2090 } 2091 } else { 2092 nlocal = csize; 2093 } 2094 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2095 rstart = rend - nlocal; 2096 if (rank == size - 1 && rend != n) { 2097 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2098 } 2099 2100 /* next, compute all the lengths */ 2101 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2102 olens = dlens + m; 2103 for (i=0; i<m; i++) { 2104 jend = ii[i+1] - ii[i]; 2105 olen = 0; 2106 dlen = 0; 2107 for (j=0; j<jend; j++) { 2108 if (*jj < rstart || *jj >= rend) olen++; 2109 else dlen++; 2110 jj++; 2111 } 2112 olens[i] = olen; 2113 dlens[i] = dlen; 2114 } 2115 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2116 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2117 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2118 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2119 ierr = PetscFree(dlens);CHKERRQ(ierr); 2120 } else { 2121 PetscInt ml,nl; 2122 2123 M = *newmat; 2124 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2125 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2126 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2127 /* 2128 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2129 rather than the slower MatSetValues(). 
2130 */ 2131 M->was_assembled = PETSC_TRUE; 2132 M->assembled = PETSC_FALSE; 2133 } 2134 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2135 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2136 ii = aij->i; 2137 jj = aij->j; 2138 aa = aij->a; 2139 for (i=0; i<m; i++) { 2140 row = rstart + i; 2141 nz = ii[i+1] - ii[i]; 2142 cwork = jj; jj += nz; 2143 vwork = aa; aa += nz; 2144 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2145 } 2146 2147 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2148 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2149 *newmat = M; 2150 2151 /* save submatrix used in processor for next request */ 2152 if (call == MAT_INITIAL_MATRIX) { 2153 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2154 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2155 } 2156 2157 PetscFunctionReturn(0); 2158 } 2159 2160 EXTERN_C_BEGIN 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2163 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2164 { 2165 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2166 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2167 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2168 const PetscInt *JJ; 2169 PetscScalar *values; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2174 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2175 o_nnz = d_nnz + m; 2176 2177 for (i=0; i<m; i++) { 2178 nnz = I[i+1]- I[i]; 2179 JJ = J + I[i]; 2180 nnz_max = PetscMax(nnz_max,nnz); 2181 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2182 for (j=0; j<nnz; j++) { 2183 if (*JJ >= cstart) break; 2184 JJ++; 2185 } 2186 d = 0; 2187 for (; j<nnz; j++) { 2188 if (*JJ++ >= cend) break; 2189 d++; 2190 } 2191 d_nnz[i] = d; 2192 o_nnz[i] = nnz - d; 2193 } 2194 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2195 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2196 2197 if (v) values = (PetscScalar*)v; 2198 else { 2199 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2200 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2201 } 2202 2203 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2204 for (i=0; i<m; i++) { 2205 ii = i + rstart; 2206 nnz = I[i+1]- I[i]; 2207 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2208 } 2209 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2210 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2211 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2212 2213 if (!v) { 2214 ierr = PetscFree(values);CHKERRQ(ierr); 2215 } 2216 PetscFunctionReturn(0); 2217 } 2218 EXTERN_C_END 2219 2220 #undef __FUNCT__ 2221 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2222 /*@C 2223 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2224 (the default parallel PETSc format). 2225 2226 Collective on MPI_Comm 2227 2228 Input Parameters: 2229 + B - the matrix 2230 . i - the indices into j for the start of each local row (starts with zero) 2231 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2232 - v - optional values in the matrix 2233 2234 Level: developer 2235 2236 .keywords: matrix, aij, compressed row, sparse, parallel 2237 2238 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2239 @*/ 2240 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2241 { 2242 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2243 2244 PetscFunctionBegin; 2245 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2246 if (f) { 2247 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2248 } 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2254 /*@C 2255 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2256 (the default parallel PETSc format). For good matrix assembly performance 2257 the user should preallocate the matrix storage by setting the parameters 2258 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2259 performance can be increased by more than a factor of 50. 2260 2261 Collective on MPI_Comm 2262 2263 Input Parameters: 2264 + A - the matrix 2265 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2266 (same value is used for all local rows) 2267 . d_nnz - array containing the number of nonzeros in the various rows of the 2268 DIAGONAL portion of the local submatrix (possibly different for each row) 2269 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2270 The size of this array is equal to the number of local rows, i.e 'm'. 2271 You must leave room for the diagonal entry even if it is zero. 2272 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2273 submatrix (same value is used for all local rows). 2274 - o_nnz - array containing the number of nonzeros in the various rows of the 2275 OFF-DIAGONAL portion of the local submatrix (possibly different for 2276 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2277 structure. The size of this array is equal to the number 2278 of local rows, i.e 'm'. 2279 2280 If the *_nnz parameter is given then the *_nz parameter is ignored 2281 2282 The AIJ format (also called the Yale sparse matrix format or 2283 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2284 storage. The stored row and column indices begin with zero. See the users manual for details. 2285 2286 The parallel matrix is partitioned such that the first m0 rows belong to 2287 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2288 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2289 2290 The DIAGONAL portion of the local submatrix of a processor can be defined 2291 as the submatrix which is obtained by extraction the part corresponding 2292 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2293 first row that belongs to the processor, and r2 is the last row belonging 2294 to the this processor. This is a square mxm matrix. The remaining portion 2295 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2296 2297 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
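   For instance, with the per-row counts above, proc0 could preallocate its
   local part with either of the following calls (a minimal sketch; the matrix A
   and error handling via CHKERRQ follow the conventions used elsewhere in this file):
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);           /* exact per-row counts */
     ierr = MatMPIAIJSetPreallocation(A,2,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); /* same count for all rows */
.ve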
2367 2368 Level: intermediate 2369 2370 .keywords: matrix, aij, compressed row, sparse, parallel 2371 2372 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2373 MPIAIJ 2374 @*/ 2375 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2376 { 2377 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2378 2379 PetscFunctionBegin; 2380 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2381 if (f) { 2382 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2383 } 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatCreateMPIAIJ" 2389 /*@C 2390 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2391 (the default parallel PETSc format). For good matrix assembly performance 2392 the user should preallocate the matrix storage by setting the parameters 2393 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2394 performance can be increased by more than a factor of 50. 2395 2396 Collective on MPI_Comm 2397 2398 Input Parameters: 2399 + comm - MPI communicator 2400 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2401 This value should be the same as the local size used in creating the 2402 y vector for the matrix-vector product y = Ax. 2403 . n - This value should be the same as the local size used in creating the 2404 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2405 calculated if N is given) For square matrices n is almost always m. 2406 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2407 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2408 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2409 (same value is used for all local rows) 2410 . d_nnz - array containing the number of nonzeros in the various rows of the 2411 DIAGONAL portion of the local submatrix (possibly different for each row) 2412 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2413 The size of this array is equal to the number of local rows, i.e 'm'. 2414 You must leave room for the diagonal entry even if it is zero. 2415 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2416 submatrix (same value is used for all local rows). 2417 - o_nnz - array containing the number of nonzeros in the various rows of the 2418 OFF-DIAGONAL portion of the local submatrix (possibly different for 2419 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2420 structure. The size of this array is equal to the number 2421 of local rows, i.e 'm'. 2422 2423 Output Parameter: 2424 . A - the matrix 2425 2426 Notes: 2427 If the *_nnz parameter is given then the *_nz parameter is ignored 2428 2429 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2430 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2431 storage requirements for this matrix. 2432 2433 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2434 processor than it must be used on all processors that share the object for 2435 that argument. 2436 2437 The user MUST specify either the local or global matrix dimensions 2438 (possibly both). 
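   A minimal creation sketch (illustrative values only; d_nnz and o_nnz are per-row
   count arrays as in the example further below, or PETSC_NULL when the d_nz/o_nz
   defaults are used):
.vb
     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,PETSC_DETERMINE,PETSC_DETERMINE,
                            0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve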
   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
       rather than 0. Note that when calling MatSetValues(),
       the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2517 In this case, the values of d_nz,o_nz are: 2518 .vb 2519 proc0 : dnz = 2, o_nz = 2 2520 proc1 : dnz = 3, o_nz = 2 2521 proc2 : dnz = 1, o_nz = 4 2522 .ve 2523 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2524 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2525 for proc3. i.e we are using 12+15+10=37 storage locations to store 2526 34 values. 2527 2528 When d_nnz, o_nnz parameters are specified, the storage is specified 2529 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2530 In the above case the values for d_nnz,o_nnz are: 2531 .vb 2532 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2533 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2534 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2535 .ve 2536 Here the space allocated is sum of all the above values i.e 34, and 2537 hence pre-allocation is perfect. 2538 2539 Level: intermediate 2540 2541 .keywords: matrix, aij, compressed row, sparse, parallel 2542 2543 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2544 MPIAIJ 2545 @*/ 2546 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2547 { 2548 PetscErrorCode ierr; 2549 PetscMPIInt size; 2550 2551 PetscFunctionBegin; 2552 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2553 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2554 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2555 if (size > 1) { 2556 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2557 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2558 } else { 2559 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2560 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2561 } 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2567 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2568 { 2569 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2570 2571 PetscFunctionBegin; 2572 *Ad = a->A; 2573 *Ao = a->B; 2574 *colmap = a->garray; 2575 PetscFunctionReturn(0); 2576 } 2577 2578 #undef __FUNCT__ 2579 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2580 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2581 { 2582 PetscErrorCode ierr; 2583 PetscInt i; 2584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2585 2586 PetscFunctionBegin; 2587 if (coloring->ctype == IS_COLORING_LOCAL) { 2588 ISColoringValue *allcolors,*colors; 2589 ISColoring ocoloring; 2590 2591 /* set coloring for diagonal portion */ 2592 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2593 2594 /* set coloring for off-diagonal portion */ 2595 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2596 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2597 for (i=0; i<a->B->n; i++) { 2598 colors[i] = allcolors[a->garray[i]]; 2599 } 2600 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2601 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2602 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2603 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2604 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2605 ISColoringValue *colors; 2606 PetscInt *larray; 2607 ISColoring ocoloring; 2608 2609 /* set coloring for diagonal portion */ 2610 ierr = 
PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2611 for (i=0; i<a->A->n; i++) { 2612 larray[i] = i + a->cstart; 2613 } 2614 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2615 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2616 for (i=0; i<a->A->n; i++) { 2617 colors[i] = coloring->colors[larray[i]]; 2618 } 2619 ierr = PetscFree(larray);CHKERRQ(ierr); 2620 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2621 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2622 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2623 2624 /* set coloring for off-diagonal portion */ 2625 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2626 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2627 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2628 for (i=0; i<a->B->n; i++) { 2629 colors[i] = coloring->colors[larray[i]]; 2630 } 2631 ierr = PetscFree(larray);CHKERRQ(ierr); 2632 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2633 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2634 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2635 } else { 2636 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2637 } 2638 2639 PetscFunctionReturn(0); 2640 } 2641 2642 #if defined(PETSC_HAVE_ADIC) 2643 #undef __FUNCT__ 2644 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2645 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2646 { 2647 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2648 PetscErrorCode ierr; 2649 2650 PetscFunctionBegin; 2651 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2652 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2653 PetscFunctionReturn(0); 2654 } 2655 #endif 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2659 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2660 { 2661 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2662 PetscErrorCode ierr; 2663 2664 PetscFunctionBegin; 2665 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2666 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2667 PetscFunctionReturn(0); 2668 } 2669 2670 #undef __FUNCT__ 2671 #define __FUNCT__ "MatMerge" 2672 /*@C 2673 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2674 matrices from each processor 2675 2676 Collective on MPI_Comm 2677 2678 Input Parameters: 2679 + comm - the communicators the parallel matrix will live on 2680 . inmat - the input sequential matrices 2681 . n - number of local columns (or PETSC_DECIDE) 2682 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2683 2684 Output Parameter: 2685 . outmat - the parallel matrix generated 2686 2687 Level: advanced 2688 2689 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
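   A typical call, assuming each process has assembled its own sequential matrix
   localmat with the same number of columns (sketch only; note from the implementation
   below that the input matrix is destroyed by this routine):
.vb
     Mat globalmat;
     ierr = MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
.ve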
2690 2691 @*/ 2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2693 { 2694 PetscErrorCode ierr; 2695 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2696 PetscInt *indx; 2697 PetscScalar *values; 2698 PetscMap columnmap,rowmap; 2699 2700 PetscFunctionBegin; 2701 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2702 /* 2703 PetscMPIInt rank; 2704 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2705 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2706 */ 2707 if (scall == MAT_INITIAL_MATRIX){ 2708 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2709 if (n == PETSC_DECIDE){ 2710 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2711 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2712 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2713 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2714 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2715 } 2716 2717 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2718 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2719 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2720 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2721 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2722 2723 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2724 for (i=0;i<m;i++) { 2725 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2726 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2727 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2728 } 2729 /* This routine will ONLY return MPIAIJ type matrix */ 2730 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2731 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2732 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2733 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2734 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2735 2736 } else if (scall == MAT_REUSE_MATRIX){ 2737 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2738 } else { 2739 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2740 } 2741 2742 for (i=0;i<m;i++) { 2743 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2744 I = i + rstart; 2745 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2746 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2747 } 2748 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2749 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2750 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2751 2752 PetscFunctionReturn(0); 2753 } 2754 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "MatFileSplit" 2757 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2758 { 2759 PetscErrorCode ierr; 2760 PetscMPIInt rank; 2761 PetscInt m,N,i,rstart,nnz; 2762 size_t len; 2763 const PetscInt *indx; 2764 PetscViewer out; 2765 char *name; 2766 Mat B; 2767 const PetscScalar *values; 2768 2769 PetscFunctionBegin; 2770 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2771 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2772 /* Should this be the type of the diagonal block of A? 
*/ 2773 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2774 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2775 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2776 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2777 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2778 for (i=0;i<m;i++) { 2779 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2780 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2781 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2782 } 2783 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2784 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2785 2786 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2787 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2788 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2789 sprintf(name,"%s.%d",outfile,rank); 2790 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2791 ierr = PetscFree(name); 2792 ierr = MatView(B,out);CHKERRQ(ierr); 2793 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2794 ierr = MatDestroy(B);CHKERRQ(ierr); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2799 #undef __FUNCT__ 2800 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2801 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2802 { 2803 PetscErrorCode ierr; 2804 Mat_Merge_SeqsToMPI *merge; 2805 PetscObjectContainer container; 2806 2807 PetscFunctionBegin; 2808 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2809 if (container) { 2810 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2811 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2812 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2813 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2814 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2815 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2816 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2817 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2818 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2819 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2820 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2821 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2822 2823 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2824 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2825 } 2826 ierr = PetscFree(merge);CHKERRQ(ierr); 2827 2828 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2829 PetscFunctionReturn(0); 2830 } 2831 2832 #include "src/mat/utils/freespace.h" 2833 #include "petscbt.h" 2834 static PetscEvent logkey_seqstompinum = 0; 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2837 /*@C 2838 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2839 matrices from each processor 2840 2841 Collective on MPI_Comm 2842 2843 Input Parameters: 2844 + comm - the communicators the parallel matrix will live on 2845 . seqmat - the input sequential matrices 2846 . m - number of local rows (or PETSC_DECIDE) 2847 . n - number of local columns (or PETSC_DECIDE) 2848 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2849 2850 Output Parameter: 2851 . mpimat - the parallel matrix generated 2852 2853 Level: advanced 2854 2855 Notes: 2856 The dimensions of the sequential matrix in each processor MUST be the same. 
2857 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2858 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2859 @*/ 2860 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2861 { 2862 PetscErrorCode ierr; 2863 MPI_Comm comm=mpimat->comm; 2864 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2865 PetscMPIInt size,rank,taga,*len_s; 2866 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2867 PetscInt proc,m; 2868 PetscInt **buf_ri,**buf_rj; 2869 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2870 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2871 MPI_Request *s_waits,*r_waits; 2872 MPI_Status *status; 2873 MatScalar *aa=a->a,**abuf_r,*ba_i; 2874 Mat_Merge_SeqsToMPI *merge; 2875 PetscObjectContainer container; 2876 2877 PetscFunctionBegin; 2878 if (!logkey_seqstompinum) { 2879 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2880 } 2881 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2882 2883 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2884 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2885 2886 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2887 if (container) { 2888 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2889 } 2890 bi = merge->bi; 2891 bj = merge->bj; 2892 buf_ri = merge->buf_ri; 2893 buf_rj = merge->buf_rj; 2894 2895 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2896 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2897 len_s = merge->len_s; 2898 2899 /* send and recv matrix values */ 2900 /*-----------------------------*/ 2901 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2902 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2903 2904 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2905 for (proc=0,k=0; proc<size; proc++){ 2906 if (!len_s[proc]) continue; 2907 i = owners[proc]; 2908 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2909 k++; 2910 } 2911 2912 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 2913 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 2914 ierr = PetscFree(status);CHKERRQ(ierr); 2915 2916 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2917 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2918 2919 /* insert mat values of mpimat */ 2920 /*----------------------------*/ 2921 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2922 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2923 nextrow = buf_ri_k + merge->nrecv; 2924 nextai = nextrow + merge->nrecv; 2925 2926 for (k=0; k<merge->nrecv; k++){ 2927 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2928 nrows = *(buf_ri_k[k]); 2929 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2930 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2931 } 2932 2933 /* set values of ba */ 2934 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2935 for (i=0; i<m; i++) { 2936 arow = owners[rank] + i; 2937 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2938 bnzi = bi[i+1] - bi[i]; 2939 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 
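    /* Accumulate global row "arow" into ba_i: bj_i, computed in the symbolic phase, already
       contains the union of the column indices of the local seqmat row and of every row
       received for this position.  Each contribution below is therefore merged in by walking
       bj_i and advancing through the source row whenever the column indices match; the
       completed row is then inserted into mpimat with MatSetValues(). */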
2940 2941 /* add local non-zero vals of this proc's seqmat into ba */ 2942 anzi = ai[arow+1] - ai[arow]; 2943 aj = a->j + ai[arow]; 2944 aa = a->a + ai[arow]; 2945 nextaj = 0; 2946 for (j=0; nextaj<anzi; j++){ 2947 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2948 ba_i[j] += aa[nextaj++]; 2949 } 2950 } 2951 2952 /* add received vals into ba */ 2953 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2954 /* i-th row */ 2955 if (i == *nextrow[k]) { 2956 anzi = *(nextai[k]+1) - *nextai[k]; 2957 aj = buf_rj[k] + *(nextai[k]); 2958 aa = abuf_r[k] + *(nextai[k]); 2959 nextaj = 0; 2960 for (j=0; nextaj<anzi; j++){ 2961 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2962 ba_i[j] += aa[nextaj++]; 2963 } 2964 } 2965 nextrow[k]++; nextai[k]++; 2966 } 2967 } 2968 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 2969 } 2970 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 2973 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 2974 ierr = PetscFree(ba_i);CHKERRQ(ierr); 2975 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 2976 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2977 PetscFunctionReturn(0); 2978 } 2979 2980 static PetscEvent logkey_seqstompisym = 0; 2981 #undef __FUNCT__ 2982 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 2983 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 2984 { 2985 PetscErrorCode ierr; 2986 Mat B_mpi; 2987 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2988 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 2989 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 2990 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 2991 PetscInt len,proc,*dnz,*onz; 2992 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 2993 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 2994 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 2995 MPI_Status *status; 2996 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2997 PetscBT lnkbt; 2998 Mat_Merge_SeqsToMPI *merge; 2999 PetscObjectContainer container; 3000 3001 PetscFunctionBegin; 3002 if (!logkey_seqstompisym) { 3003 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3004 } 3005 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3006 3007 /* make sure it is a PETSc comm */ 3008 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3009 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3010 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3011 3012 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3013 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3014 3015 /* determine row ownership */ 3016 /*---------------------------------------------------------*/ 3017 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3018 if (m == PETSC_DECIDE) { 3019 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3020 } else { 3021 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3022 } 3023 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3024 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3025 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3026 3027 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3028 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3029 3030 /* determine the 
     number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
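
/*
   Usage sketch for MatMerge_SeqsToMPI() (illustrative comment only, not compiled):
   each process contributes a MATSEQAIJ matrix "seqA" defined on the full global
   row space, and the contributions are merged into one distributed MPIAIJ matrix
   with mlocal rows on this process.  The names seqA and mlocal are hypothetical
   caller data, not part of this file.

     Mat            seqA,C;
     PetscInt       mlocal;
     PetscErrorCode ierr;

     ... assemble seqA on each process ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,mlocal,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... update the values of seqA (same nonzero pattern), then re-merge ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,mlocal,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/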
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
   MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of A (with global column indices)

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar    *aa=a->a,*ba=b->a,*ca;
  PetscInt       am=A->m,i,j,k,cstart=mpimat->cstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A (columns before the diagonal block) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (columns after the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->freedata = PETSC_TRUE;
    mat->nonew    = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A (columns before the diagonal block) */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A (columns after the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
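
/*
   Usage sketch for MatGetLocalMat() (illustrative comment only, not compiled):
   the result concatenates, row by row, the diagonal and off-diagonal blocks of
   the local rows of an MPIAIJ matrix, keeping global column indices.  "A" is a
   hypothetical assembled MATMPIAIJ matrix.

     Mat            A_loc;
     PetscErrorCode ierr;

     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... the values of A change but its nonzero pattern does not ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/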
static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
   MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A, keeping only the columns in which A has nonzero entries

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = a->rstart; end = a->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
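
/*
   Usage sketch for MatGetLocalMatCondensed() (illustrative comment only, not
   compiled): with row = col = PETSC_NULL the routine builds the index sets
   itself - all local rows, and only the columns in which the local part of A
   has nonzeros - and extracts that submatrix via MatGetSubMatrices().  "A" is
   a hypothetical assembled MATMPIAIJ matrix.

     Mat            A_loc;
     PetscErrorCode ierr;

     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ... reuse, assuming the nonzero pattern of A is unchanged ...
     ierr = MatGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/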
static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of the rows and columns of B that were extracted
.  brstart - row index of B_seq at which the next B->m rows correspond to B's local rows
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* rows below the local row range */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows above the local row range */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
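
/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative comment only, not
   compiled): each process gathers the rows of B that correspond to the
   nonzero columns of its local part of A, for example as a building block
   of a parallel matrix-matrix product.  On the first call the routine
   creates the row/column index sets; they must be passed back in for
   MAT_REUSE_MATRIX.  "A" and "B" are hypothetical MATMPIAIJ matrices whose
   layouts are compatible (A's column ownership equals B's row ownership).

     Mat            B_seq;
     IS             rowb,colb;
     PetscInt       brstart;
     PetscErrorCode ierr;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... the values of B change but its nonzero pattern does not ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/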
static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
   MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
   of the OFF-DIAGONAL portion of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.  startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.  B_oth - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=ctx->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
  PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             *sstatus,rstatus;
  PetscInt               *cols;
  PetscScalar            *vals;
  PetscMPIInt            j;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
  svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;
  rwaits   = gen_from->requests;
  swaits   = gen_to->requests;
  srow     = gen_to->indices;  /* local row indices to be sent */
  rstarts  = gen_from->starts;
  sstarts  = gen_to->starts;
  rprocs   = gen_from->procs;
  sprocs   = gen_to->procs;
  sstatus  = gen_to->sstatus;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj    = sstartsj + nsends + 1;
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i];
      nrows  = sstarts[i+1]-sstarts[i]; /* num of rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* row length */
        len += rowlen[j];
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i];
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + b->rowners[rank]; /* global row idx */
      ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      for (l=0; l<ncols; l++){
        *bufA++ = vals[l];
      }
      ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->freedata = PETSC_TRUE;
    b_oth->nonew    = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
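
/*
   Usage sketch for MatGetBrowsOfAoCols() (illustrative comment only, not
   compiled): the communication pattern of A's VecScatter (a->Mvctx) is reused
   to ship the rows of B matching the nonzero off-diagonal columns of local A,
   so A must already be assembled.  To amortize the setup, keep the returned
   startsj and bufa buffers and pass them back with MAT_REUSE_MATRIX; passing
   PETSC_NULL for either forces the MAT_INITIAL_MATRIX path.  "A" and "B" are
   hypothetical MATMPIAIJ matrices with compatible layouts.

     Mat            B_oth;
     PetscInt       *startsj = PETSC_NULL;
     PetscScalar    *bufa    = PETSC_NULL;
     PetscErrorCode ierr;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... the values of B change but its nonzero pattern does not ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/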
/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->bs        = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                            "MatStoreValues_MPIAIJ",
                                            MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                            "MatRetrieveValues_MPIAIJ",
                                            MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                            "MatGetDiagonalBlock_MPIAIJ",
                                            MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                            "MatIsTranspose_MPIAIJ",
                                            MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                            "MatMPIAIJSetPreallocation_MPIAIJ",
                                            MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                            "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                            MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                            "MatDiagonalScaleLocal_MPIAIJ",
                                            MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
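
/*
   Usage sketch for creating a MATMPIAIJ matrix (illustrative comment only, not
   compiled); the constructor above is normally reached through MatSetType() or
   -mat_type mpiaij with MatSetFromOptions().  The local sizes mlocal/nlocal and
   the preallocation counts below are hypothetical.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/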