#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
  at a slightly higher hash-table cost; without it it is not scalable (each
  processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
    N = nrow1++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
    N = nrow2++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
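/*
  Both macros above implement the same pattern: a guarded binary search over
  the sorted column indices of one row, finished by a short linear scan, then
  a shift of the row tail one slot right to make room for the new entry.
  A minimal standalone sketch of that search-and-shift idea (hypothetical
  helper, not part of PETSc; the macros additionally honor the nonew and
  ignorezeroentries options and grow full rows with MatSeqXAIJReallocateAIJ()):

      static PetscErrorCode InsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                                         PetscInt col,PetscScalar value,InsertMode addv)
      {
        PetscInt lo = 0,hi = *nrow,t,i;
        while (hi - lo > 5) {
          t = (lo + hi)/2;
          if (cols[t] > col) hi = t; else lo = t;
        }
        for (i=lo; i<hi; i++) {
          if (cols[i] == col) {
            if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
            return 0;
          }
          if (cols[i] > col) break;
        }
        for (t=*nrow; t>i; t--) { cols[t] = cols[t-1]; vals[t] = vals[t-1]; }
        cols[i] = col; vals[i] = value; (*nrow)++;
        return 0;
      }
*/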
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 note ba must be refreshed from the new b before ap2 is computed */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->m;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
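/*
  Typical caller-side use of the routine above (a usage sketch): rows owned
  by this process go straight into aij->A or aij->B, while rows owned
  elsewhere are cached in mat->stash until assembly.

      PetscInt    row     = 0;                 any global row index
      PetscInt    cols[3] = {0,5,9};           global column indices
      PetscScalar vals[3] = {1.0,2.0,3.0};
      ierr = MatSetValues(A,1,&row,3,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/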
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
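/*
  MatGetValues() on an MPIAIJ matrix only serves rows owned by the calling
  process, so callers should restrict queries to the local ownership range
  (a usage sketch):

      PetscInt    rstart,rend,row,col;
      PetscScalar val;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        col  = row;                       e.g. sample the diagonal
        ierr = MatGetValues(A,1,&row,1,&col,&val);CHKERRQ(ierr);
      }
*/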
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reaccess b because aij->B may have been changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ*)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}
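/*
  MatAssemblyBegin() above only launches the stash scatter; the receives are
  drained in MatAssemblyEnd().  Callers can therefore overlap unrelated work
  with the communication (a sketch):

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ... computation that does not touch A ...
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/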
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
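/*
  Usage sketch for the routine above: rows are given by global index in an
  IS and may be owned by any process; the communication above routes each
  index to its owner.

      IS          is;
      PetscInt    rows[2] = {0,7};             global row numbers to zero
      PetscScalar one     = 1.0;
      ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);     place 1.0 on the diagonal
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/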
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
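/*
  With the storage split used throughout this file the product is assembled as

      y = A_d x_local + A_o x_ghost

  where A_d (aij->A) is the diagonal block, A_o (aij->B) the off-diagonal
  block with compressed columns, and x_ghost (a->lvec) is gathered by the
  scatter.  The scatter is started before the local multiply so that
  communication overlaps with computation.
*/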
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we still need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
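/*
  Usage sketch for MatIsTranspose_MPIAIJ() via the public interface:

      PetscTruth flg;
      ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
      if (flg) { ... B equals A^T to the given tolerance ... }

  The cheap test compares only the diagonal blocks; the off-diagonal block
  is extracted and compared only when those match and size > 1.
*/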
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
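/*
  Note the calling sequence above: this generation of MatScale() takes the
  scalar by pointer (a usage sketch):

      PetscScalar two = 2.0;
      ierr = MatScale(&two,A);CHKERRQ(ierr);      A <- 2.0 * A
*/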
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
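/*
  The resulting binary file layout (the root process writes everything):

      header[4]   = {MAT_FILE_COOKIE, M, N, total nonzeros}
      rowlens[M]    one PetscInt row length per global row
      colidx[nz]    global column indices, row by row (sorted within a row)
      vals[nz]      the PetscScalar values in the same order

  A reader sketch for the header on one process, matching MatLoad_MPIAIJ():

      PetscInt header[4];
      ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
      ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
*/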
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
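/*
  Caller-side sketch of the viewer dispatch above; the format controls how
  much detail is printed:

      ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
      ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/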
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
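/*
  Each outer iteration above is a block-Jacobi/local-SOR step: with the
  current ghost values x_ghost the right-hand side is updated to

      bb1 = bb - A_o x_ghost

  and then lits local SOR sweeps are applied to the diagonal-block system
  A_d x = bb1.  True global (parallel) SOR is not supported.
*/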
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
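/*
  Usage sketch for the routine above:

      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);

  suppresses the stash, so entries destined for rows owned by other
  processes are dropped by MatSetValues() instead of communicated.
*/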
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
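/*
  Caller-side sketch for the pair above; only one row may be active at a
  time and only local rows may be requested:

      PetscInt    rstart,rend,row,nz,*cols;
      PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
        ... use the nz entries of cols[] and vals[] ...
        ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
      }
*/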
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
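/*
  The three supported norms, for reference:

      NORM_FROBENIUS   sqrt( sum_ij |a_ij|^2 )    (partial sums reduced with MPI_SUM)
      NORM_1           max_j sum_i |a_ij|          (max column sum)
      NORM_INFINITY    max_i sum_j |a_ij|          (max row sum)

  The two-norm would require a singular value computation and is not supported.
*/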
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
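/*
  MatDiagonalScale() above forms  A <- diag(ll) A diag(rr).  Since the right
  scaling of the off-diagonal block needs the ghost entries of rr, the
  scatter of rr is started first, the left scaling and the diagonal block
  are handled while it is in flight, and the off-diagonal right scaling is
  done only after VecScatterEnd().
*/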
#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we could provide a copy more
       efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one = 1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x   = (Mat_SeqAIJ*)xx->A->data;
    y   = (Mat_SeqAIJ*)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x   = (Mat_SeqAIJ*)xx->B->data;
    y   = (Mat_SeqAIJ*)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
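/*
  Usage sketch for the (pointer-scalar) MatAXPY() interface used above:

      PetscScalar alpha = 2.0;
      ierr = MatAXPY(&alpha,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);      Y <- 2 X + Y

  With SAME_NONZERO_PATTERN the value arrays are conformal and the update
  reduces to two BLAS axpy calls; SUBSET_NONZERO_PATTERN builds (and caches)
  an index map xtoy from X's nonzeros into Y's.
*/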
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
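/*
   Usage sketch (illustrative only): the store/retrieve pair above lets a user
   stash the numerical values of an assembled matrix and restore them later,
   e.g. when rebuilding a Jacobian whose nonzero pattern does not change. A
   minimal sequence, assuming the matrix is already assembled and its pattern
   has been frozen so the stored arrays stay valid (the option name is this
   version's pattern-freezing flag and is an assumption to verify):

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... zero and refill the matrix, reassemble ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/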
#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b    = (Mat_MPIAIJ*)B->data;
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr    = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->bs           = matin->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDestroy(a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDestroy(a->B);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
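/*
   Usage sketch (illustrative only): MatLoad_MPIAIJ() below is normally reached
   through the generic MatLoad() interface after opening a PETSc binary viewer.
   The read-mode flag name is assumed to be this version's binary read mode and
   should be checked against the headers before reuse:

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/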
#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
  PetscInt       i,nz,j,rstart,rend,mmax;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    mmax        = PetscMax(mmax,rowners[i]);
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }
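  /*
     Worked example of the ownership split used above (added for clarity):
     M/size + ((M % size) > rank) hands each of the first M%size ranks one
     extra row. With M = 10 and size = 3 this gives m = 4,3,3 on ranks 0,1,2,
     the prefix sums in rowners[] become 0,4,7,10, and so [rstart,rend) is
     [0,4), [4,7), [7,10). The MPI_Scan() branch computes the analogous
     [cstart,cend) column ranges for rectangular matrices.
  */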
  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on
  each process, and then the end result by concatenating those local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
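/*
   Usage sketch (illustrative only): extracting a parallel submatrix and later
   refreshing its values after the original matrix changes, via the public
   MatGetSubMatrix() interface that dispatches here. The index sets isrow and
   iscol are assumed to have been created already (e.g. with ISCreateStride()):

     Mat sub;
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... change entries of A, reassemble ...
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX path works because the sequential submatrix is stashed
   on the result under the name "SubMatrix" above.
*/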
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
{
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
  PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_OPT_g)
  if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
#endif
  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

  for (i=0; i<m; i++) {
    nnz     = I[i+1] - I[i];
    JJ      = J + I[i];
    nnz_max = PetscMax(nnz_max,nnz);
#if defined(PETSC_OPT_g)
    if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
#endif
    /* skip the entries left of the diagonal block */
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* count the entries inside the diagonal block; the rest are off-diagonal */
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = I[i+1] - I[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
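/*
   Worked example (added for clarity): on a process owning two rows, global
   rows r and r+1, with local CSR data

     I = {0,2,3}            row i uses entries I[i] <= k < I[i+1]
     J = {0,3, 1}           columns of row r: 0 and 3; of row r+1: 1
     v = {1.0,2.0, 3.0}     optional; may be PETSC_NULL

   the loop above splits each row's count at [cstart,cend) to build d_nnz and
   o_nnz before inserting the values row by row.
*/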
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@C
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.  See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
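/*
   Usage sketch (illustrative only): the typical create/preallocate/assemble
   flow for an MPIAIJ matrix, matching the calls made elsewhere in this file.
   Here d_nnz and o_nnz are per-local-row counts as described in the manual
   page above; either may be PETSC_NULL if the scalar d_nz/o_nz estimates
   suffice:

     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ... MatSetValues() loop ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/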
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
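/*
   Usage sketch (illustrative only): inspecting the two sequential blocks of an
   assembled MPIAIJ matrix through MatMPIAIJGetSeqAIJ() above. The garray
   mapping translates the compressed column numbering of the off-diagonal
   block back to global column indices:

     Mat      Ad,Ao;
     PetscInt *garray;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
     /* local column j of Ao corresponds to global column garray[j] */
*/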
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
          The input matrix is destroyed by this routine.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;
  PetscMap       columnmap,rowmap;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  /*
  PetscMPIInt rank;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
  */
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
      ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
      ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
      ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
      ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
    }

    ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
    ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
    ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
    ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
    ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return an MPIAIJ type matrix */
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
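/*
   Usage sketch (illustrative only): gluing per-process SeqAIJ pieces into one
   parallel matrix. Note that MatMerge() takes ownership of, and destroys, the
   input matrix, as seen in the MatDestroy(inmat) call above:

     Mat seqA;   /* a SeqAIJ matrix on each process; all must have N columns */
     Mat A;
     ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
*/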
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
    if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
    if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
    if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free merge only when it was actually retrieved from the container */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
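/*
   Sketch of the attach side of the container pattern consumed above (added for
   orientation; PetscObjectContainerCreate()/SetPointer() are assumed to be the
   creation counterparts of the Query/GetPointer calls used in this file):

     PetscObjectContainer container;
     ierr = PetscObjectContainerCreate(comm,&container);CHKERRQ(ierr);
     ierr = PetscObjectContainerSetPointer(container,(void*)merge);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
*/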
#include "src/mat/utils/freespace.h"
#include "petscbt.h"

static PetscEvent logkey_seqstompinum = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode       ierr;
  MPI_Comm             comm=mpimat->comm;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt             proc,m;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a,**abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (!logkey_seqstompinum) {
    ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created with MatMerge_SeqsToMPISymbolic()");
  ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr  = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr  = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
  len_s = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
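/*
   Usage sketch (illustrative only): the symbolic/numeric pair is meant to be
   called in sequence. The symbolic phase below builds the parallel nonzero
   structure and stashes the communication plan on mpimat, which the numeric
   phase above then reuses, possibly many times for different values:

     Mat mpimat;
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
*/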
static PetscEvent logkey_seqstompisym = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (!logkey_seqstompisym) {
    ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  if (m == PETSC_DECIDE) {
    ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
  } else {
    ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  }
  ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);}
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* determine the number of messages to send, and their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr     = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr   = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr   = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
  ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
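  /* At this point bi/bj hold, in CSR form, the complete nonzero pattern of this
     process's rows of the merged matrix, and dnz/onz (accumulated by
     MatPreallocateSet() above) give the per-row nonzero counts of the diagonal
     and off-diagonal blocks used for the preallocation below. */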
  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  if (n==PETSC_DECIDE) {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
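/*
   Minimal usage sketch (hypothetical names; seqmat is assumed to hold this
   process's m rows of the final matrix in global column numbering):

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...  change the numerical values of seqmat, keeping the nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/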
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
     MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of an MPIAIJ matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar    *aa=a->a,*ba=b->a,*ca;
  PetscInt       am=A->m,i,j,k,cstart=mpimat->cstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A (columns left of the diagonal block) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat           = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->freedata = PETSC_TRUE;
    mat->nonew    = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A (columns left of the diagonal block) */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
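/*
   Minimal usage sketch (hypothetical names; A is assumed to be an assembled
   MPIAIJ matrix):

     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ...  after the values of A change (same nonzero pattern) ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/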
static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and NON-ZERO columns of an MPIAIJ matrix

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = a->rstart; end = a->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
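/*
   Minimal usage sketch (hypothetical names; passing PETSC_NULL for row and col
   selects all local rows and the nonzero columns of A):

     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     ...
     ierr = MatGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
*/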
static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which the next B->m rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;    /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];  /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
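/*
   Minimal usage sketch (hypothetical names): gather the rows of B matching the
   nonzero columns of the local part of A, e.g. as a building block of C = A*B:

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
*/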
static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=ctx->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
  PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             *sstatus,rstatus;
  PetscInt               *cols;
  PetscScalar            *vals;
  PetscMPIInt            j;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the receiving rows */
  svalues  = gen_to->values;   /* holds the lengths of the sending rows */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;
  rwaits   = gen_from->requests;
  swaits   = gen_to->requests;
  srow     = gen_to->indices;  /* local row index to be sent */
  rstarts  = gen_from->starts;
  sstarts  = gen_to->starts;
  rprocs   = gen_from->procs;
  sprocs   = gen_to->procs;
  sstatus  = gen_to->sstatus;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj = sstartsj + nsends + 1;
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i];
      nrows  = sstarts[i+1]-sstarts[i]; /* num of rows */
      for (j=0; j<nrows; j++) {
        row  = srow[k] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* row length */
        len += rowlen[j];
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
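    /* At this point sstartsj[i] is the offset in bufj/bufa at which the i-th
       outgoing j/a message starts, and len is the total outgoing length;
       e.g. (hypothetical) two messages of 5 and 3 entries give sstartsj = {0,5,8}. */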
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse scall");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i];
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + b->rowners[rank]; /* global row idx */
      ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      for (l=0; l<ncols; l++){
        *bufA++ = vals[l];
      }
      ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth           = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->freedata = PETSC_TRUE;
    b_oth->nonew    = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
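/*
   Minimal usage sketch (hypothetical names; startsj and bufa are preserved
   between calls so the communication layout can be reused):

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         Both;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     ...  after the values of B change (same nonzero pattern) ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
*/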
/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->bs        = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off-processor entries */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
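/*
   Minimal creation sketch for an MPIAIJ matrix (hypothetical sizes; follows the
   MatCreate() calling sequence used in this file):

     Mat A;
     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/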