1cb512458SBarry Smith #ifndef lint 2*603f58a4SSatish Balay static char vcid[] = "$Id: mpiaij.c,v 1.154 1996/07/11 04:07:59 balay Exp balay $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 6f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 7d9942c19SSatish Balay #include "src/inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 18416022c9SBarry Smith int n = B->n,i,shift = B->indexshift; 19dbb450caSBarry Smith 200452661fSBarry Smith aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 21464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 22cddf8d76SBarry Smith PetscMemzero(aij->colmap,aij->N*sizeof(int)); 23dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 249e25ed09SBarry Smith return 0; 259e25ed09SBarry Smith } 269e25ed09SBarry Smith 272493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 282493cbb0SBarry Smith 2970423df4SBarry Smith static int MatGetReordering_MPIAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 30299609e3SLois Curfman McInnes { 31299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 32299609e3SLois Curfman McInnes int ierr; 3317699dbbSLois Curfman McInnes if (aij->size == 1) { 3451c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 3548d91487SBarry Smith } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel"); 36299609e3SLois Curfman McInnes return 0; 37299609e3SLois Curfman McInnes } 38299609e3SLois Curfman McInnes 394b0e389bSBarry Smith static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 408a729477SBarry Smith { 4144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 42dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 434b0e389bSBarry Smith Scalar value; 441eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 451eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 464b0e389bSBarry Smith int shift = C->indexshift,roworiented = aij->roworiented; 478a729477SBarry Smith 4864119d58SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 4948d91487SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 508a729477SBarry Smith } 511eb62cbbSBarry Smith aij->insertmode = addv; 528a729477SBarry Smith for ( i=0; i<m; i++ ) { 534b0e389bSBarry Smith if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 544b0e389bSBarry Smith if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 554b0e389bSBarry Smith if (im[i] >= rstart && im[i] < rend) { 564b0e389bSBarry Smith row = im[i] - rstart; 571eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 584b0e389bSBarry Smith if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 594b0e389bSBarry Smith if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 604b0e389bSBarry Smith if (in[j] >= cstart && in[j] < cend){ 614b0e389bSBarry Smith col = in[j] - cstart; 624b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 634b0e389bSBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 641eb62cbbSBarry Smith } 651eb62cbbSBarry Smith else { 66227d817aSBarry Smith if (mat->was_assembled) { 67b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 684b0e389bSBarry Smith col = aij->colmap[in[j]] + shift; 69ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 702493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 714b0e389bSBarry Smith col = in[j]; 72d6dfbf8fSBarry Smith } 739e25ed09SBarry Smith } 744b0e389bSBarry Smith else col = in[j]; 754b0e389bSBarry Smith if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 764b0e389bSBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith } 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith else { 814b0e389bSBarry Smith if (roworiented) { 824b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 834b0e389bSBarry Smith } 844b0e389bSBarry Smith else { 854b0e389bSBarry Smith row = im[i]; 864b0e389bSBarry Smith for ( j=0; j<n; j++ ) { 874b0e389bSBarry Smith ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 884b0e389bSBarry Smith } 894b0e389bSBarry Smith } 901eb62cbbSBarry Smith } 918a729477SBarry Smith } 928a729477SBarry Smith return 0; 938a729477SBarry Smith } 948a729477SBarry Smith 95b49de8d1SLois Curfman McInnes static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 96b49de8d1SLois Curfman McInnes { 97b49de8d1SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 98b49de8d1SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 99b49de8d1SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 100b49de8d1SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 101b49de8d1SLois Curfman McInnes int shift = C->indexshift; 102b49de8d1SLois Curfman McInnes 103b49de8d1SLois Curfman McInnes for ( i=0; i<m; i++ ) { 104b49de8d1SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row"); 105b49de8d1SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large"); 106b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 107b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 108b49de8d1SLois Curfman McInnes for ( j=0; j<n; j++ ) { 109b49de8d1SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column"); 110b49de8d1SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large"); 111b49de8d1SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 112b49de8d1SLois Curfman McInnes col = idxn[j] - cstart; 113b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 114b49de8d1SLois Curfman McInnes } 115b49de8d1SLois Curfman McInnes else { 11670423df4SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 117b49de8d1SLois Curfman McInnes col = aij->colmap[idxn[j]] + shift; 118d9d09a02SSatish Balay if ( aij->garray[col] != idxn[j] ) *(v+i*n+j) = 0.0; 119d9d09a02SSatish Balay else { 120b49de8d1SLois Curfman McInnes ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 121b49de8d1SLois Curfman McInnes } 122b49de8d1SLois Curfman McInnes } 123b49de8d1SLois Curfman McInnes } 124d9d09a02SSatish Balay } 125b49de8d1SLois Curfman McInnes else { 126b49de8d1SLois Curfman McInnes SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported"); 127b49de8d1SLois Curfman McInnes } 128b49de8d1SLois Curfman McInnes } 129b49de8d1SLois Curfman McInnes return 0; 130b49de8d1SLois Curfman McInnes } 131b49de8d1SLois Curfman McInnes 132fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 1338a729477SBarry Smith { 13444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 135d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 13617699dbbSLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 13717699dbbSLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 1381eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1396abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1401eb62cbbSBarry Smith InsertMode addv; 1411eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1421eb62cbbSBarry Smith 1431eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 144d65a2f8fSBarry Smith MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 145dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 146bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1471eb62cbbSBarry Smith } 1481eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1491eb62cbbSBarry Smith 1501eb62cbbSBarry Smith /* first count number of contributors to each processor */ 1510452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 152cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1530452661fSBarry Smith owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1541eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1551eb62cbbSBarry Smith idx = aij->stash.idx[i]; 15617699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 1571eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1581eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1598a729477SBarry Smith } 1608a729477SBarry Smith } 1618a729477SBarry Smith } 16217699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1631eb62cbbSBarry Smith 1641eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 1650452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 16617699dbbSLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 16717699dbbSLois Curfman McInnes nreceives = work[rank]; 16817699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 16917699dbbSLois Curfman McInnes nmax = work[rank]; 1700452661fSBarry Smith PetscFree(work); 1711eb62cbbSBarry Smith 1721eb62cbbSBarry Smith /* post receives: 1731eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1741eb62cbbSBarry Smith (global index,value) we store the global index as a double 175d6dfbf8fSBarry Smith to simplify the message passing. 1761eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1771eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1781eb62cbbSBarry Smith this is a lot of wasted space. 1791eb62cbbSBarry Smith 1801eb62cbbSBarry Smith 1811eb62cbbSBarry Smith This could be done better. 1821eb62cbbSBarry Smith */ 1830452661fSBarry Smith rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 18478b31e54SBarry Smith CHKPTRQ(rvalues); 1850452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 18678b31e54SBarry Smith CHKPTRQ(recv_waits); 1871eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 188416022c9SBarry Smith MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1891eb62cbbSBarry Smith comm,recv_waits+i); 1901eb62cbbSBarry Smith } 1911eb62cbbSBarry Smith 1921eb62cbbSBarry Smith /* do sends: 1931eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1941eb62cbbSBarry Smith the ith processor 1951eb62cbbSBarry Smith */ 1960452661fSBarry Smith svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 1970452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 19878b31e54SBarry Smith CHKPTRQ(send_waits); 1990452661fSBarry Smith starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 2001eb62cbbSBarry Smith starts[0] = 0; 20117699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2021eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 2031eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 2041eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 2051eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 2061eb62cbbSBarry Smith } 2070452661fSBarry Smith PetscFree(owner); 2081eb62cbbSBarry Smith starts[0] = 0; 20917699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 2101eb62cbbSBarry Smith count = 0; 21117699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 2121eb62cbbSBarry Smith if (procs[i]) { 213416022c9SBarry Smith MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 2141eb62cbbSBarry Smith comm,send_waits+count++); 2151eb62cbbSBarry Smith } 2161eb62cbbSBarry Smith } 2170452661fSBarry Smith PetscFree(starts); PetscFree(nprocs); 2181eb62cbbSBarry Smith 2191eb62cbbSBarry Smith /* Free cache space */ 2202191d07cSBarry Smith PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n); 22178b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 2221eb62cbbSBarry Smith 2231eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 2241eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 2251eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 2261eb62cbbSBarry Smith aij->rmax = nmax; 2271eb62cbbSBarry Smith 2281eb62cbbSBarry Smith return 0; 2291eb62cbbSBarry Smith } 23044a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 2311eb62cbbSBarry Smith 232fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 2331eb62cbbSBarry Smith { 23444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 235dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 2361eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 237416022c9SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 238416022c9SBarry Smith int row,col,other_disassembled,shift = C->indexshift; 2391eb62cbbSBarry Smith Scalar *values,val; 2401eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2411eb62cbbSBarry Smith 2421eb62cbbSBarry Smith /* wait on receives */ 2431eb62cbbSBarry Smith while (count) { 244d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2451eb62cbbSBarry Smith /* unpack receives into our local space */ 246d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 247c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2481eb62cbbSBarry Smith n = n/3; 2491eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 250227d817aSBarry Smith row = (int) PetscReal(values[3*i]) - aij->rstart; 251227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 2521eb62cbbSBarry Smith val = values[3*i+2]; 2531eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2541eb62cbbSBarry Smith col -= aij->cstart; 2551eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2561eb62cbbSBarry Smith } 2571eb62cbbSBarry Smith else { 258227d817aSBarry Smith if (mat->was_assembled) { 259b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 260dbb450caSBarry Smith col = aij->colmap[col] + shift; 261ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2622493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 263227d817aSBarry Smith col = (int) PetscReal(values[3*i+1]); 264d6dfbf8fSBarry Smith } 2659e25ed09SBarry Smith } 2661eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2671eb62cbbSBarry Smith } 2681eb62cbbSBarry Smith } 2691eb62cbbSBarry Smith count--; 2701eb62cbbSBarry Smith } 2710452661fSBarry Smith PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 2721eb62cbbSBarry Smith 2731eb62cbbSBarry Smith /* wait on sends */ 2741eb62cbbSBarry Smith if (aij->nsends) { 2750452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); 27678b31e54SBarry Smith CHKPTRQ(send_status); 2771eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 2780452661fSBarry Smith PetscFree(send_status); 2791eb62cbbSBarry Smith } 2800452661fSBarry Smith PetscFree(aij->send_waits); PetscFree(aij->svalues); 2811eb62cbbSBarry Smith 28264119d58SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 28378b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 28478b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2851eb62cbbSBarry Smith 2862493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2872493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 288227d817aSBarry Smith MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 289227d817aSBarry Smith if (mat->was_assembled && !other_disassembled) { 2902493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2912493cbb0SBarry Smith } 2922493cbb0SBarry Smith 2936d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 29478b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 2955e42470aSBarry Smith } 29678b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 29778b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2985e42470aSBarry Smith 2997a0afa10SBarry Smith if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 3008a729477SBarry Smith return 0; 3018a729477SBarry Smith } 3028a729477SBarry Smith 3032ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 3041eb62cbbSBarry Smith { 30544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 306dbd7a890SLois Curfman McInnes int ierr; 30778b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 30878b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 3091eb62cbbSBarry Smith return 0; 3101eb62cbbSBarry Smith } 3111eb62cbbSBarry Smith 3121eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 3131eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 3141eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 3151eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 3161eb62cbbSBarry Smith routine. 3171eb62cbbSBarry Smith */ 31844a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 3191eb62cbbSBarry Smith { 32044a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 32117699dbbSLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 3226abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 32317699dbbSLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 3245392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 325d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 326d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 3271eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 3281eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3291eb62cbbSBarry Smith IS istmp; 3301eb62cbbSBarry Smith 33177c4ece6SBarry Smith ierr = ISGetSize(is,&N); CHKERRQ(ierr); 33278b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3331eb62cbbSBarry Smith 3341eb62cbbSBarry Smith /* first count number of contributors to each processor */ 3350452661fSBarry Smith nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 336cddf8d76SBarry Smith PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 3370452661fSBarry Smith owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3381eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3391eb62cbbSBarry Smith idx = rows[i]; 3401eb62cbbSBarry Smith found = 0; 34117699dbbSLois Curfman McInnes for ( j=0; j<size; j++ ) { 3421eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3431eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3441eb62cbbSBarry Smith } 3451eb62cbbSBarry Smith } 346bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3471eb62cbbSBarry Smith } 34817699dbbSLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 3491eb62cbbSBarry Smith 3501eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 3510452661fSBarry Smith work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 35217699dbbSLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 35317699dbbSLois Curfman McInnes nrecvs = work[rank]; 35417699dbbSLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 35517699dbbSLois Curfman McInnes nmax = work[rank]; 3560452661fSBarry Smith PetscFree(work); 3571eb62cbbSBarry Smith 3581eb62cbbSBarry Smith /* post receives: */ 3590452661fSBarry Smith rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 36078b31e54SBarry Smith CHKPTRQ(rvalues); 3610452661fSBarry Smith recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 36278b31e54SBarry Smith CHKPTRQ(recv_waits); 3631eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 364416022c9SBarry Smith MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 3651eb62cbbSBarry Smith } 3661eb62cbbSBarry Smith 3671eb62cbbSBarry Smith /* do sends: 3681eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3691eb62cbbSBarry Smith the ith processor 3701eb62cbbSBarry Smith */ 3710452661fSBarry Smith svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 3720452661fSBarry Smith send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 37378b31e54SBarry Smith CHKPTRQ(send_waits); 3740452661fSBarry Smith starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 3751eb62cbbSBarry Smith starts[0] = 0; 37617699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3771eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3781eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3791eb62cbbSBarry Smith } 3801eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3811eb62cbbSBarry Smith 3821eb62cbbSBarry Smith starts[0] = 0; 38317699dbbSLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3841eb62cbbSBarry Smith count = 0; 38517699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 3861eb62cbbSBarry Smith if (procs[i]) { 387416022c9SBarry Smith MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 3881eb62cbbSBarry Smith } 3891eb62cbbSBarry Smith } 3900452661fSBarry Smith PetscFree(starts); 3911eb62cbbSBarry Smith 39217699dbbSLois Curfman McInnes base = owners[rank]; 3931eb62cbbSBarry Smith 3941eb62cbbSBarry Smith /* wait on receives */ 3950452661fSBarry Smith lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3961eb62cbbSBarry Smith source = lens + nrecvs; 3971eb62cbbSBarry Smith count = nrecvs; slen = 0; 3981eb62cbbSBarry Smith while (count) { 399d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 4001eb62cbbSBarry Smith /* unpack receives into our local space */ 4011eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 402d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 403d6dfbf8fSBarry Smith lens[imdex] = n; 4041eb62cbbSBarry Smith slen += n; 4051eb62cbbSBarry Smith count--; 4061eb62cbbSBarry Smith } 4070452661fSBarry Smith PetscFree(recv_waits); 4081eb62cbbSBarry Smith 4091eb62cbbSBarry Smith /* move the data into the send scatter */ 4100452661fSBarry Smith lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 4111eb62cbbSBarry Smith count = 0; 4121eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 4131eb62cbbSBarry Smith values = rvalues + i*nmax; 4141eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 4151eb62cbbSBarry Smith lrows[count++] = values[j] - base; 4161eb62cbbSBarry Smith } 4171eb62cbbSBarry Smith } 4180452661fSBarry Smith PetscFree(rvalues); PetscFree(lens); 4190452661fSBarry Smith PetscFree(owner); PetscFree(nprocs); 4201eb62cbbSBarry Smith 4211eb62cbbSBarry Smith /* actually zap the local rows */ 422416022c9SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 423464493b3SBarry Smith PLogObjectParent(A,istmp); 4240452661fSBarry Smith PetscFree(lrows); 42578b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 42678b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 42778b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4281eb62cbbSBarry Smith 4291eb62cbbSBarry Smith /* wait on sends */ 4301eb62cbbSBarry Smith if (nsends) { 4310452661fSBarry Smith send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 43278b31e54SBarry Smith CHKPTRQ(send_status); 4331eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 4340452661fSBarry Smith PetscFree(send_status); 4351eb62cbbSBarry Smith } 4360452661fSBarry Smith PetscFree(send_waits); PetscFree(svalues); 4371eb62cbbSBarry Smith 4381eb62cbbSBarry Smith return 0; 4391eb62cbbSBarry Smith } 4401eb62cbbSBarry Smith 441416022c9SBarry Smith static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 4421eb62cbbSBarry Smith { 443416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 444fbd6ef76SBarry Smith int ierr,nt; 445416022c9SBarry Smith 446a2ce50c7SBarry Smith ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 447fbd6ef76SBarry Smith if (nt != a->n) { 44847b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 449fbd6ef76SBarry Smith } 450a2ce50c7SBarry Smith ierr = VecGetLocalSize(yy,&nt); CHKERRQ(ierr); 451fbd6ef76SBarry Smith if (nt != a->m) { 45247b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy"); 453fbd6ef76SBarry Smith } 45464119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 45538597bd4SSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 45664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 45738597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 4581eb62cbbSBarry Smith return 0; 4591eb62cbbSBarry Smith } 4601eb62cbbSBarry Smith 461416022c9SBarry Smith static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 462da3a660dSBarry Smith { 463416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 464da3a660dSBarry Smith int ierr; 46564119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 46638597bd4SSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 46764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 46838597bd4SSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 469da3a660dSBarry Smith return 0; 470da3a660dSBarry Smith } 471da3a660dSBarry Smith 472416022c9SBarry Smith static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 473da3a660dSBarry Smith { 474416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 475da3a660dSBarry Smith int ierr; 476da3a660dSBarry Smith 477da3a660dSBarry Smith /* do nondiagonal part */ 47838597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 479da3a660dSBarry Smith /* send it on its way */ 480416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 48164119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 482da3a660dSBarry Smith /* do local part */ 48338597bd4SSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 484da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 485da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 486da3a660dSBarry Smith /* but is not perhaps always true. */ 487416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 48864119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 489da3a660dSBarry Smith return 0; 490da3a660dSBarry Smith } 491da3a660dSBarry Smith 492416022c9SBarry Smith static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 493da3a660dSBarry Smith { 494416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 495da3a660dSBarry Smith int ierr; 496da3a660dSBarry Smith 497da3a660dSBarry Smith /* do nondiagonal part */ 49838597bd4SSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 499da3a660dSBarry Smith /* send it on its way */ 500416022c9SBarry Smith ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 50164119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 502da3a660dSBarry Smith /* do local part */ 50338597bd4SSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 504da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 505da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 506da3a660dSBarry Smith /* but is not perhaps always true. */ 507416022c9SBarry Smith ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 50864119d58SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 509da3a660dSBarry Smith return 0; 510da3a660dSBarry Smith } 511da3a660dSBarry Smith 5121eb62cbbSBarry Smith /* 5131eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 5141eb62cbbSBarry Smith diagonal block 5151eb62cbbSBarry Smith */ 516416022c9SBarry Smith static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 5171eb62cbbSBarry Smith { 518416022c9SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 51944cd7ae7SLois Curfman McInnes if (a->M != a->N) 52044cd7ae7SLois Curfman McInnes SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block"); 521416022c9SBarry Smith return MatGetDiagonal(a->A,v); 5221eb62cbbSBarry Smith } 5231eb62cbbSBarry Smith 524052efed2SBarry Smith static int MatScale_MPIAIJ(Scalar *aa,Mat A) 525052efed2SBarry Smith { 526052efed2SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 527052efed2SBarry Smith int ierr; 528052efed2SBarry Smith ierr = MatScale(aa,a->A); CHKERRQ(ierr); 529052efed2SBarry Smith ierr = MatScale(aa,a->B); CHKERRQ(ierr); 530052efed2SBarry Smith return 0; 531052efed2SBarry Smith } 532052efed2SBarry Smith 53344a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5341eb62cbbSBarry Smith { 5351eb62cbbSBarry Smith Mat mat = (Mat) obj; 53644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5371eb62cbbSBarry Smith int ierr; 538a5a9c739SBarry Smith #if defined(PETSC_LOG) 5396e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 540a5a9c739SBarry Smith #endif 5410452661fSBarry Smith PetscFree(aij->rowners); 54278b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 54378b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 5440452661fSBarry Smith if (aij->colmap) PetscFree(aij->colmap); 5450452661fSBarry Smith if (aij->garray) PetscFree(aij->garray); 5461eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 547a56f8943SBarry Smith if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 5487a0afa10SBarry Smith if (aij->rowvalues) PetscFree(aij->rowvalues); 5490452661fSBarry Smith PetscFree(aij); 550a5a9c739SBarry Smith PLogObjectDestroy(mat); 5510452661fSBarry Smith PetscHeaderDestroy(mat); 5521eb62cbbSBarry Smith return 0; 5531eb62cbbSBarry Smith } 5547857610eSBarry Smith #include "draw.h" 555b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 556ee50ffe9SBarry Smith 557416022c9SBarry Smith static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 5581eb62cbbSBarry Smith { 559416022c9SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 560416022c9SBarry Smith int ierr; 561416022c9SBarry Smith 56217699dbbSLois Curfman McInnes if (aij->size == 1) { 563416022c9SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 564416022c9SBarry Smith } 565a523beb4SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 566416022c9SBarry Smith return 0; 567416022c9SBarry Smith } 568416022c9SBarry Smith 569d7e8b826SBarry Smith static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 570416022c9SBarry Smith { 57144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 572dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 573a56f8943SBarry Smith int ierr, format,shift = C->indexshift,rank; 574d636dbe3SBarry Smith FILE *fd; 57519bcc07fSBarry Smith ViewerType vtype; 576416022c9SBarry Smith 57719bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 57819bcc07fSBarry Smith if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 57990ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 58090ace30eSBarry Smith if (format == ASCII_FORMAT_INFO_DETAILED) { 58195e01e2fSLois Curfman McInnes int nz, nzalloc, mem, flg; 582a56f8943SBarry Smith MPI_Comm_rank(mat->comm,&rank); 58390ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 584a56f8943SBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 58595e01e2fSLois Curfman McInnes ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 58677c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 58795e01e2fSLois Curfman McInnes if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 58895e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 58995e01e2fSLois Curfman McInnes else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 59095e01e2fSLois Curfman McInnes rank,aij->m,nz,nzalloc,mem); 59108480c60SBarry Smith ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 59295e01e2fSLois Curfman McInnes fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz); 59308480c60SBarry Smith ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 59495e01e2fSLois Curfman McInnes fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz); 595a56f8943SBarry Smith fflush(fd); 59677c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 597a40aa06bSLois Curfman McInnes ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 598416022c9SBarry Smith return 0; 599416022c9SBarry Smith } 60090ace30eSBarry Smith else if (format == ASCII_FORMAT_INFO) { 60108480c60SBarry Smith return 0; 60208480c60SBarry Smith } 603416022c9SBarry Smith } 604416022c9SBarry Smith 60519bcc07fSBarry Smith if (vtype == DRAW_VIEWER) { 60619bcc07fSBarry Smith Draw draw; 60719bcc07fSBarry Smith PetscTruth isnull; 60819bcc07fSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 60919bcc07fSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 61019bcc07fSBarry Smith } 61119bcc07fSBarry Smith 61219bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER) { 61390ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 61477c4ece6SBarry Smith PetscSequentialPhaseBegin(mat->comm,1); 615d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 61617699dbbSLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 6171eb62cbbSBarry Smith aij->cend); 61878b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 61978b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 620d13ab20cSBarry Smith fflush(fd); 62177c4ece6SBarry Smith PetscSequentialPhaseEnd(mat->comm,1); 622d13ab20cSBarry Smith } 623416022c9SBarry Smith else { 624a56f8943SBarry Smith int size = aij->size; 625a56f8943SBarry Smith rank = aij->rank; 62617699dbbSLois Curfman McInnes if (size == 1) { 62778b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 62895373324SBarry Smith } 62995373324SBarry Smith else { 63095373324SBarry Smith /* assemble the entire matrix onto first processor. */ 63195373324SBarry Smith Mat A; 632ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 6332eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 63495373324SBarry Smith Scalar *a; 6352ee70a88SLois Curfman McInnes 63617699dbbSLois Curfman McInnes if (!rank) { 637b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 638c451ab25SLois Curfman McInnes CHKERRQ(ierr); 63995373324SBarry Smith } 64095373324SBarry Smith else { 641b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 642c451ab25SLois Curfman McInnes CHKERRQ(ierr); 64395373324SBarry Smith } 644464493b3SBarry Smith PLogObjectParent(mat,A); 645416022c9SBarry Smith 64695373324SBarry Smith /* copy over the A part */ 647ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 6482ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 64995373324SBarry Smith row = aij->rstart; 650dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 65195373324SBarry Smith for ( i=0; i<m; i++ ) { 652416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 65395373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 65495373324SBarry Smith } 6552ee70a88SLois Curfman McInnes aj = Aloc->j; 656dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 65795373324SBarry Smith 65895373324SBarry Smith /* copy over the B part */ 659ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 6602ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 66195373324SBarry Smith row = aij->rstart; 6620452661fSBarry Smith ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 663dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 66495373324SBarry Smith for ( i=0; i<m; i++ ) { 665416022c9SBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 66695373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 66795373324SBarry Smith } 6680452661fSBarry Smith PetscFree(ct); 6696d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 6706d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 67117699dbbSLois Curfman McInnes if (!rank) { 67278b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 67395373324SBarry Smith } 67478b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 67595373324SBarry Smith } 67695373324SBarry Smith } 6771eb62cbbSBarry Smith return 0; 6781eb62cbbSBarry Smith } 6791eb62cbbSBarry Smith 680416022c9SBarry Smith static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 681416022c9SBarry Smith { 682416022c9SBarry Smith Mat mat = (Mat) obj; 683416022c9SBarry Smith int ierr; 68419bcc07fSBarry Smith ViewerType vtype; 685416022c9SBarry Smith 68619bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 68719bcc07fSBarry Smith if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 68819bcc07fSBarry Smith vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 689d7e8b826SBarry Smith ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 690416022c9SBarry Smith } 69119bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 692416022c9SBarry Smith return MatView_MPIAIJ_Binary(mat,viewer); 693416022c9SBarry Smith } 694416022c9SBarry Smith return 0; 695416022c9SBarry Smith } 696416022c9SBarry Smith 6971eb62cbbSBarry Smith /* 6981eb62cbbSBarry Smith This has to provide several versions. 6991eb62cbbSBarry Smith 7001eb62cbbSBarry Smith 1) per sequential 7011eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 7021eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 703d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 7041eb62cbbSBarry Smith */ 705fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 706dbb450caSBarry Smith double fshift,int its,Vec xx) 7078a729477SBarry Smith { 70844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 709d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 710ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 7116abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 7126abc6512SBarry Smith int ierr,*idx, *diag; 713416022c9SBarry Smith int n = mat->n, m = mat->m, i,shift = A->indexshift; 714da3a660dSBarry Smith Vec tt; 7158a729477SBarry Smith 716d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 717dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 718dbb450caSBarry Smith ls = ls + shift; 719ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 720d6dfbf8fSBarry Smith diag = A->diag; 721acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 72248d91487SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 723acb40c82SBarry Smith } 724da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 725da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 726da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 727da3a660dSBarry Smith 728da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 729da3a660dSBarry Smith 730da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 731da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 732da3a660dSBarry Smith is the relaxation factor. 733da3a660dSBarry Smith */ 73478b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 735464493b3SBarry Smith PLogObjectParent(matin,tt); 736da3a660dSBarry Smith VecGetArray(tt,&t); 737da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 738da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 739da3a660dSBarry Smith VecSet(&zero,mat->lvec); 74064119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 74178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 742da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 743da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 744dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 745dbb450caSBarry Smith v = A->a + diag[i] + !shift; 746da3a660dSBarry Smith sum = b[i]; 747da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 748dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 749da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 750dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 751dbb450caSBarry Smith v = B->a + B->i[i] + shift; 752da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 753da3a660dSBarry Smith x[i] = omega*(sum/d); 754da3a660dSBarry Smith } 75564119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 75678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 757da3a660dSBarry Smith 758da3a660dSBarry Smith /* t = b - (2*E - D)x */ 759da3a660dSBarry Smith v = A->a; 760dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 761da3a660dSBarry Smith 762da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 763dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 764da3a660dSBarry Smith diag = A->diag; 765da3a660dSBarry Smith VecSet(&zero,mat->lvec); 76664119d58SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 76778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 768da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 769da3a660dSBarry Smith n = diag[i] - A->i[i]; 770dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 771dbb450caSBarry Smith v = A->a + A->i[i] + shift; 772da3a660dSBarry Smith sum = t[i]; 773da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 774dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 775da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 776dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 777dbb450caSBarry Smith v = B->a + B->i[i] + shift; 778da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 779da3a660dSBarry Smith t[i] = omega*(sum/d); 780da3a660dSBarry Smith } 78164119d58SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 78278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 783da3a660dSBarry Smith /* x = x + t */ 784da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 785da3a660dSBarry Smith VecDestroy(tt); 786da3a660dSBarry Smith return 0; 787da3a660dSBarry Smith } 788da3a660dSBarry Smith 7891eb62cbbSBarry Smith 790d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 791da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 792da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 793da3a660dSBarry Smith } 794da3a660dSBarry Smith else { 79564119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 79678b31e54SBarry Smith CHKERRQ(ierr); 79764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 79878b31e54SBarry Smith CHKERRQ(ierr); 799da3a660dSBarry Smith } 800d6dfbf8fSBarry Smith while (its--) { 801d6dfbf8fSBarry Smith /* go down through the rows */ 80264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 80378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 804d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 805d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 806dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 807dbb450caSBarry Smith v = A->a + A->i[i] + shift; 808d6dfbf8fSBarry Smith sum = b[i]; 809d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 810dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 811d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 812dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 813dbb450caSBarry Smith v = B->a + B->i[i] + shift; 814d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 815d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 816d6dfbf8fSBarry Smith } 81764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 81878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 819d6dfbf8fSBarry Smith /* come up through the rows */ 82064119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 82178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 822d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 823d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 824dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 825dbb450caSBarry Smith v = A->a + A->i[i] + shift; 826d6dfbf8fSBarry Smith sum = b[i]; 827d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 828dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 829d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 830dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 831dbb450caSBarry Smith v = B->a + B->i[i] + shift; 832d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 833d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 834d6dfbf8fSBarry Smith } 83564119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 83678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 837d6dfbf8fSBarry Smith } 838d6dfbf8fSBarry Smith } 839d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 840da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 841da3a660dSBarry Smith VecSet(&zero,mat->lvec); 84264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 84378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 844da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 845da3a660dSBarry Smith n = diag[i] - A->i[i]; 846dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 847dbb450caSBarry Smith v = A->a + A->i[i] + shift; 848da3a660dSBarry Smith sum = b[i]; 849da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 850dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 851da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 852dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 853dbb450caSBarry Smith v = B->a + B->i[i] + shift; 854da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 855da3a660dSBarry Smith x[i] = omega*(sum/d); 856da3a660dSBarry Smith } 85764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 85878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 859da3a660dSBarry Smith its--; 860da3a660dSBarry Smith } 861d6dfbf8fSBarry Smith while (its--) { 86264119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 86378b31e54SBarry Smith CHKERRQ(ierr); 86464119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 86578b31e54SBarry Smith CHKERRQ(ierr); 86664119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 86778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 868d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 869d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 870dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 871dbb450caSBarry Smith v = A->a + A->i[i] + shift; 872d6dfbf8fSBarry Smith sum = b[i]; 873d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 874dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 875d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 876dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 877dbb450caSBarry Smith v = B->a + B->i[i] + shift; 878d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 879d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 880d6dfbf8fSBarry Smith } 88164119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 88278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 883d6dfbf8fSBarry Smith } 884d6dfbf8fSBarry Smith } 885d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 886da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 887da3a660dSBarry Smith VecSet(&zero,mat->lvec); 88864119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 88978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 890da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 891da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 892dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 893dbb450caSBarry Smith v = A->a + diag[i] + !shift; 894da3a660dSBarry Smith sum = b[i]; 895da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 896dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 897da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 898dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 899dbb450caSBarry Smith v = B->a + B->i[i] + shift; 900da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 901da3a660dSBarry Smith x[i] = omega*(sum/d); 902da3a660dSBarry Smith } 90364119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 90478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 905da3a660dSBarry Smith its--; 906da3a660dSBarry Smith } 907d6dfbf8fSBarry Smith while (its--) { 90864119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 90978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 91064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 91178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 91264119d58SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 91378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 914d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 915d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 916dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 917dbb450caSBarry Smith v = A->a + A->i[i] + shift; 918d6dfbf8fSBarry Smith sum = b[i]; 919d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 920dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 921d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 922dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 923dbb450caSBarry Smith v = B->a + B->i[i] + shift; 924d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 925d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 926d6dfbf8fSBarry Smith } 92764119d58SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 92878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 929d6dfbf8fSBarry Smith } 930d6dfbf8fSBarry Smith } 931d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 932da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 93338597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 934da3a660dSBarry Smith } 93564119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 93678b31e54SBarry Smith CHKERRQ(ierr); 93764119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 93878b31e54SBarry Smith CHKERRQ(ierr); 939d6dfbf8fSBarry Smith while (its--) { 940d6dfbf8fSBarry Smith /* go down through the rows */ 941d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 942d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 943dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 944dbb450caSBarry Smith v = A->a + A->i[i] + shift; 945d6dfbf8fSBarry Smith sum = b[i]; 946d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 947dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 948d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 949dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 950dbb450caSBarry Smith v = B->a + B->i[i] + shift; 951d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 952d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 953d6dfbf8fSBarry Smith } 954d6dfbf8fSBarry Smith /* come up through the rows */ 955d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 956d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 957dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 958dbb450caSBarry Smith v = A->a + A->i[i] + shift; 959d6dfbf8fSBarry Smith sum = b[i]; 960d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 961dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 962d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 963dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 964dbb450caSBarry Smith v = B->a + B->i[i] + shift; 965d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 966d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 967d6dfbf8fSBarry Smith } 968d6dfbf8fSBarry Smith } 969d6dfbf8fSBarry Smith } 970d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 971da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 97238597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 973da3a660dSBarry Smith } 97464119d58SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 97578b31e54SBarry Smith CHKERRQ(ierr); 97664119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 97778b31e54SBarry Smith CHKERRQ(ierr); 978d6dfbf8fSBarry Smith while (its--) { 979d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 980d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 981dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 982dbb450caSBarry Smith v = A->a + A->i[i] + shift; 983d6dfbf8fSBarry Smith sum = b[i]; 984d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 985dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 986d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 987dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 988dbb450caSBarry Smith v = B->a + B->i[i] + shift; 989d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 990d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 991d6dfbf8fSBarry Smith } 992d6dfbf8fSBarry Smith } 993d6dfbf8fSBarry Smith } 994d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 995da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 99638597bd4SSatish Balay return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 997da3a660dSBarry Smith } 99864119d58SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 99978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 100064119d58SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 100178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 1002d6dfbf8fSBarry Smith while (its--) { 1003d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 1004d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 1005dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 1006dbb450caSBarry Smith v = A->a + A->i[i] + shift; 1007d6dfbf8fSBarry Smith sum = b[i]; 1008d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 1009dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 1010d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 1011dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 1012dbb450caSBarry Smith v = B->a + B->i[i] + shift; 1013d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 1014d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 1015d6dfbf8fSBarry Smith } 1016d6dfbf8fSBarry Smith } 1017d6dfbf8fSBarry Smith } 10188a729477SBarry Smith return 0; 10198a729477SBarry Smith } 1020a66be287SLois Curfman McInnes 1021fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 1022fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 1023a66be287SLois Curfman McInnes { 1024a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1025a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 1026a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 1027a66be287SLois Curfman McInnes 102878b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 102978b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 1030a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 1031a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 1032bcd2baecSBarry Smith if (nz) *nz = isend[0]; 1033bcd2baecSBarry Smith if (nzalloc) *nzalloc = isend[1]; 1034bcd2baecSBarry Smith if (mem) *mem = isend[2]; 1035a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 1036d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 1037bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 1038bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 1039bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 1040a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 1041d65a2f8fSBarry Smith MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 1042bcd2baecSBarry Smith if (nz) *nz = irecv[0]; 1043bcd2baecSBarry Smith if (nzalloc) *nzalloc = irecv[1]; 1044bcd2baecSBarry Smith if (mem) *mem = irecv[2]; 1045a66be287SLois Curfman McInnes } 1046a66be287SLois Curfman McInnes return 0; 1047a66be287SLois Curfman McInnes } 1048a66be287SLois Curfman McInnes 1049299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1050299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1051299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1052299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1053299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1054299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1055299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1056299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1057299609e3SLois Curfman McInnes 1058416022c9SBarry Smith static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1059c74985f6SBarry Smith { 1060c0bbcb79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1061c74985f6SBarry Smith 10626d4a8577SBarry Smith if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 10636d4a8577SBarry Smith op == MAT_YES_NEW_NONZERO_LOCATIONS || 10646d4a8577SBarry Smith op == MAT_COLUMNS_SORTED || 10656d4a8577SBarry Smith op == MAT_ROW_ORIENTED) { 1066c0bbcb79SLois Curfman McInnes MatSetOption(a->A,op); 1067c0bbcb79SLois Curfman McInnes MatSetOption(a->B,op); 1068c74985f6SBarry Smith } 10696d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 10706d4a8577SBarry Smith op == MAT_SYMMETRIC || 10716d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 10726d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 107394a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 10746d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) { 10754b0e389bSBarry Smith a->roworiented = 0; 10764b0e389bSBarry Smith MatSetOption(a->A,op); 10774b0e389bSBarry Smith MatSetOption(a->B,op); 10784b0e389bSBarry Smith } 10796d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 10806d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 1081c0bbcb79SLois Curfman McInnes else 10824d39ef84SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 1083c74985f6SBarry Smith return 0; 1084c74985f6SBarry Smith } 1085c74985f6SBarry Smith 1086d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1087c74985f6SBarry Smith { 108844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1089c74985f6SBarry Smith *m = mat->M; *n = mat->N; 1090c74985f6SBarry Smith return 0; 1091c74985f6SBarry Smith } 1092c74985f6SBarry Smith 1093d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1094c74985f6SBarry Smith { 109544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1096b7c46309SBarry Smith *m = mat->m; *n = mat->N; 1097c74985f6SBarry Smith return 0; 1098c74985f6SBarry Smith } 1099c74985f6SBarry Smith 1100d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1101c74985f6SBarry Smith { 110244a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1103c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1104c74985f6SBarry Smith return 0; 1105c74985f6SBarry Smith } 1106c74985f6SBarry Smith 11076d84be18SBarry Smith extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11086d84be18SBarry Smith extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11096d84be18SBarry Smith 11106d84be18SBarry Smith int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 111139e00950SLois Curfman McInnes { 1112154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 111370f0671dSBarry Smith Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1114154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1115154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 111670f0671dSBarry Smith int *cmap, *idx_p; 111739e00950SLois Curfman McInnes 11187a0afa10SBarry Smith if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active"); 11197a0afa10SBarry Smith mat->getrowactive = PETSC_TRUE; 11207a0afa10SBarry Smith 112170f0671dSBarry Smith if (!mat->rowvalues && (idx || v)) { 11227a0afa10SBarry Smith /* 11237a0afa10SBarry Smith allocate enough space to hold information from the longest row. 11247a0afa10SBarry Smith */ 11257a0afa10SBarry Smith Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 11267a0afa10SBarry Smith int max = 1,n = mat->n,tmp; 11277a0afa10SBarry Smith for ( i=0; i<n; i++ ) { 11287a0afa10SBarry Smith tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 11297a0afa10SBarry Smith if (max < tmp) { max = tmp; } 11307a0afa10SBarry Smith } 11317a0afa10SBarry Smith mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 11327a0afa10SBarry Smith CHKPTRQ(mat->rowvalues); 11337a0afa10SBarry Smith mat->rowindices = (int *) (mat->rowvalues + max); 11347a0afa10SBarry Smith } 11357a0afa10SBarry Smith 11367a0afa10SBarry Smith 1137b49de8d1SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows") 1138abc0e9e4SLois Curfman McInnes lrow = row - rstart; 113939e00950SLois Curfman McInnes 1140154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1141154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1142154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 114338597bd4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 114438597bd4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1145154123eaSLois Curfman McInnes nztot = nzA + nzB; 1146154123eaSLois Curfman McInnes 114770f0671dSBarry Smith cmap = mat->garray; 1148154123eaSLois Curfman McInnes if (v || idx) { 1149154123eaSLois Curfman McInnes if (nztot) { 1150154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 115170f0671dSBarry Smith int imark = -1; 1152154123eaSLois Curfman McInnes if (v) { 115370f0671dSBarry Smith *v = v_p = mat->rowvalues; 115439e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 115570f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1156154123eaSLois Curfman McInnes else break; 1157154123eaSLois Curfman McInnes } 1158154123eaSLois Curfman McInnes imark = i; 115970f0671dSBarry Smith for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 116070f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1161154123eaSLois Curfman McInnes } 1162154123eaSLois Curfman McInnes if (idx) { 116370f0671dSBarry Smith *idx = idx_p = mat->rowindices; 116470f0671dSBarry Smith if (imark > -1) { 116570f0671dSBarry Smith for ( i=0; i<imark; i++ ) { 116670f0671dSBarry Smith idx_p[i] = cmap[cworkB[i]]; 116770f0671dSBarry Smith } 116870f0671dSBarry Smith } else { 1169154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 117070f0671dSBarry Smith if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1171154123eaSLois Curfman McInnes else break; 1172154123eaSLois Curfman McInnes } 1173154123eaSLois Curfman McInnes imark = i; 117470f0671dSBarry Smith } 117570f0671dSBarry Smith for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 117670f0671dSBarry Smith for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 117739e00950SLois Curfman McInnes } 117839e00950SLois Curfman McInnes } 11791ca473b0SSatish Balay else { 11801ca473b0SSatish Balay if (idx) *idx = 0; 11811ca473b0SSatish Balay if (v) *v = 0; 11821ca473b0SSatish Balay } 1183154123eaSLois Curfman McInnes } 118439e00950SLois Curfman McInnes *nz = nztot; 118538597bd4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 118638597bd4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 118739e00950SLois Curfman McInnes return 0; 118839e00950SLois Curfman McInnes } 118939e00950SLois Curfman McInnes 11906d84be18SBarry Smith int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 119139e00950SLois Curfman McInnes { 11927a0afa10SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 11937a0afa10SBarry Smith if (aij->getrowactive == PETSC_FALSE) { 11947a0afa10SBarry Smith SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called"); 11957a0afa10SBarry Smith } 11967a0afa10SBarry Smith aij->getrowactive = PETSC_FALSE; 119739e00950SLois Curfman McInnes return 0; 119839e00950SLois Curfman McInnes } 119939e00950SLois Curfman McInnes 1200cddf8d76SBarry Smith static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1201855ac2c5SLois Curfman McInnes { 1202855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1203ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1204416022c9SBarry Smith int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1205416022c9SBarry Smith double sum = 0.0; 120604ca555eSLois Curfman McInnes Scalar *v; 120704ca555eSLois Curfman McInnes 120817699dbbSLois Curfman McInnes if (aij->size == 1) { 120914183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 121037fa93a5SLois Curfman McInnes } else { 121104ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 121204ca555eSLois Curfman McInnes v = amat->a; 121304ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 121404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 121504ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 121604ca555eSLois Curfman McInnes #else 121704ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 121804ca555eSLois Curfman McInnes #endif 121904ca555eSLois Curfman McInnes } 122004ca555eSLois Curfman McInnes v = bmat->a; 122104ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 122204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 122304ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 122404ca555eSLois Curfman McInnes #else 122504ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 122604ca555eSLois Curfman McInnes #endif 122704ca555eSLois Curfman McInnes } 12286d84be18SBarry Smith MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 122904ca555eSLois Curfman McInnes *norm = sqrt(*norm); 123004ca555eSLois Curfman McInnes } 123104ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 123204ca555eSLois Curfman McInnes double *tmp, *tmp2; 123304ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 12340452661fSBarry Smith tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 12350452661fSBarry Smith tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1236cddf8d76SBarry Smith PetscMemzero(tmp,aij->N*sizeof(double)); 123704ca555eSLois Curfman McInnes *norm = 0.0; 123804ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 123904ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1240579c6b6fSBarry Smith tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 124104ca555eSLois Curfman McInnes } 124204ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 124304ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1244579c6b6fSBarry Smith tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 124504ca555eSLois Curfman McInnes } 12466d84be18SBarry Smith MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 124704ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 124804ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 124904ca555eSLois Curfman McInnes } 12500452661fSBarry Smith PetscFree(tmp); PetscFree(tmp2); 125104ca555eSLois Curfman McInnes } 125204ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1253515d9167SLois Curfman McInnes double ntemp = 0.0; 125404ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1255dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 125604ca555eSLois Curfman McInnes sum = 0.0; 125704ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1258cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 125904ca555eSLois Curfman McInnes } 1260dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 126104ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1262cddf8d76SBarry Smith sum += PetscAbsScalar(*v); v++; 126304ca555eSLois Curfman McInnes } 1264515d9167SLois Curfman McInnes if (sum > ntemp) ntemp = sum; 126504ca555eSLois Curfman McInnes } 12666d84be18SBarry Smith MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 126704ca555eSLois Curfman McInnes } 126804ca555eSLois Curfman McInnes else { 1269515d9167SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm"); 127004ca555eSLois Curfman McInnes } 127137fa93a5SLois Curfman McInnes } 1272855ac2c5SLois Curfman McInnes return 0; 1273855ac2c5SLois Curfman McInnes } 1274855ac2c5SLois Curfman McInnes 12750de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1276b7c46309SBarry Smith { 1277b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1278dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1279416022c9SBarry Smith int ierr,shift = Aloc->indexshift; 1280416022c9SBarry Smith Mat B; 1281b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1282b7c46309SBarry Smith Scalar *array; 1283b7c46309SBarry Smith 12843638b69dSLois Curfman McInnes if (matout == PETSC_NULL && M != N) 12853638b69dSLois Curfman McInnes SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place"); 1286b4fd4287SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1287b4fd4287SBarry Smith PETSC_NULL,&B); CHKERRQ(ierr); 1288b7c46309SBarry Smith 1289b7c46309SBarry Smith /* copy over the A part */ 1290ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1291b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1292b7c46309SBarry Smith row = a->rstart; 1293dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1294b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1295416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1296b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1297b7c46309SBarry Smith } 1298b7c46309SBarry Smith aj = Aloc->j; 12994af08d9eSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1300b7c46309SBarry Smith 1301b7c46309SBarry Smith /* copy over the B part */ 1302ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1303b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1304b7c46309SBarry Smith row = a->rstart; 13050452661fSBarry Smith ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1306dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1307b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1308416022c9SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1309b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1310b7c46309SBarry Smith } 13110452661fSBarry Smith PetscFree(ct); 13126d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13136d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 13143638b69dSLois Curfman McInnes if (matout != PETSC_NULL) { 13150de55854SLois Curfman McInnes *matout = B; 13160de55854SLois Curfman McInnes } else { 13170de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 13180452661fSBarry Smith PetscFree(a->rowners); 13190de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 13200de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 13210452661fSBarry Smith if (a->colmap) PetscFree(a->colmap); 13220452661fSBarry Smith if (a->garray) PetscFree(a->garray); 13230de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1324a56f8943SBarry Smith if (a->Mvctx) VecScatterDestroy(a->Mvctx); 13250452661fSBarry Smith PetscFree(a); 1326416022c9SBarry Smith PetscMemcpy(A,B,sizeof(struct _Mat)); 13270452661fSBarry Smith PetscHeaderDestroy(B); 13280de55854SLois Curfman McInnes } 1329b7c46309SBarry Smith return 0; 1330b7c46309SBarry Smith } 1331b7c46309SBarry Smith 1332682d7d0cSBarry Smith extern int MatPrintHelp_SeqAIJ(Mat); 1333682d7d0cSBarry Smith static int MatPrintHelp_MPIAIJ(Mat A) 1334682d7d0cSBarry Smith { 1335682d7d0cSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1336682d7d0cSBarry Smith 1337682d7d0cSBarry Smith if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1338682d7d0cSBarry Smith else return 0; 1339682d7d0cSBarry Smith } 1340682d7d0cSBarry Smith 13415a838052SSatish Balay static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 13425a838052SSatish Balay { 13435a838052SSatish Balay *bs = 1; 13445a838052SSatish Balay return 0; 13455a838052SSatish Balay } 13465a838052SSatish Balay 1347fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 13483d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 13492f86bd48SSatish Balay extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1350598137ffSSatish Balay int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 13518a729477SBarry Smith /* -------------------------------------------------------------------*/ 13522ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 135339e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 135444a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 135544a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1356299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1357299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1358299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 135944a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1360b7c46309SBarry Smith MatTranspose_MPIAIJ, 1361a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1362855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1363ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 13641eb62cbbSBarry Smith 0, 1365299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1366299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1367299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1368d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1369299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 13703d1612f7SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0, 1371b49de8d1SLois Curfman McInnes 0,0,0, 1372598137ffSSatish Balay MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1373052efed2SBarry Smith MatPrintHelp_MPIAIJ, 13745a838052SSatish Balay MatScale_MPIAIJ,0,0,0,MatGetBlockSize_MPIAIJ}; 13758a729477SBarry Smith 13761987afe7SBarry Smith /*@C 1377ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 13783a511b96SLois Curfman McInnes (the default parallel PETSc format). For good matrix assembly performance 13793a511b96SLois Curfman McInnes the user should preallocate the matrix storage by setting the parameters 13803a511b96SLois Curfman McInnes d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 13813a511b96SLois Curfman McInnes performance can be increased by more than a factor of 50. 13828a729477SBarry Smith 13838a729477SBarry Smith Input Parameters: 13841eb62cbbSBarry Smith . comm - MPI communicator 13857d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 138692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 138792e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 138892e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 138992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 139092e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 13917d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 139292e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1393ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1394ff756334SLois Curfman McInnes (same for all local rows) 13952bd5e0b2SLois Curfman McInnes . d_nzz - array containing the number of nonzeros in the various rows of the 139692e8d321SLois Curfman McInnes diagonal portion of the local submatrix (possibly different for each row) 13972bd5e0b2SLois Curfman McInnes or PETSC_NULL. You must leave room for the diagonal entry even if 13982bd5e0b2SLois Curfman McInnes it is zero. 13992bd5e0b2SLois Curfman McInnes . o_nz - number of nonzeros per row in the off-diagonal portion of local 1400ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 14012bd5e0b2SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various rows of the 14022bd5e0b2SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 14032bd5e0b2SLois Curfman McInnes each row) or PETSC_NULL. 14048a729477SBarry Smith 1405ff756334SLois Curfman McInnes Output Parameter: 140644cd7ae7SLois Curfman McInnes . A - the matrix 14078a729477SBarry Smith 1408ff756334SLois Curfman McInnes Notes: 1409ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1410ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 14110002213bSLois Curfman McInnes storage. That is, the stored row and column indices can begin at 14120002213bSLois Curfman McInnes either one (as in Fortran) or zero. See the users manual for details. 1413ff756334SLois Curfman McInnes 1414ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1415ff756334SLois Curfman McInnes (possibly both). 1416ff756334SLois Curfman McInnes 14175511cfe3SLois Curfman McInnes By default, this format uses inodes (identical nodes) when possible. 14185511cfe3SLois Curfman McInnes We search for consecutive rows with the same nonzero structure, thereby 14195511cfe3SLois Curfman McInnes reusing matrix information to achieve increased efficiency. 14205511cfe3SLois Curfman McInnes 14215511cfe3SLois Curfman McInnes Options Database Keys: 14225511cfe3SLois Curfman McInnes $ -mat_aij_no_inode - Do not use inodes 14236e62573dSLois Curfman McInnes $ -mat_aij_inode_limit <limit> - Set inode limit. 14246e62573dSLois Curfman McInnes $ (max limit=5) 14256e62573dSLois Curfman McInnes $ -mat_aij_oneindex - Internally use indexing starting at 1 14266e62573dSLois Curfman McInnes $ rather than 0. Note: When calling MatSetValues(), 14276e62573dSLois Curfman McInnes $ the user still MUST index entries starting at 0! 14285511cfe3SLois Curfman McInnes 1429e0245417SLois Curfman McInnes Storage Information: 1430e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1431e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1432e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1433e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1434e0245417SLois Curfman McInnes 1435e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 14365ace5be8SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set 14375ace5be8SLois Curfman McInnes d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 14385ace5be8SLois Curfman McInnes memory allocation. Likewise, specify preallocated storage for the 14395ace5be8SLois Curfman McInnes off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1440ff756334SLois Curfman McInnes 14415511cfe3SLois Curfman McInnes Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 14425511cfe3SLois Curfman McInnes the figure below we depict these three local rows and all columns (0-11). 14432191d07cSBarry Smith 1444b810aeb4SBarry Smith $ 0 1 2 3 4 5 6 7 8 9 10 11 1445b810aeb4SBarry Smith $ ------------------- 14465511cfe3SLois Curfman McInnes $ row 3 | o o o d d d o o o o o o 14475511cfe3SLois Curfman McInnes $ row 4 | o o o d d d o o o o o o 14485511cfe3SLois Curfman McInnes $ row 5 | o o o d d d o o o o o o 14495511cfe3SLois Curfman McInnes $ ------------------- 1450b810aeb4SBarry Smith $ 1451b810aeb4SBarry Smith 14525511cfe3SLois Curfman McInnes Thus, any entries in the d locations are stored in the d (diagonal) 14535511cfe3SLois Curfman McInnes submatrix, and any entries in the o locations are stored in the 14545511cfe3SLois Curfman McInnes o (off-diagonal) submatrix. Note that the d and the o submatrices are 14555511cfe3SLois Curfman McInnes stored simply in the MATSEQAIJ format for compressed row storage. 14565511cfe3SLois Curfman McInnes 14575511cfe3SLois Curfman McInnes Now d_nz should indicate the number of nonzeros per row in the d matrix, 14585511cfe3SLois Curfman McInnes and o_nz should indicate the number of nonzeros per row in the o matrix. 14595511cfe3SLois Curfman McInnes In general, for PDE problems in which most nonzeros are near the diagonal, 14603d323bbdSBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 146192e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 14623d323bbdSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 14633a511b96SLois Curfman McInnes 1464dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1465ff756334SLois Curfman McInnes 1466fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 14678a729477SBarry Smith @*/ 14681eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 146944cd7ae7SLois Curfman McInnes int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 14708a729477SBarry Smith { 147144cd7ae7SLois Curfman McInnes Mat B; 147244cd7ae7SLois Curfman McInnes Mat_MPIAIJ *b; 14736abc6512SBarry Smith int ierr, i,sum[2],work[2]; 1474416022c9SBarry Smith 147544cd7ae7SLois Curfman McInnes *A = 0; 147644cd7ae7SLois Curfman McInnes PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 147744cd7ae7SLois Curfman McInnes PLogObjectCreate(B); 147844cd7ae7SLois Curfman McInnes B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 147944cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_MPIAIJ)); 148044cd7ae7SLois Curfman McInnes PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 148144cd7ae7SLois Curfman McInnes B->destroy = MatDestroy_MPIAIJ; 148244cd7ae7SLois Curfman McInnes B->view = MatView_MPIAIJ; 148344cd7ae7SLois Curfman McInnes B->factor = 0; 148444cd7ae7SLois Curfman McInnes B->assembled = PETSC_FALSE; 1485d6dfbf8fSBarry Smith 148644cd7ae7SLois Curfman McInnes b->insertmode = NOT_SET_VALUES; 148744cd7ae7SLois Curfman McInnes MPI_Comm_rank(comm,&b->rank); 148844cd7ae7SLois Curfman McInnes MPI_Comm_size(comm,&b->size); 14891eb62cbbSBarry Smith 1490b4fd4287SBarry Smith if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 14912e0155e0SLois Curfman McInnes SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 14921987afe7SBarry Smith 1493dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 14941eb62cbbSBarry Smith work[0] = m; work[1] = n; 1495d65a2f8fSBarry Smith MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1496dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1497dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 14981eb62cbbSBarry Smith } 149944cd7ae7SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 150044cd7ae7SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 150144cd7ae7SLois Curfman McInnes b->m = m; B->m = m; 150244cd7ae7SLois Curfman McInnes b->n = n; B->n = n; 150344cd7ae7SLois Curfman McInnes b->N = N; B->N = N; 150444cd7ae7SLois Curfman McInnes b->M = M; B->M = M; 15051eb62cbbSBarry Smith 15061eb62cbbSBarry Smith /* build local table of row and column ownerships */ 150744cd7ae7SLois Curfman McInnes b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 150844cd7ae7SLois Curfman McInnes PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1509*603f58a4SSatish Balay b->cowners = b->rowners + b->size + 2; 151044cd7ae7SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 151144cd7ae7SLois Curfman McInnes b->rowners[0] = 0; 151244cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 151344cd7ae7SLois Curfman McInnes b->rowners[i] += b->rowners[i-1]; 15148a729477SBarry Smith } 151544cd7ae7SLois Curfman McInnes b->rstart = b->rowners[b->rank]; 151644cd7ae7SLois Curfman McInnes b->rend = b->rowners[b->rank+1]; 151744cd7ae7SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 151844cd7ae7SLois Curfman McInnes b->cowners[0] = 0; 151944cd7ae7SLois Curfman McInnes for ( i=2; i<=b->size; i++ ) { 152044cd7ae7SLois Curfman McInnes b->cowners[i] += b->cowners[i-1]; 15218a729477SBarry Smith } 152244cd7ae7SLois Curfman McInnes b->cstart = b->cowners[b->rank]; 152344cd7ae7SLois Curfman McInnes b->cend = b->cowners[b->rank+1]; 15248a729477SBarry Smith 15255ace5be8SLois Curfman McInnes if (d_nz == PETSC_DEFAULT) d_nz = 5; 152644cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 152744cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->A); 15287b8455f0SLois Curfman McInnes if (o_nz == PETSC_DEFAULT) o_nz = 0; 152944cd7ae7SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 153044cd7ae7SLois Curfman McInnes PLogObjectParent(B,b->B); 15318a729477SBarry Smith 15321eb62cbbSBarry Smith /* build cache for off array entries formed */ 153344cd7ae7SLois Curfman McInnes ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 153444cd7ae7SLois Curfman McInnes b->colmap = 0; 153544cd7ae7SLois Curfman McInnes b->garray = 0; 153644cd7ae7SLois Curfman McInnes b->roworiented = 1; 15378a729477SBarry Smith 15381eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 153944cd7ae7SLois Curfman McInnes b->lvec = 0; 154044cd7ae7SLois Curfman McInnes b->Mvctx = 0; 15418a729477SBarry Smith 15427a0afa10SBarry Smith /* stuff for MatGetRow() */ 154344cd7ae7SLois Curfman McInnes b->rowindices = 0; 154444cd7ae7SLois Curfman McInnes b->rowvalues = 0; 154544cd7ae7SLois Curfman McInnes b->getrowactive = PETSC_FALSE; 15467a0afa10SBarry Smith 154744cd7ae7SLois Curfman McInnes *A = B; 1548d6dfbf8fSBarry Smith return 0; 1549d6dfbf8fSBarry Smith } 1550c74985f6SBarry Smith 15513d1612f7SBarry Smith static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1552d6dfbf8fSBarry Smith { 1553d6dfbf8fSBarry Smith Mat mat; 1554416022c9SBarry Smith Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1555a1b97e82SLois Curfman McInnes int ierr, len=0, flg; 1556d6dfbf8fSBarry Smith 1557416022c9SBarry Smith *newmat = 0; 15580452661fSBarry Smith PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1559a5a9c739SBarry Smith PLogObjectCreate(mat); 15600452661fSBarry Smith mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1561416022c9SBarry Smith PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 156244a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 156344a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1564d6dfbf8fSBarry Smith mat->factor = matin->factor; 1565c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1566d6dfbf8fSBarry Smith 156744cd7ae7SLois Curfman McInnes a->m = mat->m = oldmat->m; 156844cd7ae7SLois Curfman McInnes a->n = mat->n = oldmat->n; 156944cd7ae7SLois Curfman McInnes a->M = mat->M = oldmat->M; 157044cd7ae7SLois Curfman McInnes a->N = mat->N = oldmat->N; 1571d6dfbf8fSBarry Smith 1572416022c9SBarry Smith a->rstart = oldmat->rstart; 1573416022c9SBarry Smith a->rend = oldmat->rend; 1574416022c9SBarry Smith a->cstart = oldmat->cstart; 1575416022c9SBarry Smith a->cend = oldmat->cend; 157617699dbbSLois Curfman McInnes a->size = oldmat->size; 157717699dbbSLois Curfman McInnes a->rank = oldmat->rank; 157864119d58SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1579bcd2baecSBarry Smith a->rowvalues = 0; 1580bcd2baecSBarry Smith a->getrowactive = PETSC_FALSE; 1581d6dfbf8fSBarry Smith 1582*603f58a4SSatish Balay a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1583*603f58a4SSatish Balay PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1584*603f58a4SSatish Balay a->cowners = a->rowners + a->size + 2; 1585*603f58a4SSatish Balay PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1586416022c9SBarry Smith ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 15872ee70a88SLois Curfman McInnes if (oldmat->colmap) { 15880452661fSBarry Smith a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1589416022c9SBarry Smith PLogObjectMemory(mat,(a->N)*sizeof(int)); 1590416022c9SBarry Smith PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1591416022c9SBarry Smith } else a->colmap = 0; 1592ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 15930452661fSBarry Smith a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1594464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 1595416022c9SBarry Smith PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1596416022c9SBarry Smith } else a->garray = 0; 1597d6dfbf8fSBarry Smith 1598416022c9SBarry Smith ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1599416022c9SBarry Smith PLogObjectParent(mat,a->lvec); 1600a56f8943SBarry Smith ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1601416022c9SBarry Smith PLogObjectParent(mat,a->Mvctx); 1602416022c9SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1603416022c9SBarry Smith PLogObjectParent(mat,a->A); 1604416022c9SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1605416022c9SBarry Smith PLogObjectParent(mat,a->B); 16065dd7a6c7SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 160725cdf11fSBarry Smith if (flg) { 1608682d7d0cSBarry Smith ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1609682d7d0cSBarry Smith } 16108a729477SBarry Smith *newmat = mat; 16118a729477SBarry Smith return 0; 16128a729477SBarry Smith } 1613416022c9SBarry Smith 161477c4ece6SBarry Smith #include "sys.h" 1615416022c9SBarry Smith 161619bcc07fSBarry Smith int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1617416022c9SBarry Smith { 1618d65a2f8fSBarry Smith Mat A; 1619416022c9SBarry Smith int i, nz, ierr, j,rstart, rend, fd; 1620d65a2f8fSBarry Smith Scalar *vals,*svals; 162119bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 1622416022c9SBarry Smith MPI_Status status; 162317699dbbSLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1624d65a2f8fSBarry Smith int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 162519bcc07fSBarry Smith int tag = ((PetscObject)viewer)->tag; 1626416022c9SBarry Smith 162717699dbbSLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 162817699dbbSLois Curfman McInnes if (!rank) { 162990ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 163077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1631c456f294SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object"); 1632416022c9SBarry Smith } 1633416022c9SBarry Smith 1634416022c9SBarry Smith MPI_Bcast(header+1,3,MPI_INT,0,comm); 1635416022c9SBarry Smith M = header[1]; N = header[2]; 1636416022c9SBarry Smith /* determine ownership of all rows */ 163717699dbbSLois Curfman McInnes m = M/size + ((M % size) > rank); 16380452661fSBarry Smith rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1639416022c9SBarry Smith MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1640416022c9SBarry Smith rowners[0] = 0; 164117699dbbSLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1642416022c9SBarry Smith rowners[i] += rowners[i-1]; 1643416022c9SBarry Smith } 164417699dbbSLois Curfman McInnes rstart = rowners[rank]; 164517699dbbSLois Curfman McInnes rend = rowners[rank+1]; 1646416022c9SBarry Smith 1647416022c9SBarry Smith /* distribute row lengths to all processors */ 16480452661fSBarry Smith ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1649416022c9SBarry Smith offlens = ourlens + (rend-rstart); 165017699dbbSLois Curfman McInnes if (!rank) { 16510452661fSBarry Smith rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 165277c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 16530452661fSBarry Smith sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 165417699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1655d65a2f8fSBarry Smith MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 16560452661fSBarry Smith PetscFree(sndcounts); 1657416022c9SBarry Smith } 1658416022c9SBarry Smith else { 1659416022c9SBarry Smith MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1660416022c9SBarry Smith } 1661416022c9SBarry Smith 166217699dbbSLois Curfman McInnes if (!rank) { 1663416022c9SBarry Smith /* calculate the number of nonzeros on each processor */ 16640452661fSBarry Smith procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1665cddf8d76SBarry Smith PetscMemzero(procsnz,size*sizeof(int)); 166617699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 1667416022c9SBarry Smith for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1668416022c9SBarry Smith procsnz[i] += rowlengths[j]; 1669416022c9SBarry Smith } 1670416022c9SBarry Smith } 16710452661fSBarry Smith PetscFree(rowlengths); 1672416022c9SBarry Smith 1673416022c9SBarry Smith /* determine max buffer needed and allocate it */ 1674416022c9SBarry Smith maxnz = 0; 167517699dbbSLois Curfman McInnes for ( i=0; i<size; i++ ) { 16760452661fSBarry Smith maxnz = PetscMax(maxnz,procsnz[i]); 1677416022c9SBarry Smith } 16780452661fSBarry Smith cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1679416022c9SBarry Smith 1680416022c9SBarry Smith /* read in my part of the matrix column indices */ 1681416022c9SBarry Smith nz = procsnz[0]; 16820452661fSBarry Smith mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 168377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1684d65a2f8fSBarry Smith 1685d65a2f8fSBarry Smith /* read in every one elses and ship off */ 168617699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1687d65a2f8fSBarry Smith nz = procsnz[i]; 168877c4ece6SBarry Smith ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1689d65a2f8fSBarry Smith MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1690d65a2f8fSBarry Smith } 16910452661fSBarry Smith PetscFree(cols); 1692416022c9SBarry Smith } 1693416022c9SBarry Smith else { 1694416022c9SBarry Smith /* determine buffer space needed for message */ 1695416022c9SBarry Smith nz = 0; 1696416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1697416022c9SBarry Smith nz += ourlens[i]; 1698416022c9SBarry Smith } 16990452661fSBarry Smith mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1700416022c9SBarry Smith 1701416022c9SBarry Smith /* receive message of column indices*/ 1702d65a2f8fSBarry Smith MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1703416022c9SBarry Smith MPI_Get_count(&status,MPI_INT,&maxnz); 1704c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1705416022c9SBarry Smith } 1706416022c9SBarry Smith 1707416022c9SBarry Smith /* loop over local rows, determining number of off diagonal entries */ 1708cddf8d76SBarry Smith PetscMemzero(offlens,m*sizeof(int)); 1709416022c9SBarry Smith jj = 0; 1710416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1711416022c9SBarry Smith for ( j=0; j<ourlens[i]; j++ ) { 1712d65a2f8fSBarry Smith if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1713416022c9SBarry Smith jj++; 1714416022c9SBarry Smith } 1715416022c9SBarry Smith } 1716d65a2f8fSBarry Smith 1717d65a2f8fSBarry Smith /* create our matrix */ 1718416022c9SBarry Smith for ( i=0; i<m; i++ ) { 1719416022c9SBarry Smith ourlens[i] -= offlens[i]; 1720416022c9SBarry Smith } 1721d65a2f8fSBarry Smith ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1722d65a2f8fSBarry Smith A = *newmat; 17236d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 1724d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1725d65a2f8fSBarry Smith ourlens[i] += offlens[i]; 1726d65a2f8fSBarry Smith } 1727416022c9SBarry Smith 172817699dbbSLois Curfman McInnes if (!rank) { 17290452661fSBarry Smith vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1730416022c9SBarry Smith 1731416022c9SBarry Smith /* read in my part of the matrix numerical values */ 1732416022c9SBarry Smith nz = procsnz[0]; 173377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1734d65a2f8fSBarry Smith 1735d65a2f8fSBarry Smith /* insert into matrix */ 1736d65a2f8fSBarry Smith jj = rstart; 1737d65a2f8fSBarry Smith smycols = mycols; 1738d65a2f8fSBarry Smith svals = vals; 1739d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1740d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1741d65a2f8fSBarry Smith smycols += ourlens[i]; 1742d65a2f8fSBarry Smith svals += ourlens[i]; 1743d65a2f8fSBarry Smith jj++; 1744416022c9SBarry Smith } 1745416022c9SBarry Smith 1746d65a2f8fSBarry Smith /* read in other processors and ship out */ 174717699dbbSLois Curfman McInnes for ( i=1; i<size; i++ ) { 1748416022c9SBarry Smith nz = procsnz[i]; 174977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1750d65a2f8fSBarry Smith MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1751416022c9SBarry Smith } 17520452661fSBarry Smith PetscFree(procsnz); 1753416022c9SBarry Smith } 1754d65a2f8fSBarry Smith else { 1755d65a2f8fSBarry Smith /* receive numeric values */ 17560452661fSBarry Smith vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1757416022c9SBarry Smith 1758d65a2f8fSBarry Smith /* receive message of values*/ 1759d65a2f8fSBarry Smith MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1760d65a2f8fSBarry Smith MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1761c456f294SBarry Smith if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file"); 1762d65a2f8fSBarry Smith 1763d65a2f8fSBarry Smith /* insert into matrix */ 1764d65a2f8fSBarry Smith jj = rstart; 1765d65a2f8fSBarry Smith smycols = mycols; 1766d65a2f8fSBarry Smith svals = vals; 1767d65a2f8fSBarry Smith for ( i=0; i<m; i++ ) { 1768d65a2f8fSBarry Smith ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1769d65a2f8fSBarry Smith smycols += ourlens[i]; 1770d65a2f8fSBarry Smith svals += ourlens[i]; 1771d65a2f8fSBarry Smith jj++; 1772d65a2f8fSBarry Smith } 1773d65a2f8fSBarry Smith } 17740452661fSBarry Smith PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1775d65a2f8fSBarry Smith 17766d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 17776d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1778416022c9SBarry Smith return 0; 1779416022c9SBarry Smith } 1780