1cb512458SBarry Smith #ifndef lint 2*dbb450caSBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.79 1995/09/12 03:25:20 bsmith Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 17ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 189e25ed09SBarry Smith int n = B->n,i; 19*dbb450caSBarry Smith int shift = B->indexshift; 20*dbb450caSBarry Smith 2178b31e54SBarry Smith aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 22464493b3SBarry Smith PLogObjectMemory(mat,aij->N*sizeof(int)); 2378b31e54SBarry Smith PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int)); 24*dbb450caSBarry Smith for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift; 259e25ed09SBarry Smith return 0; 269e25ed09SBarry Smith } 279e25ed09SBarry Smith 282493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 292493cbb0SBarry Smith 3051c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 31299609e3SLois Curfman McInnes { 32299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 33299609e3SLois Curfman McInnes int ierr; 34299609e3SLois Curfman McInnes if (aij->numtids == 1) { 3551c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 36299609e3SLois Curfman McInnes } else 37bbb6d6a8SBarry Smith SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel"); 38299609e3SLois Curfman McInnes return 0; 39299609e3SLois Curfman McInnes } 40299609e3SLois Curfman McInnes 412ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 421eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 438a729477SBarry Smith { 4444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 45*dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 461eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 471eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 48*dbb450caSBarry Smith int shift = C->indexshift; 498a729477SBarry Smith 5007a0e7adSLois Curfman McInnes if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) { 51bbb6d6a8SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds"); 528a729477SBarry Smith } 531eb62cbbSBarry Smith aij->insertmode = addv; 548a729477SBarry Smith for ( i=0; i<m; i++ ) { 55bbb6d6a8SBarry Smith if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 56bbb6d6a8SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 571eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 581eb62cbbSBarry Smith row = idxm[i] - rstart; 591eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 60bbb6d6a8SBarry Smith if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 61bbb6d6a8SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 621eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 631eb62cbbSBarry Smith col = idxn[j] - cstart; 6478b31e54SBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 651eb62cbbSBarry Smith } 661eb62cbbSBarry Smith else { 67d6dfbf8fSBarry Smith if (aij->assembled) { 68b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 69*dbb450caSBarry Smith col = aij->colmap[idxn[j]] + shift; 70ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 712493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 722493cbb0SBarry Smith col = idxn[j]; 73d6dfbf8fSBarry Smith } 749e25ed09SBarry Smith } 759e25ed09SBarry Smith else col = idxn[j]; 7678b31e54SBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 771eb62cbbSBarry Smith } 781eb62cbbSBarry Smith } 791eb62cbbSBarry Smith } 801eb62cbbSBarry Smith else { 81dbd7a890SLois Curfman McInnes ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv); 8278b31e54SBarry Smith CHKERRQ(ierr); 831eb62cbbSBarry Smith } 848a729477SBarry Smith } 858a729477SBarry Smith return 0; 868a729477SBarry Smith } 878a729477SBarry Smith 888a729477SBarry Smith /* 891eb62cbbSBarry Smith the assembly code is a lot like the code for vectors, we should 901eb62cbbSBarry Smith sometime derive a single assembly code that can be used for 911eb62cbbSBarry Smith either case. 928a729477SBarry Smith */ 938a729477SBarry Smith 94fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 958a729477SBarry Smith { 9644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 97d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 986abc6512SBarry Smith int numtids = aij->numtids, *owners = aij->rowners; 991eb62cbbSBarry Smith int mytid = aij->mytid; 1001eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 1016abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 1025392566eSBarry Smith int tag = mat->tag, *owner,*starts,count,ierr; 1031eb62cbbSBarry Smith InsertMode addv; 1041eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1051eb62cbbSBarry Smith 1061eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 10728988994SBarry Smith MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 1081eb62cbbSBarry Smith MPI_BOR,comm); 109*dbb450caSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) { 110bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1111eb62cbbSBarry Smith } 1121eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1131eb62cbbSBarry Smith 1141eb62cbbSBarry Smith /* first count number of contributors to each processor */ 11578b31e54SBarry Smith nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 11678b31e54SBarry Smith PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 11778b31e54SBarry Smith owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1181eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1191eb62cbbSBarry Smith idx = aij->stash.idx[i]; 1201eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 1211eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1221eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1238a729477SBarry Smith } 1248a729477SBarry Smith } 1258a729477SBarry Smith } 1261eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 1271eb62cbbSBarry Smith 1281eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 12978b31e54SBarry Smith work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 1301eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 1311eb62cbbSBarry Smith nreceives = work[mytid]; 1321eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 1331eb62cbbSBarry Smith nmax = work[mytid]; 13478b31e54SBarry Smith PETSCFREE(work); 1351eb62cbbSBarry Smith 1361eb62cbbSBarry Smith /* post receives: 1371eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1381eb62cbbSBarry Smith (global index,value) we store the global index as a double 139d6dfbf8fSBarry Smith to simplify the message passing. 1401eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1411eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1421eb62cbbSBarry Smith this is a lot of wasted space. 1431eb62cbbSBarry Smith 1441eb62cbbSBarry Smith 1451eb62cbbSBarry Smith This could be done better. 1461eb62cbbSBarry Smith */ 14778b31e54SBarry Smith rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 14878b31e54SBarry Smith CHKPTRQ(rvalues); 14978b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 15078b31e54SBarry Smith CHKPTRQ(recv_waits); 1511eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 152c60448a5SBarry Smith MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1531eb62cbbSBarry Smith comm,recv_waits+i); 1541eb62cbbSBarry Smith } 1551eb62cbbSBarry Smith 1561eb62cbbSBarry Smith /* do sends: 1571eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1581eb62cbbSBarry Smith the ith processor 1591eb62cbbSBarry Smith */ 16078b31e54SBarry Smith svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 16178b31e54SBarry Smith CHKPTRQ(svalues); 16278b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 16378b31e54SBarry Smith CHKPTRQ(send_waits); 16478b31e54SBarry Smith starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts); 1651eb62cbbSBarry Smith starts[0] = 0; 1661eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1671eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1681eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1691eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1701eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1711eb62cbbSBarry Smith } 17278b31e54SBarry Smith PETSCFREE(owner); 1731eb62cbbSBarry Smith starts[0] = 0; 1741eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1751eb62cbbSBarry Smith count = 0; 1761eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 1771eb62cbbSBarry Smith if (procs[i]) { 178c60448a5SBarry Smith MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 1791eb62cbbSBarry Smith comm,send_waits+count++); 1801eb62cbbSBarry Smith } 1811eb62cbbSBarry Smith } 18278b31e54SBarry Smith PETSCFREE(starts); PETSCFREE(nprocs); 1831eb62cbbSBarry Smith 1841eb62cbbSBarry Smith /* Free cache space */ 18578b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 1861eb62cbbSBarry Smith 1871eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 1881eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 1891eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 1901eb62cbbSBarry Smith aij->rmax = nmax; 1911eb62cbbSBarry Smith 1921eb62cbbSBarry Smith return 0; 1931eb62cbbSBarry Smith } 19444a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 1951eb62cbbSBarry Smith 196fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 1971eb62cbbSBarry Smith { 1981eb62cbbSBarry Smith int ierr; 19944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 200*dbb450caSBarry Smith Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 2011eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 2026abc6512SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 2032493cbb0SBarry Smith int row,col,other_disassembled; 2041eb62cbbSBarry Smith Scalar *values,val; 2051eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 206*dbb450caSBarry Smith int shift = C->indexshift; 2071eb62cbbSBarry Smith 2081eb62cbbSBarry Smith /* wait on receives */ 2091eb62cbbSBarry Smith while (count) { 210d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2111eb62cbbSBarry Smith /* unpack receives into our local space */ 212d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 213c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2141eb62cbbSBarry Smith n = n/3; 2151eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2161eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2171eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2181eb62cbbSBarry Smith val = values[3*i+2]; 2191eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2201eb62cbbSBarry Smith col -= aij->cstart; 2211eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2221eb62cbbSBarry Smith } 2231eb62cbbSBarry Smith else { 224d6dfbf8fSBarry Smith if (aij->assembled) { 225b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 226*dbb450caSBarry Smith col = aij->colmap[col] + shift; 227ec8511deSBarry Smith if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 2282493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2292493cbb0SBarry Smith col = (int) PETSCREAL(values[3*i+1]); 230d6dfbf8fSBarry Smith } 2319e25ed09SBarry Smith } 2321eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2331eb62cbbSBarry Smith } 2341eb62cbbSBarry Smith } 2351eb62cbbSBarry Smith count--; 2361eb62cbbSBarry Smith } 23778b31e54SBarry Smith PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 2381eb62cbbSBarry Smith 2391eb62cbbSBarry Smith /* wait on sends */ 2401eb62cbbSBarry Smith if (aij->nsends) { 24178b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) ); 24278b31e54SBarry Smith CHKPTRQ(send_status); 2431eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 24478b31e54SBarry Smith PETSCFREE(send_status); 2451eb62cbbSBarry Smith } 24678b31e54SBarry Smith PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 2471eb62cbbSBarry Smith 24807a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 24978b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 25078b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2511eb62cbbSBarry Smith 2522493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2532493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 2542493cbb0SBarry Smith MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1, 2552493cbb0SBarry Smith MPI_INT,MPI_PROD,mat->comm); 2562493cbb0SBarry Smith if (aij->assembled && !other_disassembled) { 2572493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2582493cbb0SBarry Smith } 2592493cbb0SBarry Smith 2605e42470aSBarry Smith if (!aij->assembled && mode == FINAL_ASSEMBLY) { 26178b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 262d6dfbf8fSBarry Smith aij->assembled = 1; 2635e42470aSBarry Smith } 26478b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 26578b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2665e42470aSBarry Smith 2678a729477SBarry Smith return 0; 2688a729477SBarry Smith } 2698a729477SBarry Smith 2702ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2711eb62cbbSBarry Smith { 27244a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 273dbd7a890SLois Curfman McInnes int ierr; 27478b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 27578b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 2761eb62cbbSBarry Smith return 0; 2771eb62cbbSBarry Smith } 2781eb62cbbSBarry Smith 2791eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and 2801eb62cbbSBarry Smith scatter create routines, we should try to do it systemamatically 2811eb62cbbSBarry Smith if we can figure out the proper level of generality. */ 2821eb62cbbSBarry Smith 2831eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2841eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 2851eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 2861eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 2871eb62cbbSBarry Smith routine. 2881eb62cbbSBarry Smith */ 28944a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 2901eb62cbbSBarry Smith { 29144a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 2921eb62cbbSBarry Smith int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 2936abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 2941eb62cbbSBarry Smith int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 2955392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 296d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 297d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 2981eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2991eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 3001eb62cbbSBarry Smith IS istmp; 3011eb62cbbSBarry Smith 302c8f67822SLois Curfman McInnes if (!l->assembled) 30378b31e54SBarry Smith SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first"); 30478b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 30578b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 3061eb62cbbSBarry Smith 3071eb62cbbSBarry Smith /* first count number of contributors to each processor */ 30878b31e54SBarry Smith nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 30978b31e54SBarry Smith PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 31078b31e54SBarry Smith owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3111eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3121eb62cbbSBarry Smith idx = rows[i]; 3131eb62cbbSBarry Smith found = 0; 3141eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 3151eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3161eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3171eb62cbbSBarry Smith } 3181eb62cbbSBarry Smith } 319bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3201eb62cbbSBarry Smith } 3211eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 3221eb62cbbSBarry Smith 3231eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 32478b31e54SBarry Smith work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 3251eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 3261eb62cbbSBarry Smith nrecvs = work[mytid]; 3271eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 3281eb62cbbSBarry Smith nmax = work[mytid]; 32978b31e54SBarry Smith PETSCFREE(work); 3301eb62cbbSBarry Smith 3311eb62cbbSBarry Smith /* post receives: */ 33278b31e54SBarry Smith rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 33378b31e54SBarry Smith CHKPTRQ(rvalues); 33478b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 33578b31e54SBarry Smith CHKPTRQ(recv_waits); 3361eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3371eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 3381eb62cbbSBarry Smith comm,recv_waits+i); 3391eb62cbbSBarry Smith } 3401eb62cbbSBarry Smith 3411eb62cbbSBarry Smith /* do sends: 3421eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3431eb62cbbSBarry Smith the ith processor 3441eb62cbbSBarry Smith */ 34578b31e54SBarry Smith svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 34678b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 34778b31e54SBarry Smith CHKPTRQ(send_waits); 34878b31e54SBarry Smith starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts); 3491eb62cbbSBarry Smith starts[0] = 0; 3501eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3511eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3521eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3531eb62cbbSBarry Smith } 3541eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3551eb62cbbSBarry Smith 3561eb62cbbSBarry Smith starts[0] = 0; 3571eb62cbbSBarry Smith for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3581eb62cbbSBarry Smith count = 0; 3591eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 3601eb62cbbSBarry Smith if (procs[i]) { 3611eb62cbbSBarry Smith MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 3621eb62cbbSBarry Smith comm,send_waits+count++); 3631eb62cbbSBarry Smith } 3641eb62cbbSBarry Smith } 36578b31e54SBarry Smith PETSCFREE(starts); 3661eb62cbbSBarry Smith 3671eb62cbbSBarry Smith base = owners[mytid]; 3681eb62cbbSBarry Smith 3691eb62cbbSBarry Smith /* wait on receives */ 37078b31e54SBarry Smith lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3711eb62cbbSBarry Smith source = lens + nrecvs; 3721eb62cbbSBarry Smith count = nrecvs; slen = 0; 3731eb62cbbSBarry Smith while (count) { 374d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3751eb62cbbSBarry Smith /* unpack receives into our local space */ 3761eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 377d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 378d6dfbf8fSBarry Smith lens[imdex] = n; 3791eb62cbbSBarry Smith slen += n; 3801eb62cbbSBarry Smith count--; 3811eb62cbbSBarry Smith } 38278b31e54SBarry Smith PETSCFREE(recv_waits); 3831eb62cbbSBarry Smith 3841eb62cbbSBarry Smith /* move the data into the send scatter */ 38578b31e54SBarry Smith lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3861eb62cbbSBarry Smith count = 0; 3871eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3881eb62cbbSBarry Smith values = rvalues + i*nmax; 3891eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 3901eb62cbbSBarry Smith lrows[count++] = values[j] - base; 3911eb62cbbSBarry Smith } 3921eb62cbbSBarry Smith } 39378b31e54SBarry Smith PETSCFREE(rvalues); PETSCFREE(lens); 39478b31e54SBarry Smith PETSCFREE(owner); PETSCFREE(nprocs); 3951eb62cbbSBarry Smith 3961eb62cbbSBarry Smith /* actually zap the local rows */ 397fafbff53SBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp); 398464493b3SBarry Smith PLogObjectParent(A,istmp); 39978b31e54SBarry Smith CHKERRQ(ierr); PETSCFREE(lrows); 40078b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 40178b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 40278b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 4031eb62cbbSBarry Smith 4041eb62cbbSBarry Smith /* wait on sends */ 4051eb62cbbSBarry Smith if (nsends) { 40678b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) ); 40778b31e54SBarry Smith CHKPTRQ(send_status); 4081eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 40978b31e54SBarry Smith PETSCFREE(send_status); 4101eb62cbbSBarry Smith } 41178b31e54SBarry Smith PETSCFREE(send_waits); PETSCFREE(svalues); 4121eb62cbbSBarry Smith 4131eb62cbbSBarry Smith 4141eb62cbbSBarry Smith return 0; 4151eb62cbbSBarry Smith } 4161eb62cbbSBarry Smith 41744a69424SLois Curfman McInnes static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy) 4181eb62cbbSBarry Smith { 41944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 4201eb62cbbSBarry Smith int ierr; 42178b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first"); 422*dbb450caSBarry Smith ierr = VecScatterBegin(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx); 42378b31e54SBarry Smith CHKERRQ(ierr); 42478b31e54SBarry Smith ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr); 425*dbb450caSBarry Smith ierr = VecScatterEnd(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx); 42678b31e54SBarry Smith CHKERRQ(ierr); 42778b31e54SBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr); 4281eb62cbbSBarry Smith return 0; 4291eb62cbbSBarry Smith } 4301eb62cbbSBarry Smith 43144a69424SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 432da3a660dSBarry Smith { 43344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 434da3a660dSBarry Smith int ierr; 43578b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first"); 436*dbb450caSBarry Smith ierr = VecScatterBegin(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx); 43778b31e54SBarry Smith CHKERRQ(ierr); 43878b31e54SBarry Smith ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 439*dbb450caSBarry Smith ierr = VecScatterEnd(xx,aij->lvec,INSERT_VALUES,SCATTERALL,aij->Mvctx); 44078b31e54SBarry Smith CHKERRQ(ierr); 44178b31e54SBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr); 442da3a660dSBarry Smith return 0; 443da3a660dSBarry Smith } 444da3a660dSBarry Smith 44544a69424SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy) 446da3a660dSBarry Smith { 44744a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 448da3a660dSBarry Smith int ierr; 449da3a660dSBarry Smith 45044a69424SLois Curfman McInnes if (!aij->assembled) 45178b31e54SBarry Smith SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix first"); 452da3a660dSBarry Smith /* do nondiagonal part */ 45378b31e54SBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 454da3a660dSBarry Smith /* send it on its way */ 455*dbb450caSBarry Smith ierr = VecScatterBegin(aij->lvec,yy,ADD_VALUES, 45678b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 457da3a660dSBarry Smith /* do local part */ 45878b31e54SBarry Smith ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr); 459da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 460da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 461da3a660dSBarry Smith /* but is not perhaps always true. */ 462*dbb450caSBarry Smith ierr = VecScatterEnd(aij->lvec,yy,ADD_VALUES, 46378b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 464da3a660dSBarry Smith return 0; 465da3a660dSBarry Smith } 466da3a660dSBarry Smith 46744a69424SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 468da3a660dSBarry Smith { 46944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 470da3a660dSBarry Smith int ierr; 471da3a660dSBarry Smith 47244a69424SLois Curfman McInnes if (!aij->assembled) 47378b31e54SBarry Smith SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix first"); 474da3a660dSBarry Smith /* do nondiagonal part */ 47578b31e54SBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 476da3a660dSBarry Smith /* send it on its way */ 477*dbb450caSBarry Smith ierr = VecScatterBegin(aij->lvec,zz,ADD_VALUES, 47878b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 479da3a660dSBarry Smith /* do local part */ 48078b31e54SBarry Smith ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 481da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 482da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 483da3a660dSBarry Smith /* but is not perhaps always true. */ 484*dbb450caSBarry Smith ierr = VecScatterEnd(aij->lvec,zz,ADD_VALUES, 48578b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 486da3a660dSBarry Smith return 0; 487da3a660dSBarry Smith } 488da3a660dSBarry Smith 4891eb62cbbSBarry Smith /* 4901eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4911eb62cbbSBarry Smith diagonal block 4921eb62cbbSBarry Smith */ 493d1710a03SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v) 4941eb62cbbSBarry Smith { 49544a69424SLois Curfman McInnes Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data; 49678b31e54SBarry Smith if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix first"); 4971eb62cbbSBarry Smith return MatGetDiagonal(A->A,v); 4981eb62cbbSBarry Smith } 4991eb62cbbSBarry Smith 50044a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 5011eb62cbbSBarry Smith { 5021eb62cbbSBarry Smith Mat mat = (Mat) obj; 50344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5041eb62cbbSBarry Smith int ierr; 505a5a9c739SBarry Smith #if defined(PETSC_LOG) 5066e35fa57SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 507a5a9c739SBarry Smith #endif 50878b31e54SBarry Smith PETSCFREE(aij->rowners); 50978b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 51078b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 51178b31e54SBarry Smith if (aij->colmap) PETSCFREE(aij->colmap); 51278b31e54SBarry Smith if (aij->garray) PETSCFREE(aij->garray); 5131eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 5141eb62cbbSBarry Smith if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 51578b31e54SBarry Smith PETSCFREE(aij); 516a5a9c739SBarry Smith PLogObjectDestroy(mat); 517a5a9c739SBarry Smith PETSCHEADERDESTROY(mat); 5181eb62cbbSBarry Smith return 0; 5191eb62cbbSBarry Smith } 5207857610eSBarry Smith #include "draw.h" 521b16a3bb1SBarry Smith #include "pinclude/pviewer.h" 522ee50ffe9SBarry Smith 52344a69424SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 5241eb62cbbSBarry Smith { 5251eb62cbbSBarry Smith Mat mat = (Mat) obj; 52644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 527*dbb450caSBarry Smith Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 528f72585f2SLois Curfman McInnes int ierr, format; 529d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 530*dbb450caSBarry Smith int shift = C->indexshift; 5311eb62cbbSBarry Smith 53278b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first"); 533154123eaSLois Curfman McInnes if (!viewer) { /* so that viewers may be used from debuggers */ 53421b0d8fbSLois Curfman McInnes viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 535154123eaSLois Curfman McInnes } 536ab254492SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 537d636dbe3SBarry Smith ierr = ViewerFileGetFormat_Private(viewer,&format); 538f72585f2SLois Curfman McInnes if (vobj->cookie == VIEWER_COOKIE && format == FILE_FORMAT_INFO && 5399b94acceSBarry Smith (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER)) { 540f72585f2SLois Curfman McInnes /* do nothing for now */ 541f72585f2SLois Curfman McInnes } 5429b94acceSBarry Smith else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 543d636dbe3SBarry Smith FILE *fd; 544d636dbe3SBarry Smith ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 5457f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 546d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 5471eb62cbbSBarry Smith aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5481eb62cbbSBarry Smith aij->cend); 54978b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 55078b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 551d13ab20cSBarry Smith fflush(fd); 5527f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 553d13ab20cSBarry Smith } 5549b94acceSBarry Smith else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) || 5557857610eSBarry Smith vobj->cookie == DRAW_COOKIE) { 55695373324SBarry Smith int numtids = aij->numtids, mytid = aij->mytid; 55795373324SBarry Smith if (numtids == 1) { 55878b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 55995373324SBarry Smith } 56095373324SBarry Smith else { 56195373324SBarry Smith /* assemble the entire matrix onto first processor. */ 56295373324SBarry Smith Mat A; 563ec8511deSBarry Smith Mat_SeqAIJ *Aloc; 5642eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 56595373324SBarry Smith Scalar *a; 5662ee70a88SLois Curfman McInnes 56795373324SBarry Smith if (!mytid) { 56895373324SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A); 56995373324SBarry Smith } 57095373324SBarry Smith else { 57195373324SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A); 57295373324SBarry Smith } 573464493b3SBarry Smith PLogObjectParent(mat,A); 57478b31e54SBarry Smith CHKERRQ(ierr); 57595373324SBarry Smith 57695373324SBarry Smith /* copy over the A part */ 577ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->A->data; 5782ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 57995373324SBarry Smith row = aij->rstart; 580*dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 58195373324SBarry Smith for ( i=0; i<m; i++ ) { 582*dbb450caSBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES); 58378b31e54SBarry Smith CHKERRQ(ierr); 58495373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 58595373324SBarry Smith } 5862ee70a88SLois Curfman McInnes aj = Aloc->j; 587*dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 58895373324SBarry Smith 58995373324SBarry Smith /* copy over the B part */ 590ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) aij->B->data; 5912ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 59295373324SBarry Smith row = aij->rstart; 59378b31e54SBarry Smith ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 594*dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 59595373324SBarry Smith for ( i=0; i<m; i++ ) { 596*dbb450caSBarry Smith ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES); 59778b31e54SBarry Smith CHKERRQ(ierr); 59895373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 59995373324SBarry Smith } 60078b31e54SBarry Smith PETSCFREE(ct); 60178b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 60278b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 60395373324SBarry Smith if (!mytid) { 60478b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 60595373324SBarry Smith } 60678b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 60795373324SBarry Smith } 60895373324SBarry Smith } 6091eb62cbbSBarry Smith return 0; 6101eb62cbbSBarry Smith } 6111eb62cbbSBarry Smith 612ec8511deSBarry Smith extern int MatMarkDiag_SeqAIJ(Mat); 6131eb62cbbSBarry Smith /* 6141eb62cbbSBarry Smith This has to provide several versions. 6151eb62cbbSBarry Smith 6161eb62cbbSBarry Smith 1) per sequential 6171eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6181eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 619d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6201eb62cbbSBarry Smith */ 621fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 622*dbb450caSBarry Smith double fshift,int its,Vec xx) 6238a729477SBarry Smith { 62444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 625d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 626ec8511deSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 6276abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6286abc6512SBarry Smith int ierr,*idx, *diag; 6296abc6512SBarry Smith int n = mat->n, m = mat->m, i; 630da3a660dSBarry Smith Vec tt; 631*dbb450caSBarry Smith int shift = A->indexshift; 6328a729477SBarry Smith 63378b31e54SBarry Smith if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first"); 6341eb62cbbSBarry Smith 635d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 636*dbb450caSBarry Smith xs = x + shift; /* shift by one for index start of 1 */ 637*dbb450caSBarry Smith ls = ls + shift; 638ec8511deSBarry Smith if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 639d6dfbf8fSBarry Smith diag = A->diag; 640acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 64142f8db3fSLois Curfman McInnes SETERRQ(1,"MatRelax_MPIAIJ:Option not yet supported"); 642acb40c82SBarry Smith } 643da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 644da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 645da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 646da3a660dSBarry Smith 647da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 648da3a660dSBarry Smith 649da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 650da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 651da3a660dSBarry Smith is the relaxation factor. 652da3a660dSBarry Smith */ 65378b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 654464493b3SBarry Smith PLogObjectParent(matin,tt); 655da3a660dSBarry Smith VecGetArray(tt,&t); 656da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 657da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 658da3a660dSBarry Smith VecSet(&zero,mat->lvec); 659*dbb450caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP, 66078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 661da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 662da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 663*dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 664*dbb450caSBarry Smith v = A->a + diag[i] + !shift; 665da3a660dSBarry Smith sum = b[i]; 666da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 667*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 668da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 669*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 670*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 671da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 672da3a660dSBarry Smith x[i] = omega*(sum/d); 673da3a660dSBarry Smith } 674*dbb450caSBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP, 67578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 676da3a660dSBarry Smith 677da3a660dSBarry Smith /* t = b - (2*E - D)x */ 678da3a660dSBarry Smith v = A->a; 679*dbb450caSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 680da3a660dSBarry Smith 681da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 682*dbb450caSBarry Smith ts = t + shift; /* shifted by one for index start of a or mat->j*/ 683da3a660dSBarry Smith diag = A->diag; 684da3a660dSBarry Smith VecSet(&zero,mat->lvec); 685*dbb450caSBarry Smith ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINEDOWN, 68678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 687da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 688da3a660dSBarry Smith n = diag[i] - A->i[i]; 689*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 690*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 691da3a660dSBarry Smith sum = t[i]; 692da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 693*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 694da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 695*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 696*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 697da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 698da3a660dSBarry Smith t[i] = omega*(sum/d); 699da3a660dSBarry Smith } 700*dbb450caSBarry Smith ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINEDOWN, 70178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 702da3a660dSBarry Smith /* x = x + t */ 703da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 704da3a660dSBarry Smith VecDestroy(tt); 705da3a660dSBarry Smith return 0; 706da3a660dSBarry Smith } 707da3a660dSBarry Smith 7081eb62cbbSBarry Smith 709d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 710da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 711da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 712da3a660dSBarry Smith } 713da3a660dSBarry Smith else { 714*dbb450caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx); 71578b31e54SBarry Smith CHKERRQ(ierr); 716*dbb450caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx); 71778b31e54SBarry Smith CHKERRQ(ierr); 718da3a660dSBarry Smith } 719d6dfbf8fSBarry Smith while (its--) { 720d6dfbf8fSBarry Smith /* go down through the rows */ 721*dbb450caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN, 72278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 723d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 724d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 725*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 726*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 727d6dfbf8fSBarry Smith sum = b[i]; 728d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 729*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 730d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 731*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 732*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 733d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 734d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 735d6dfbf8fSBarry Smith } 736*dbb450caSBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN, 73778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 738d6dfbf8fSBarry Smith /* come up through the rows */ 739*dbb450caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP, 74078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 741d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 742d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 743*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 744*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 745d6dfbf8fSBarry Smith sum = b[i]; 746d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 747*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 748d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 749*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 750*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 751d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 752d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 753d6dfbf8fSBarry Smith } 754*dbb450caSBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP, 75578b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 756d6dfbf8fSBarry Smith } 757d6dfbf8fSBarry Smith } 758d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 759da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 760da3a660dSBarry Smith VecSet(&zero,mat->lvec); 761*dbb450caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN, 76278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 763da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 764da3a660dSBarry Smith n = diag[i] - A->i[i]; 765*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 766*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 767da3a660dSBarry Smith sum = b[i]; 768da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 769*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 770da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 771*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 772*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 773da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 774da3a660dSBarry Smith x[i] = omega*(sum/d); 775da3a660dSBarry Smith } 776*dbb450caSBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN, 77778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 778da3a660dSBarry Smith its--; 779da3a660dSBarry Smith } 780d6dfbf8fSBarry Smith while (its--) { 781*dbb450caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx); 78278b31e54SBarry Smith CHKERRQ(ierr); 783*dbb450caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERUP,mat->Mvctx); 78478b31e54SBarry Smith CHKERRQ(ierr); 785*dbb450caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN, 78678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 787d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 788d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 789*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 790*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 791d6dfbf8fSBarry Smith sum = b[i]; 792d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 793*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 794d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 795*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 796*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 797d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 798d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 799d6dfbf8fSBarry Smith } 800*dbb450caSBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEDOWN, 80178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 802d6dfbf8fSBarry Smith } 803d6dfbf8fSBarry Smith } 804d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 805da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 806da3a660dSBarry Smith VecSet(&zero,mat->lvec); 807*dbb450caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP, 80878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 809da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 810da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 811*dbb450caSBarry Smith idx = A->j + diag[i] + !shift; 812*dbb450caSBarry Smith v = A->a + diag[i] + !shift; 813da3a660dSBarry Smith sum = b[i]; 814da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 815*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 816da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 817*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 818*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 819da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 820da3a660dSBarry Smith x[i] = omega*(sum/d); 821da3a660dSBarry Smith } 822*dbb450caSBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP, 82378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 824da3a660dSBarry Smith its--; 825da3a660dSBarry Smith } 826d6dfbf8fSBarry Smith while (its--) { 827*dbb450caSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERDOWN, 82878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 829*dbb450caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERDOWN, 83078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 831*dbb450caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINEUP, 83278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 833d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 834d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 835*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 836*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 837d6dfbf8fSBarry Smith sum = b[i]; 838d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 839*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 840d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 841*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 842*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 843d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 844d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 845d6dfbf8fSBarry Smith } 846*dbb450caSBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINEUP, 84778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 848d6dfbf8fSBarry Smith } 849d6dfbf8fSBarry Smith } 850d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 851da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 852*dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 853da3a660dSBarry Smith } 854*dbb450caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx); 85578b31e54SBarry Smith CHKERRQ(ierr); 856*dbb450caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx); 85778b31e54SBarry Smith CHKERRQ(ierr); 858d6dfbf8fSBarry Smith while (its--) { 859d6dfbf8fSBarry Smith /* go down through the rows */ 860d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 861d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 862*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 863*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 864d6dfbf8fSBarry Smith sum = b[i]; 865d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 866*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 867d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 868*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 869*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 870d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 871d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 872d6dfbf8fSBarry Smith } 873d6dfbf8fSBarry Smith /* come up through the rows */ 874d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 875d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 876*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 877*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 878d6dfbf8fSBarry Smith sum = b[i]; 879d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 880*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 881d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 882*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 883*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 884d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 885d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 886d6dfbf8fSBarry Smith } 887d6dfbf8fSBarry Smith } 888d6dfbf8fSBarry Smith } 889d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 890da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 891*dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 892da3a660dSBarry Smith } 893*dbb450caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx); 89478b31e54SBarry Smith CHKERRQ(ierr); 895*dbb450caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL,mat->Mvctx); 89678b31e54SBarry Smith CHKERRQ(ierr); 897d6dfbf8fSBarry Smith while (its--) { 898d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 899d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 900*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 901*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 902d6dfbf8fSBarry Smith sum = b[i]; 903d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 904*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 905d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 906*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 907*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 908d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 909d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 910d6dfbf8fSBarry Smith } 911d6dfbf8fSBarry Smith } 912d6dfbf8fSBarry Smith } 913d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 914da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 915*dbb450caSBarry Smith return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 916da3a660dSBarry Smith } 917*dbb450caSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTERALL, 91878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 919*dbb450caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTERALL, 92078b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 921d6dfbf8fSBarry Smith while (its--) { 922d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 923d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 924*dbb450caSBarry Smith idx = A->j + A->i[i] + shift; 925*dbb450caSBarry Smith v = A->a + A->i[i] + shift; 926d6dfbf8fSBarry Smith sum = b[i]; 927d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 928*dbb450caSBarry Smith d = fshift + A->a[diag[i]+shift]; 929d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 930*dbb450caSBarry Smith idx = B->j + B->i[i] + shift; 931*dbb450caSBarry Smith v = B->a + B->i[i] + shift; 932d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 933d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 934d6dfbf8fSBarry Smith } 935d6dfbf8fSBarry Smith } 936d6dfbf8fSBarry Smith } 9378a729477SBarry Smith return 0; 9388a729477SBarry Smith } 939a66be287SLois Curfman McInnes 940fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 941fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 942a66be287SLois Curfman McInnes { 943a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 944a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 945a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 946a66be287SLois Curfman McInnes 94778b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 94878b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 949a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 950a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 951a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 952a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 953a66be287SLois Curfman McInnes MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm); 954a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 955a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 956a66be287SLois Curfman McInnes MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm); 957a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 958a66be287SLois Curfman McInnes } 959a66be287SLois Curfman McInnes return 0; 960a66be287SLois Curfman McInnes } 961a66be287SLois Curfman McInnes 962299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 963299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 964299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 965299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 966299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 967299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 968299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 969299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 970299609e3SLois Curfman McInnes 971fc76ce87SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat aijin,MatOption op) 972c74985f6SBarry Smith { 97344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 974c74985f6SBarry Smith 975c74985f6SBarry Smith if (op == NO_NEW_NONZERO_LOCATIONS) { 976c74985f6SBarry Smith MatSetOption(aij->A,op); 977c74985f6SBarry Smith MatSetOption(aij->B,op); 978c74985f6SBarry Smith } 979c74985f6SBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) { 980c74985f6SBarry Smith MatSetOption(aij->A,op); 981c74985f6SBarry Smith MatSetOption(aij->B,op); 982c74985f6SBarry Smith } 983bbb6d6a8SBarry Smith else if (op == COLUMN_ORIENTED) 984bbb6d6a8SBarry Smith SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported"); 985c74985f6SBarry Smith return 0; 986c74985f6SBarry Smith } 987c74985f6SBarry Smith 988d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 989c74985f6SBarry Smith { 99044a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 991c74985f6SBarry Smith *m = mat->M; *n = mat->N; 992c74985f6SBarry Smith return 0; 993c74985f6SBarry Smith } 994c74985f6SBarry Smith 995d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 996c74985f6SBarry Smith { 99744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 998b7c46309SBarry Smith *m = mat->m; *n = mat->N; 999c74985f6SBarry Smith return 0; 1000c74985f6SBarry Smith } 1001c74985f6SBarry Smith 1002d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1003c74985f6SBarry Smith { 100444a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1005c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 1006c74985f6SBarry Smith return 0; 1007c74985f6SBarry Smith } 1008c74985f6SBarry Smith 100939e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 101039e00950SLois Curfman McInnes { 1011154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1012154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 1013154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1014154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 101539e00950SLois Curfman McInnes 101639e00950SLois Curfman McInnes if (row < rstart || row >= rend) 1017bbb6d6a8SBarry Smith SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows") 1018abc0e9e4SLois Curfman McInnes lrow = row - rstart; 101939e00950SLois Curfman McInnes 1020154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1021154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1022154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 102378b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 102478b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1025154123eaSLois Curfman McInnes nztot = nzA + nzB; 1026154123eaSLois Curfman McInnes 1027154123eaSLois Curfman McInnes if (v || idx) { 1028154123eaSLois Curfman McInnes if (nztot) { 1029154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1030299609e3SLois Curfman McInnes int imark; 103148b35521SBarry Smith if (mat->assembled) { 1032154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 103348b35521SBarry Smith } 1034154123eaSLois Curfman McInnes if (v) { 103578b31e54SBarry Smith *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 103639e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1037154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1038154123eaSLois Curfman McInnes else break; 1039154123eaSLois Curfman McInnes } 1040154123eaSLois Curfman McInnes imark = i; 1041154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1042299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1043154123eaSLois Curfman McInnes } 1044154123eaSLois Curfman McInnes if (idx) { 104578b31e54SBarry Smith *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1046154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1047154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1048154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1049154123eaSLois Curfman McInnes else break; 1050154123eaSLois Curfman McInnes } 1051154123eaSLois Curfman McInnes imark = i; 1052154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1053299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 105439e00950SLois Curfman McInnes } 105539e00950SLois Curfman McInnes } 105639e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1057154123eaSLois Curfman McInnes } 105839e00950SLois Curfman McInnes *nz = nztot; 105978b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 106078b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 106139e00950SLois Curfman McInnes return 0; 106239e00950SLois Curfman McInnes } 106339e00950SLois Curfman McInnes 106439e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 106539e00950SLois Curfman McInnes { 106678b31e54SBarry Smith if (idx) PETSCFREE(*idx); 106778b31e54SBarry Smith if (v) PETSCFREE(*v); 106839e00950SLois Curfman McInnes return 0; 106939e00950SLois Curfman McInnes } 107039e00950SLois Curfman McInnes 1071855ac2c5SLois Curfman McInnes static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1072855ac2c5SLois Curfman McInnes { 1073855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1074aac3204fSLois Curfman McInnes int ierr, i, j, cstart = aij->cstart; 107504ca555eSLois Curfman McInnes double sum = 0.0; 1076ec8511deSBarry Smith Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 107704ca555eSLois Curfman McInnes Scalar *v; 1078*dbb450caSBarry Smith int shift = amat->indexshift; 107904ca555eSLois Curfman McInnes 108004ca555eSLois Curfman McInnes if (!aij->assembled) 108104ca555eSLois Curfman McInnes SETERRQ(1,"MatNorm_MPIAIJ: Cannot compute norm of unassembled matrix"); 1082855ac2c5SLois Curfman McInnes if (aij->numtids == 1) { 108314183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 108437fa93a5SLois Curfman McInnes } else { 108504ca555eSLois Curfman McInnes if (type == NORM_FROBENIUS) { 108604ca555eSLois Curfman McInnes v = amat->a; 108704ca555eSLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 108804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 108904ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 109004ca555eSLois Curfman McInnes #else 109104ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 109204ca555eSLois Curfman McInnes #endif 109304ca555eSLois Curfman McInnes } 109404ca555eSLois Curfman McInnes v = bmat->a; 109504ca555eSLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 109604ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 109704ca555eSLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 109804ca555eSLois Curfman McInnes #else 109904ca555eSLois Curfman McInnes sum += (*v)*(*v); v++; 110004ca555eSLois Curfman McInnes #endif 110104ca555eSLois Curfman McInnes } 110204ca555eSLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 110304ca555eSLois Curfman McInnes *norm = sqrt(*norm); 110404ca555eSLois Curfman McInnes } 110504ca555eSLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 110604ca555eSLois Curfman McInnes double *tmp, *tmp2; 110704ca555eSLois Curfman McInnes int *jj, *garray = aij->garray; 110804ca555eSLois Curfman McInnes tmp = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp); 110904ca555eSLois Curfman McInnes tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 111004ca555eSLois Curfman McInnes PETSCMEMSET(tmp,0,aij->N*sizeof(double)); 111104ca555eSLois Curfman McInnes *norm = 0.0; 111204ca555eSLois Curfman McInnes v = amat->a; jj = amat->j; 111304ca555eSLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 111404ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 1115*dbb450caSBarry Smith tmp[cstart + *jj++ + shift] += abs(*v++); 111604ca555eSLois Curfman McInnes #else 1117*dbb450caSBarry Smith tmp[cstart + *jj++ + shift] += fabs(*v++); 111804ca555eSLois Curfman McInnes #endif 111904ca555eSLois Curfman McInnes } 112004ca555eSLois Curfman McInnes v = bmat->a; jj = bmat->j; 112104ca555eSLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 112204ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 1123*dbb450caSBarry Smith tmp[garray[*jj++ + shift]] += abs(*v++); 112404ca555eSLois Curfman McInnes #else 1125*dbb450caSBarry Smith tmp[garray[*jj++ + shift]] += fabs(*v++); 112604ca555eSLois Curfman McInnes #endif 112704ca555eSLois Curfman McInnes } 112804ca555eSLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 112904ca555eSLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 113004ca555eSLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 113104ca555eSLois Curfman McInnes } 113204ca555eSLois Curfman McInnes PETSCFREE(tmp); PETSCFREE(tmp2); 113304ca555eSLois Curfman McInnes } 113404ca555eSLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 113504ca555eSLois Curfman McInnes double normtemp = 0.0; 113604ca555eSLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1137*dbb450caSBarry Smith v = amat->a + amat->i[j] + shift; 113804ca555eSLois Curfman McInnes sum = 0.0; 113904ca555eSLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 114004ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 114104ca555eSLois Curfman McInnes sum += abs(*v); v++; 114204ca555eSLois Curfman McInnes #else 114304ca555eSLois Curfman McInnes sum += fabs(*v); v++; 114404ca555eSLois Curfman McInnes #endif 114504ca555eSLois Curfman McInnes } 1146*dbb450caSBarry Smith v = bmat->a + bmat->i[j] + shift; 114704ca555eSLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 114804ca555eSLois Curfman McInnes #if defined(PETSC_COMPLEX) 114904ca555eSLois Curfman McInnes sum += abs(*v); v++; 115004ca555eSLois Curfman McInnes #else 115104ca555eSLois Curfman McInnes sum += fabs(*v); v++; 115204ca555eSLois Curfman McInnes #endif 115304ca555eSLois Curfman McInnes } 115404ca555eSLois Curfman McInnes if (sum > normtemp) normtemp = sum; 115504ca555eSLois Curfman McInnes MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 115604ca555eSLois Curfman McInnes } 115704ca555eSLois Curfman McInnes } 115804ca555eSLois Curfman McInnes else { 115904ca555eSLois Curfman McInnes SETERRQ(1,"MatNorm_MPIRow:No support for the two norm"); 116004ca555eSLois Curfman McInnes } 116137fa93a5SLois Curfman McInnes } 1162855ac2c5SLois Curfman McInnes return 0; 1163855ac2c5SLois Curfman McInnes } 1164855ac2c5SLois Curfman McInnes 11650de55854SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1166b7c46309SBarry Smith { 1167b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1168b7c46309SBarry Smith int ierr; 1169b7c46309SBarry Smith Mat B; 1170*dbb450caSBarry Smith Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1171b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1172b7c46309SBarry Smith Scalar *array; 1173*dbb450caSBarry Smith int shift = Aloc->indexshift; 1174b7c46309SBarry Smith 11750de55854SLois Curfman McInnes if (!matout && M != N) SETERRQ(1, 11760de55854SLois Curfman McInnes "MatTranspose_MPIAIJ: Cannot transpose rectangular matrix in place"); 1177b7c46309SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1178b7c46309SBarry Smith CHKERRQ(ierr); 1179b7c46309SBarry Smith 1180b7c46309SBarry Smith /* copy over the A part */ 1181ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->A->data; 1182b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1183b7c46309SBarry Smith row = a->rstart; 1184*dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1185b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1186*dbb450caSBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES); 1187b7c46309SBarry Smith CHKERRQ(ierr); 1188b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1189b7c46309SBarry Smith } 1190b7c46309SBarry Smith aj = Aloc->j; 1191*dbb450caSBarry Smith for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1192b7c46309SBarry Smith 1193b7c46309SBarry Smith /* copy over the B part */ 1194ec8511deSBarry Smith Aloc = (Mat_SeqAIJ*) a->B->data; 1195b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1196b7c46309SBarry Smith row = a->rstart; 1197*dbb450caSBarry Smith ct = cols = (int *) PETSCMALLOC( (ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1198*dbb450caSBarry Smith for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1199b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1200*dbb450caSBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES); 1201b7c46309SBarry Smith CHKERRQ(ierr); 1202b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1203b7c46309SBarry Smith } 1204b7c46309SBarry Smith PETSCFREE(ct); 1205b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1206b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 12070de55854SLois Curfman McInnes if (matout) { 12080de55854SLois Curfman McInnes *matout = B; 12090de55854SLois Curfman McInnes } else { 12100de55854SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 12110de55854SLois Curfman McInnes PETSCFREE(a->rowners); 12120de55854SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 12130de55854SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 12140de55854SLois Curfman McInnes if (a->colmap) PETSCFREE(a->colmap); 12150de55854SLois Curfman McInnes if (a->garray) PETSCFREE(a->garray); 12160de55854SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 12170de55854SLois Curfman McInnes if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx); 12180de55854SLois Curfman McInnes PETSCFREE(a); 12190de55854SLois Curfman McInnes PETSCMEMCPY(A,B,sizeof(struct _Mat)); 12200de55854SLois Curfman McInnes PETSCHEADERDESTROY(B); 12210de55854SLois Curfman McInnes } 1222b7c46309SBarry Smith return 0; 1223b7c46309SBarry Smith } 1224b7c46309SBarry Smith 1225fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1226ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *); 1227d6dfbf8fSBarry Smith 12288a729477SBarry Smith /* -------------------------------------------------------------------*/ 12292ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 123039e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 123144a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 123244a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1233299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1234299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1235299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 123644a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1237b7c46309SBarry Smith MatTranspose_MPIAIJ, 1238a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1239855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1240ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 12411eb62cbbSBarry Smith 0, 1242299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1243299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1244299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1245d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1246299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 1247ff7509f2SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 12488a729477SBarry Smith 12498a729477SBarry Smith /*@ 1250ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1251ff756334SLois Curfman McInnes (the default parallel PETSc format). 12528a729477SBarry Smith 12538a729477SBarry Smith Input Parameters: 12541eb62cbbSBarry Smith . comm - MPI communicator 12557d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 12567d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 12577d3e4905SLois Curfman McInnes if N is given) 12587d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 12597d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 12607d3e4905SLois Curfman McInnes if n is given) 1261ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1262ff756334SLois Curfman McInnes (same for all local rows) 1263ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1264ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1265ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1266ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1267ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1268ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1269ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 12708a729477SBarry Smith 1271ff756334SLois Curfman McInnes Output Parameter: 12728a729477SBarry Smith . newmat - the matrix 12738a729477SBarry Smith 1274ff756334SLois Curfman McInnes Notes: 1275ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1276ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 1277ff756334SLois Curfman McInnes storage. That is, the stored row and column indices begin at 1278ab693e5aSLois Curfman McInnes one, not zero. See the users manual for further details. 1279ff756334SLois Curfman McInnes 1280ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1281ff756334SLois Curfman McInnes (possibly both). 1282ff756334SLois Curfman McInnes 1283e0245417SLois Curfman McInnes Storage Information: 1284e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1285e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1286e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1287e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1288e0245417SLois Curfman McInnes 1289e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 1290e0245417SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set both 1291e0245417SLois Curfman McInnes d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1292e0245417SLois Curfman McInnes Likewise, specify preallocated storage for the off-diagonal part of 1293e0245417SLois Curfman McInnes the local submatrix with o_nz or o_nnz (not both). 1294ff756334SLois Curfman McInnes 1295dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1296ff756334SLois Curfman McInnes 1297fafbff53SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 12988a729477SBarry Smith @*/ 12991eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 13001eb62cbbSBarry Smith int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 13018a729477SBarry Smith { 13028a729477SBarry Smith Mat mat; 130344a69424SLois Curfman McInnes Mat_MPIAIJ *aij; 13046abc6512SBarry Smith int ierr, i,sum[2],work[2]; 13058a729477SBarry Smith *newmat = 0; 130644a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1307a5a9c739SBarry Smith PLogObjectCreate(mat); 130878b31e54SBarry Smith mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1309*dbb450caSBarry Smith PETSCMEMCPY(&mat->ops,&MatOps,sizeof(struct _MatOps)); 131044a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 131144a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 13128a729477SBarry Smith mat->factor = 0; 1313d6dfbf8fSBarry Smith 131407a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 13151eb62cbbSBarry Smith MPI_Comm_rank(comm,&aij->mytid); 13161eb62cbbSBarry Smith MPI_Comm_size(comm,&aij->numtids); 13171eb62cbbSBarry Smith 1318dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 13191eb62cbbSBarry Smith work[0] = m; work[1] = n; 1320d6dfbf8fSBarry Smith MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 1321dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1322dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 13231eb62cbbSBarry Smith } 1324dbd7a890SLois Curfman McInnes if (m == PETSC_DECIDE) 1325dbd7a890SLois Curfman McInnes {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 1326dbd7a890SLois Curfman McInnes if (n == PETSC_DECIDE) 1327dbd7a890SLois Curfman McInnes {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 13288a729477SBarry Smith aij->m = m; 13298a729477SBarry Smith aij->n = n; 13301eb62cbbSBarry Smith aij->N = N; 13311eb62cbbSBarry Smith aij->M = M; 13321eb62cbbSBarry Smith 13331eb62cbbSBarry Smith /* build local table of row and column ownerships */ 133478b31e54SBarry Smith aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int)); 133578b31e54SBarry Smith CHKPTRQ(aij->rowners); 1336464493b3SBarry Smith PLogObjectMemory(mat,2*(aij->numtids+2)*sizeof(int)+sizeof(struct _Mat)+ 1337464493b3SBarry Smith sizeof(Mat_MPIAIJ)); 13381eb62cbbSBarry Smith aij->cowners = aij->rowners + aij->numtids + 1; 13391eb62cbbSBarry Smith MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 13401eb62cbbSBarry Smith aij->rowners[0] = 0; 13411eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 13421eb62cbbSBarry Smith aij->rowners[i] += aij->rowners[i-1]; 13438a729477SBarry Smith } 13441eb62cbbSBarry Smith aij->rstart = aij->rowners[aij->mytid]; 13451eb62cbbSBarry Smith aij->rend = aij->rowners[aij->mytid+1]; 13461eb62cbbSBarry Smith MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 13471eb62cbbSBarry Smith aij->cowners[0] = 0; 13481eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 13491eb62cbbSBarry Smith aij->cowners[i] += aij->cowners[i-1]; 13508a729477SBarry Smith } 13511eb62cbbSBarry Smith aij->cstart = aij->cowners[aij->mytid]; 13521eb62cbbSBarry Smith aij->cend = aij->cowners[aij->mytid+1]; 13538a729477SBarry Smith 1354fafbff53SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A); 135578b31e54SBarry Smith CHKERRQ(ierr); 1356a5a9c739SBarry Smith PLogObjectParent(mat,aij->A); 1357fafbff53SBarry Smith ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B); 135878b31e54SBarry Smith CHKERRQ(ierr); 1359a5a9c739SBarry Smith PLogObjectParent(mat,aij->B); 13608a729477SBarry Smith 13611eb62cbbSBarry Smith /* build cache for off array entries formed */ 136278b31e54SBarry Smith ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr); 13639e25ed09SBarry Smith aij->colmap = 0; 13649e25ed09SBarry Smith aij->garray = 0; 13658a729477SBarry Smith 13661eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 13671eb62cbbSBarry Smith aij->lvec = 0; 13681eb62cbbSBarry Smith aij->Mvctx = 0; 1369d6dfbf8fSBarry Smith aij->assembled = 0; 13708a729477SBarry Smith 1371d6dfbf8fSBarry Smith *newmat = mat; 1372d6dfbf8fSBarry Smith return 0; 1373d6dfbf8fSBarry Smith } 1374c74985f6SBarry Smith 1375ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1376d6dfbf8fSBarry Smith { 1377d6dfbf8fSBarry Smith Mat mat; 137844a69424SLois Curfman McInnes Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data; 13792ee70a88SLois Curfman McInnes int ierr, len; 1380d6dfbf8fSBarry Smith *newmat = 0; 1381d6dfbf8fSBarry Smith 1382bbb6d6a8SBarry Smith if (!oldmat->assembled) 1383bbb6d6a8SBarry Smith SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix"); 138444a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1385a5a9c739SBarry Smith PLogObjectCreate(mat); 138678b31e54SBarry Smith mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1387*dbb450caSBarry Smith PETSCMEMCPY(&mat->ops,&MatOps,sizeof(struct _MatOps)); 138844a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 138944a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1390d6dfbf8fSBarry Smith mat->factor = matin->factor; 1391d6dfbf8fSBarry Smith 1392d6dfbf8fSBarry Smith aij->m = oldmat->m; 1393d6dfbf8fSBarry Smith aij->n = oldmat->n; 1394d6dfbf8fSBarry Smith aij->M = oldmat->M; 1395d6dfbf8fSBarry Smith aij->N = oldmat->N; 1396d6dfbf8fSBarry Smith 1397d6dfbf8fSBarry Smith aij->assembled = 1; 1398d6dfbf8fSBarry Smith aij->rstart = oldmat->rstart; 1399d6dfbf8fSBarry Smith aij->rend = oldmat->rend; 1400d6dfbf8fSBarry Smith aij->cstart = oldmat->cstart; 1401d6dfbf8fSBarry Smith aij->cend = oldmat->cend; 1402d6dfbf8fSBarry Smith aij->numtids = oldmat->numtids; 1403d6dfbf8fSBarry Smith aij->mytid = oldmat->mytid; 140407a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 1405d6dfbf8fSBarry Smith 140678b31e54SBarry Smith aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) ); 140778b31e54SBarry Smith CHKPTRQ(aij->rowners); 1408464493b3SBarry Smith PLogObjectMemory(mat,(aij->numtids+1)*sizeof(int)+sizeof(struct _Mat)+ 1409464493b3SBarry Smith sizeof(Mat_MPIAIJ)); 141078b31e54SBarry Smith PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 141178b31e54SBarry Smith ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr); 14122ee70a88SLois Curfman McInnes if (oldmat->colmap) { 141378b31e54SBarry Smith aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) ); 141478b31e54SBarry Smith CHKPTRQ(aij->colmap); 1415464493b3SBarry Smith PLogObjectMemory(mat,(aij->N)*sizeof(int)); 141678b31e54SBarry Smith PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int)); 14172ee70a88SLois Curfman McInnes } else aij->colmap = 0; 1418ec8511deSBarry Smith if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 141978b31e54SBarry Smith aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray); 1420464493b3SBarry Smith PLogObjectMemory(mat,len*sizeof(int)); 142178b31e54SBarry Smith PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int)); 14222ee70a88SLois Curfman McInnes } else aij->garray = 0; 1423d6dfbf8fSBarry Smith 142478b31e54SBarry Smith ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr); 1425a5a9c739SBarry Smith PLogObjectParent(mat,aij->lvec); 142678b31e54SBarry Smith ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr); 1427a5a9c739SBarry Smith PLogObjectParent(mat,aij->Mvctx); 142878b31e54SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr); 1429a5a9c739SBarry Smith PLogObjectParent(mat,aij->A); 143078b31e54SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr); 1431a5a9c739SBarry Smith PLogObjectParent(mat,aij->B); 14328a729477SBarry Smith *newmat = mat; 14338a729477SBarry Smith return 0; 14348a729477SBarry Smith } 1435