1cb512458SBarry Smith #ifndef lint 2*bbb6d6a8SBarry Smith static char vcid[] = "$Id: mpiaij.c,v 1.60 1995/07/13 16:15:46 curfman Exp bsmith $"; 3cb512458SBarry Smith #endif 48a729477SBarry Smith 51eb62cbbSBarry Smith #include "mpiaij.h" 68a729477SBarry Smith #include "vec/vecimpl.h" 7d6dfbf8fSBarry Smith #include "inline/spops.h" 88a729477SBarry Smith 99e25ed09SBarry Smith /* local utility routine that creates a mapping from the global column 109e25ed09SBarry Smith number to the local number in the off-diagonal part of the local 119e25ed09SBarry Smith storage of the matrix. This is done in a non scable way since the 129e25ed09SBarry Smith length of colmap equals the global matrix length. 139e25ed09SBarry Smith */ 14b7c46309SBarry Smith static int CreateColmap_Private(Mat mat) 159e25ed09SBarry Smith { 1644a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1744a69424SLois Curfman McInnes Mat_AIJ *B = (Mat_AIJ*) aij->B->data; 189e25ed09SBarry Smith int n = B->n,i; 1978b31e54SBarry Smith aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 2078b31e54SBarry Smith PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int)); 21abc0e9e4SLois Curfman McInnes for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 229e25ed09SBarry Smith return 0; 239e25ed09SBarry Smith } 249e25ed09SBarry Smith 252493cbb0SBarry Smith extern int DisAssemble_MPIAIJ(Mat); 262493cbb0SBarry Smith 2751c98ccdSLois Curfman McInnes static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 28299609e3SLois Curfman McInnes { 29299609e3SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 30299609e3SLois Curfman McInnes int ierr; 31299609e3SLois Curfman McInnes if (aij->numtids == 1) { 3251c98ccdSLois Curfman McInnes ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr); 33299609e3SLois Curfman McInnes } else 34*bbb6d6a8SBarry Smith SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel"); 35299609e3SLois Curfman McInnes return 0; 36299609e3SLois Curfman McInnes } 37299609e3SLois Curfman McInnes 382ee70a88SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 391eb62cbbSBarry Smith int *idxn,Scalar *v,InsertMode addv) 408a729477SBarry Smith { 4144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 421eb62cbbSBarry Smith int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 431eb62cbbSBarry Smith int cstart = aij->cstart, cend = aij->cend,row,col; 448a729477SBarry Smith 4507a0e7adSLois Curfman McInnes if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) { 46*bbb6d6a8SBarry Smith SETERRQ(1,"MatSetValues_MPIAIJ:You cannot mix inserts and adds"); 478a729477SBarry Smith } 481eb62cbbSBarry Smith aij->insertmode = addv; 498a729477SBarry Smith for ( i=0; i<m; i++ ) { 50*bbb6d6a8SBarry Smith if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 51*bbb6d6a8SBarry Smith if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 521eb62cbbSBarry Smith if (idxm[i] >= rstart && idxm[i] < rend) { 531eb62cbbSBarry Smith row = idxm[i] - rstart; 541eb62cbbSBarry Smith for ( j=0; j<n; j++ ) { 55*bbb6d6a8SBarry Smith if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 56*bbb6d6a8SBarry Smith if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 571eb62cbbSBarry Smith if (idxn[j] >= cstart && idxn[j] < cend){ 581eb62cbbSBarry Smith col = idxn[j] - cstart; 5978b31e54SBarry Smith ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 601eb62cbbSBarry Smith } 611eb62cbbSBarry Smith else { 62d6dfbf8fSBarry Smith if (aij->assembled) { 63b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 649e25ed09SBarry Smith col = aij->colmap[idxn[j]] - 1; 65b7c46309SBarry Smith if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) { 662493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 672493cbb0SBarry Smith col = idxn[j]; 68d6dfbf8fSBarry Smith } 699e25ed09SBarry Smith } 709e25ed09SBarry Smith else col = idxn[j]; 7178b31e54SBarry Smith ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 721eb62cbbSBarry Smith } 731eb62cbbSBarry Smith } 741eb62cbbSBarry Smith } 751eb62cbbSBarry Smith else { 76dbd7a890SLois Curfman McInnes ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv); 7778b31e54SBarry Smith CHKERRQ(ierr); 781eb62cbbSBarry Smith } 798a729477SBarry Smith } 808a729477SBarry Smith return 0; 818a729477SBarry Smith } 828a729477SBarry Smith 838a729477SBarry Smith /* 841eb62cbbSBarry Smith the assembly code is a lot like the code for vectors, we should 851eb62cbbSBarry Smith sometime derive a single assembly code that can be used for 861eb62cbbSBarry Smith either case. 878a729477SBarry Smith */ 888a729477SBarry Smith 89fc76ce87SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 908a729477SBarry Smith { 9144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 92d6dfbf8fSBarry Smith MPI_Comm comm = mat->comm; 936abc6512SBarry Smith int numtids = aij->numtids, *owners = aij->rowners; 941eb62cbbSBarry Smith int mytid = aij->mytid; 951eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 966abc6512SBarry Smith int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 975392566eSBarry Smith int tag = mat->tag, *owner,*starts,count,ierr; 981eb62cbbSBarry Smith InsertMode addv; 991eb62cbbSBarry Smith Scalar *rvalues,*svalues; 1001eb62cbbSBarry Smith 1011eb62cbbSBarry Smith /* make sure all processors are either in INSERTMODE or ADDMODE */ 10228988994SBarry Smith MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 1031eb62cbbSBarry Smith MPI_BOR,comm); 1049d00d63dSBarry Smith if (addv == (ADDVALUES|INSERTVALUES)) { 105*bbb6d6a8SBarry Smith SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 1061eb62cbbSBarry Smith } 1071eb62cbbSBarry Smith aij->insertmode = addv; /* in case this processor had no cache */ 1081eb62cbbSBarry Smith 1091eb62cbbSBarry Smith /* first count number of contributors to each processor */ 11078b31e54SBarry Smith nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 11178b31e54SBarry Smith PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 11278b31e54SBarry Smith owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 1131eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1141eb62cbbSBarry Smith idx = aij->stash.idx[i]; 1151eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 1161eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 1171eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; break; 1188a729477SBarry Smith } 1198a729477SBarry Smith } 1208a729477SBarry Smith } 1211eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 1221eb62cbbSBarry Smith 1231eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 12478b31e54SBarry Smith work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 1251eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 1261eb62cbbSBarry Smith nreceives = work[mytid]; 1271eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 1281eb62cbbSBarry Smith nmax = work[mytid]; 12978b31e54SBarry Smith PETSCFREE(work); 1301eb62cbbSBarry Smith 1311eb62cbbSBarry Smith /* post receives: 1321eb62cbbSBarry Smith 1) each message will consist of ordered pairs 1331eb62cbbSBarry Smith (global index,value) we store the global index as a double 134d6dfbf8fSBarry Smith to simplify the message passing. 1351eb62cbbSBarry Smith 2) since we don't know how long each individual message is we 1361eb62cbbSBarry Smith allocate the largest needed buffer for each receive. Potentially 1371eb62cbbSBarry Smith this is a lot of wasted space. 1381eb62cbbSBarry Smith 1391eb62cbbSBarry Smith 1401eb62cbbSBarry Smith This could be done better. 1411eb62cbbSBarry Smith */ 14278b31e54SBarry Smith rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 14378b31e54SBarry Smith CHKPTRQ(rvalues); 14478b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 14578b31e54SBarry Smith CHKPTRQ(recv_waits); 1461eb62cbbSBarry Smith for ( i=0; i<nreceives; i++ ) { 147c60448a5SBarry Smith MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 1481eb62cbbSBarry Smith comm,recv_waits+i); 1491eb62cbbSBarry Smith } 1501eb62cbbSBarry Smith 1511eb62cbbSBarry Smith /* do sends: 1521eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1531eb62cbbSBarry Smith the ith processor 1541eb62cbbSBarry Smith */ 15578b31e54SBarry Smith svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 15678b31e54SBarry Smith CHKPTRQ(svalues); 15778b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 15878b31e54SBarry Smith CHKPTRQ(send_waits); 15978b31e54SBarry Smith starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts); 1601eb62cbbSBarry Smith starts[0] = 0; 1611eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1621eb62cbbSBarry Smith for ( i=0; i<aij->stash.n; i++ ) { 1631eb62cbbSBarry Smith svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 1641eb62cbbSBarry Smith svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 1651eb62cbbSBarry Smith svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 1661eb62cbbSBarry Smith } 16778b31e54SBarry Smith PETSCFREE(owner); 1681eb62cbbSBarry Smith starts[0] = 0; 1691eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1701eb62cbbSBarry Smith count = 0; 1711eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 1721eb62cbbSBarry Smith if (procs[i]) { 173c60448a5SBarry Smith MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag, 1741eb62cbbSBarry Smith comm,send_waits+count++); 1751eb62cbbSBarry Smith } 1761eb62cbbSBarry Smith } 17778b31e54SBarry Smith PETSCFREE(starts); PETSCFREE(nprocs); 1781eb62cbbSBarry Smith 1791eb62cbbSBarry Smith /* Free cache space */ 18078b31e54SBarry Smith ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 1811eb62cbbSBarry Smith 1821eb62cbbSBarry Smith aij->svalues = svalues; aij->rvalues = rvalues; 1831eb62cbbSBarry Smith aij->nsends = nsends; aij->nrecvs = nreceives; 1841eb62cbbSBarry Smith aij->send_waits = send_waits; aij->recv_waits = recv_waits; 1851eb62cbbSBarry Smith aij->rmax = nmax; 1861eb62cbbSBarry Smith 1871eb62cbbSBarry Smith return 0; 1881eb62cbbSBarry Smith } 18944a69424SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 1901eb62cbbSBarry Smith 191fc76ce87SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 1921eb62cbbSBarry Smith { 1931eb62cbbSBarry Smith int ierr; 19444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1951eb62cbbSBarry Smith MPI_Status *send_status,recv_status; 1966abc6512SBarry Smith int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 1972493cbb0SBarry Smith int row,col,other_disassembled; 1981eb62cbbSBarry Smith Scalar *values,val; 1991eb62cbbSBarry Smith InsertMode addv = aij->insertmode; 2001eb62cbbSBarry Smith 2011eb62cbbSBarry Smith /* wait on receives */ 2021eb62cbbSBarry Smith while (count) { 203d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 2041eb62cbbSBarry Smith /* unpack receives into our local space */ 205d6dfbf8fSBarry Smith values = aij->rvalues + 3*imdex*aij->rmax; 206c60448a5SBarry Smith MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 2071eb62cbbSBarry Smith n = n/3; 2081eb62cbbSBarry Smith for ( i=0; i<n; i++ ) { 2091eb62cbbSBarry Smith row = (int) PETSCREAL(values[3*i]) - aij->rstart; 2101eb62cbbSBarry Smith col = (int) PETSCREAL(values[3*i+1]); 2111eb62cbbSBarry Smith val = values[3*i+2]; 2121eb62cbbSBarry Smith if (col >= aij->cstart && col < aij->cend) { 2131eb62cbbSBarry Smith col -= aij->cstart; 2141eb62cbbSBarry Smith MatSetValues(aij->A,1,&row,1,&col,&val,addv); 2151eb62cbbSBarry Smith } 2161eb62cbbSBarry Smith else { 217d6dfbf8fSBarry Smith if (aij->assembled) { 218b7c46309SBarry Smith if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 2199e25ed09SBarry Smith col = aij->colmap[col] - 1; 220b7c46309SBarry Smith if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) { 2212493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2222493cbb0SBarry Smith col = (int) PETSCREAL(values[3*i+1]); 223d6dfbf8fSBarry Smith } 2249e25ed09SBarry Smith } 2251eb62cbbSBarry Smith MatSetValues(aij->B,1,&row,1,&col,&val,addv); 2261eb62cbbSBarry Smith } 2271eb62cbbSBarry Smith } 2281eb62cbbSBarry Smith count--; 2291eb62cbbSBarry Smith } 23078b31e54SBarry Smith PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 2311eb62cbbSBarry Smith 2321eb62cbbSBarry Smith /* wait on sends */ 2331eb62cbbSBarry Smith if (aij->nsends) { 23478b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) ); 23578b31e54SBarry Smith CHKPTRQ(send_status); 2361eb62cbbSBarry Smith MPI_Waitall(aij->nsends,aij->send_waits,send_status); 23778b31e54SBarry Smith PETSCFREE(send_status); 2381eb62cbbSBarry Smith } 23978b31e54SBarry Smith PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 2401eb62cbbSBarry Smith 24107a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 24278b31e54SBarry Smith ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 24378b31e54SBarry Smith ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 2441eb62cbbSBarry Smith 2452493cbb0SBarry Smith /* determine if any processor has disassembled, if so we must 2462493cbb0SBarry Smith also disassemble ourselfs, in order that we may reassemble. */ 2472493cbb0SBarry Smith MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1, 2482493cbb0SBarry Smith MPI_INT,MPI_PROD,mat->comm); 2492493cbb0SBarry Smith if (aij->assembled && !other_disassembled) { 2502493cbb0SBarry Smith ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 2512493cbb0SBarry Smith } 2522493cbb0SBarry Smith 2535e42470aSBarry Smith if (!aij->assembled && mode == FINAL_ASSEMBLY) { 25478b31e54SBarry Smith ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 255d6dfbf8fSBarry Smith aij->assembled = 1; 2565e42470aSBarry Smith } 25778b31e54SBarry Smith ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 25878b31e54SBarry Smith ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 2595e42470aSBarry Smith 2608a729477SBarry Smith return 0; 2618a729477SBarry Smith } 2628a729477SBarry Smith 2632ee70a88SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 2641eb62cbbSBarry Smith { 26544a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 266dbd7a890SLois Curfman McInnes int ierr; 26778b31e54SBarry Smith ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 26878b31e54SBarry Smith ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 2691eb62cbbSBarry Smith return 0; 2701eb62cbbSBarry Smith } 2711eb62cbbSBarry Smith 2721eb62cbbSBarry Smith /* again this uses the same basic stratagy as in the assembly and 2731eb62cbbSBarry Smith scatter create routines, we should try to do it systemamatically 2741eb62cbbSBarry Smith if we can figure out the proper level of generality. */ 2751eb62cbbSBarry Smith 2761eb62cbbSBarry Smith /* the code does not do the diagonal entries correctly unless the 2771eb62cbbSBarry Smith matrix is square and the column and row owerships are identical. 2781eb62cbbSBarry Smith This is a BUG. The only way to fix it seems to be to access 2791eb62cbbSBarry Smith aij->A and aij->B directly and not through the MatZeroRows() 2801eb62cbbSBarry Smith routine. 2811eb62cbbSBarry Smith */ 28244a69424SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 2831eb62cbbSBarry Smith { 28444a69424SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 2851eb62cbbSBarry Smith int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 2866abc6512SBarry Smith int *procs,*nprocs,j,found,idx,nsends,*work; 2871eb62cbbSBarry Smith int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 2885392566eSBarry Smith int *rvalues,tag = A->tag,count,base,slen,n,*source; 289d6dfbf8fSBarry Smith int *lens,imdex,*lrows,*values; 290d6dfbf8fSBarry Smith MPI_Comm comm = A->comm; 2911eb62cbbSBarry Smith MPI_Request *send_waits,*recv_waits; 2921eb62cbbSBarry Smith MPI_Status recv_status,*send_status; 2931eb62cbbSBarry Smith IS istmp; 2941eb62cbbSBarry Smith 295c8f67822SLois Curfman McInnes if (!l->assembled) 29678b31e54SBarry Smith SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix first"); 29778b31e54SBarry Smith ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 29878b31e54SBarry Smith ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 2991eb62cbbSBarry Smith 3001eb62cbbSBarry Smith /* first count number of contributors to each processor */ 30178b31e54SBarry Smith nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 30278b31e54SBarry Smith PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 30378b31e54SBarry Smith owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 3041eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3051eb62cbbSBarry Smith idx = rows[i]; 3061eb62cbbSBarry Smith found = 0; 3071eb62cbbSBarry Smith for ( j=0; j<numtids; j++ ) { 3081eb62cbbSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 3091eb62cbbSBarry Smith nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 3101eb62cbbSBarry Smith } 3111eb62cbbSBarry Smith } 312*bbb6d6a8SBarry Smith if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 3131eb62cbbSBarry Smith } 3141eb62cbbSBarry Smith nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 3151eb62cbbSBarry Smith 3161eb62cbbSBarry Smith /* inform other processors of number of messages and max length*/ 31778b31e54SBarry Smith work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 3181eb62cbbSBarry Smith MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 3191eb62cbbSBarry Smith nrecvs = work[mytid]; 3201eb62cbbSBarry Smith MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 3211eb62cbbSBarry Smith nmax = work[mytid]; 32278b31e54SBarry Smith PETSCFREE(work); 3231eb62cbbSBarry Smith 3241eb62cbbSBarry Smith /* post receives: */ 32578b31e54SBarry Smith rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 32678b31e54SBarry Smith CHKPTRQ(rvalues); 32778b31e54SBarry Smith recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 32878b31e54SBarry Smith CHKPTRQ(recv_waits); 3291eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3301eb62cbbSBarry Smith MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 3311eb62cbbSBarry Smith comm,recv_waits+i); 3321eb62cbbSBarry Smith } 3331eb62cbbSBarry Smith 3341eb62cbbSBarry Smith /* do sends: 3351eb62cbbSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3361eb62cbbSBarry Smith the ith processor 3371eb62cbbSBarry Smith */ 33878b31e54SBarry Smith svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 33978b31e54SBarry Smith send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 34078b31e54SBarry Smith CHKPTRQ(send_waits); 34178b31e54SBarry Smith starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts); 3421eb62cbbSBarry Smith starts[0] = 0; 3431eb62cbbSBarry Smith for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3441eb62cbbSBarry Smith for ( i=0; i<N; i++ ) { 3451eb62cbbSBarry Smith svalues[starts[owner[i]]++] = rows[i]; 3461eb62cbbSBarry Smith } 3471eb62cbbSBarry Smith ISRestoreIndices(is,&rows); 3481eb62cbbSBarry Smith 3491eb62cbbSBarry Smith starts[0] = 0; 3501eb62cbbSBarry Smith for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 3511eb62cbbSBarry Smith count = 0; 3521eb62cbbSBarry Smith for ( i=0; i<numtids; i++ ) { 3531eb62cbbSBarry Smith if (procs[i]) { 3541eb62cbbSBarry Smith MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 3551eb62cbbSBarry Smith comm,send_waits+count++); 3561eb62cbbSBarry Smith } 3571eb62cbbSBarry Smith } 35878b31e54SBarry Smith PETSCFREE(starts); 3591eb62cbbSBarry Smith 3601eb62cbbSBarry Smith base = owners[mytid]; 3611eb62cbbSBarry Smith 3621eb62cbbSBarry Smith /* wait on receives */ 36378b31e54SBarry Smith lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 3641eb62cbbSBarry Smith source = lens + nrecvs; 3651eb62cbbSBarry Smith count = nrecvs; slen = 0; 3661eb62cbbSBarry Smith while (count) { 367d6dfbf8fSBarry Smith MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 3681eb62cbbSBarry Smith /* unpack receives into our local space */ 3691eb62cbbSBarry Smith MPI_Get_count(&recv_status,MPI_INT,&n); 370d6dfbf8fSBarry Smith source[imdex] = recv_status.MPI_SOURCE; 371d6dfbf8fSBarry Smith lens[imdex] = n; 3721eb62cbbSBarry Smith slen += n; 3731eb62cbbSBarry Smith count--; 3741eb62cbbSBarry Smith } 37578b31e54SBarry Smith PETSCFREE(recv_waits); 3761eb62cbbSBarry Smith 3771eb62cbbSBarry Smith /* move the data into the send scatter */ 37878b31e54SBarry Smith lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 3791eb62cbbSBarry Smith count = 0; 3801eb62cbbSBarry Smith for ( i=0; i<nrecvs; i++ ) { 3811eb62cbbSBarry Smith values = rvalues + i*nmax; 3821eb62cbbSBarry Smith for ( j=0; j<lens[i]; j++ ) { 3831eb62cbbSBarry Smith lrows[count++] = values[j] - base; 3841eb62cbbSBarry Smith } 3851eb62cbbSBarry Smith } 38678b31e54SBarry Smith PETSCFREE(rvalues); PETSCFREE(lens); 38778b31e54SBarry Smith PETSCFREE(owner); PETSCFREE(nprocs); 3881eb62cbbSBarry Smith 3891eb62cbbSBarry Smith /* actually zap the local rows */ 3906b5873e3SBarry Smith ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp); 39178b31e54SBarry Smith CHKERRQ(ierr); PETSCFREE(lrows); 39278b31e54SBarry Smith ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 39378b31e54SBarry Smith ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 39478b31e54SBarry Smith ierr = ISDestroy(istmp); CHKERRQ(ierr); 3951eb62cbbSBarry Smith 3961eb62cbbSBarry Smith /* wait on sends */ 3971eb62cbbSBarry Smith if (nsends) { 39878b31e54SBarry Smith send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) ); 39978b31e54SBarry Smith CHKPTRQ(send_status); 4001eb62cbbSBarry Smith MPI_Waitall(nsends,send_waits,send_status); 40178b31e54SBarry Smith PETSCFREE(send_status); 4021eb62cbbSBarry Smith } 40378b31e54SBarry Smith PETSCFREE(send_waits); PETSCFREE(svalues); 4041eb62cbbSBarry Smith 4051eb62cbbSBarry Smith 4061eb62cbbSBarry Smith return 0; 4071eb62cbbSBarry Smith } 4081eb62cbbSBarry Smith 40944a69424SLois Curfman McInnes static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy) 4101eb62cbbSBarry Smith { 41144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 4121eb62cbbSBarry Smith int ierr; 41378b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first"); 41406be10caSBarry Smith ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 41578b31e54SBarry Smith CHKERRQ(ierr); 41678b31e54SBarry Smith ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr); 41706be10caSBarry Smith ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 41878b31e54SBarry Smith CHKERRQ(ierr); 41978b31e54SBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr); 4201eb62cbbSBarry Smith return 0; 4211eb62cbbSBarry Smith } 4221eb62cbbSBarry Smith 42344a69424SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 424da3a660dSBarry Smith { 42544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 426da3a660dSBarry Smith int ierr; 42778b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix first"); 42806be10caSBarry Smith ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 42978b31e54SBarry Smith CHKERRQ(ierr); 43078b31e54SBarry Smith ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 43106be10caSBarry Smith ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 43278b31e54SBarry Smith CHKERRQ(ierr); 43378b31e54SBarry Smith ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr); 434da3a660dSBarry Smith return 0; 435da3a660dSBarry Smith } 436da3a660dSBarry Smith 43744a69424SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy) 438da3a660dSBarry Smith { 43944a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 440da3a660dSBarry Smith int ierr; 441da3a660dSBarry Smith 44244a69424SLois Curfman McInnes if (!aij->assembled) 44378b31e54SBarry Smith SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix first"); 444da3a660dSBarry Smith /* do nondiagonal part */ 44578b31e54SBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 446da3a660dSBarry Smith /* send it on its way */ 44706be10caSBarry Smith ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES, 44878b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 449da3a660dSBarry Smith /* do local part */ 45078b31e54SBarry Smith ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr); 451da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 452da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 453da3a660dSBarry Smith /* but is not perhaps always true. */ 45406be10caSBarry Smith ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES, 45578b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 456da3a660dSBarry Smith return 0; 457da3a660dSBarry Smith } 458da3a660dSBarry Smith 45944a69424SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 460da3a660dSBarry Smith { 46144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 462da3a660dSBarry Smith int ierr; 463da3a660dSBarry Smith 46444a69424SLois Curfman McInnes if (!aij->assembled) 46578b31e54SBarry Smith SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix first"); 466da3a660dSBarry Smith /* do nondiagonal part */ 46778b31e54SBarry Smith ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 468da3a660dSBarry Smith /* send it on its way */ 46906be10caSBarry Smith ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES, 47078b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 471da3a660dSBarry Smith /* do local part */ 47278b31e54SBarry Smith ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 473da3a660dSBarry Smith /* receive remote parts: note this assumes the values are not actually */ 474da3a660dSBarry Smith /* inserted in yy until the next line, which is true for my implementation*/ 475da3a660dSBarry Smith /* but is not perhaps always true. */ 47606be10caSBarry Smith ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES, 47778b31e54SBarry Smith (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 478da3a660dSBarry Smith return 0; 479da3a660dSBarry Smith } 480da3a660dSBarry Smith 4811eb62cbbSBarry Smith /* 4821eb62cbbSBarry Smith This only works correctly for square matrices where the subblock A->A is the 4831eb62cbbSBarry Smith diagonal block 4841eb62cbbSBarry Smith */ 485d1710a03SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v) 4861eb62cbbSBarry Smith { 48744a69424SLois Curfman McInnes Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data; 48878b31e54SBarry Smith if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix first"); 4891eb62cbbSBarry Smith return MatGetDiagonal(A->A,v); 4901eb62cbbSBarry Smith } 4911eb62cbbSBarry Smith 49244a69424SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 4931eb62cbbSBarry Smith { 4941eb62cbbSBarry Smith Mat mat = (Mat) obj; 49544a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 4961eb62cbbSBarry Smith int ierr; 497a5a9c739SBarry Smith #if defined(PETSC_LOG) 498a5a9c739SBarry Smith PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N); 499a5a9c739SBarry Smith #endif 50078b31e54SBarry Smith PETSCFREE(aij->rowners); 50178b31e54SBarry Smith ierr = MatDestroy(aij->A); CHKERRQ(ierr); 50278b31e54SBarry Smith ierr = MatDestroy(aij->B); CHKERRQ(ierr); 50378b31e54SBarry Smith if (aij->colmap) PETSCFREE(aij->colmap); 50478b31e54SBarry Smith if (aij->garray) PETSCFREE(aij->garray); 5051eb62cbbSBarry Smith if (aij->lvec) VecDestroy(aij->lvec); 5061eb62cbbSBarry Smith if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 50778b31e54SBarry Smith PETSCFREE(aij); 508a5a9c739SBarry Smith PLogObjectDestroy(mat); 509a5a9c739SBarry Smith PETSCHEADERDESTROY(mat); 5101eb62cbbSBarry Smith return 0; 5111eb62cbbSBarry Smith } 5127857610eSBarry Smith #include "draw.h" 513ee50ffe9SBarry Smith #include "pviewer.h" 514ee50ffe9SBarry Smith 51544a69424SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 5161eb62cbbSBarry Smith { 5171eb62cbbSBarry Smith Mat mat = (Mat) obj; 51844a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 5191eb62cbbSBarry Smith int ierr; 520d13ab20cSBarry Smith PetscObject vobj = (PetscObject) viewer; 5211eb62cbbSBarry Smith 52278b31e54SBarry Smith if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix first"); 523154123eaSLois Curfman McInnes if (!viewer) { /* so that viewers may be used from debuggers */ 524154123eaSLois Curfman McInnes viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer; 525154123eaSLois Curfman McInnes } 526ab254492SBarry Smith if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 5277857610eSBarry Smith if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) { 5284a0ce102SLois Curfman McInnes FILE *fd = ViewerFileGetPointer_Private(viewer); 5297f813858SBarry Smith MPIU_Seq_begin(mat->comm,1); 530d13ab20cSBarry Smith fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 5311eb62cbbSBarry Smith aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 5321eb62cbbSBarry Smith aij->cend); 53378b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 53478b31e54SBarry Smith ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 535d13ab20cSBarry Smith fflush(fd); 5367f813858SBarry Smith MPIU_Seq_end(mat->comm,1); 537d13ab20cSBarry Smith } 5387857610eSBarry Smith else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) || 5397857610eSBarry Smith vobj->cookie == DRAW_COOKIE) { 54095373324SBarry Smith int numtids = aij->numtids, mytid = aij->mytid; 54195373324SBarry Smith if (numtids == 1) { 54278b31e54SBarry Smith ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 54395373324SBarry Smith } 54495373324SBarry Smith else { 54595373324SBarry Smith /* assemble the entire matrix onto first processor. */ 54695373324SBarry Smith Mat A; 5472ee70a88SLois Curfman McInnes Mat_AIJ *Aloc; 5482eb8c8abSBarry Smith int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 54995373324SBarry Smith Scalar *a; 5502ee70a88SLois Curfman McInnes 55195373324SBarry Smith if (!mytid) { 55295373324SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A); 55395373324SBarry Smith } 55495373324SBarry Smith else { 55595373324SBarry Smith ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A); 55695373324SBarry Smith } 55778b31e54SBarry Smith CHKERRQ(ierr); 55895373324SBarry Smith 55995373324SBarry Smith /* copy over the A part */ 5602ee70a88SLois Curfman McInnes Aloc = (Mat_AIJ*) aij->A->data; 5612ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 56295373324SBarry Smith row = aij->rstart; 563b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;} 56495373324SBarry Smith for ( i=0; i<m; i++ ) { 56507a0e7adSLois Curfman McInnes ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES); 56678b31e54SBarry Smith CHKERRQ(ierr); 56795373324SBarry Smith row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 56895373324SBarry Smith } 5692ee70a88SLois Curfman McInnes aj = Aloc->j; 570b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;} 57195373324SBarry Smith 57295373324SBarry Smith /* copy over the B part */ 5732ee70a88SLois Curfman McInnes Aloc = (Mat_AIJ*) aij->B->data; 5742ee70a88SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 57595373324SBarry Smith row = aij->rstart; 57678b31e54SBarry Smith ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 577b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];} 57895373324SBarry Smith for ( i=0; i<m; i++ ) { 57907a0e7adSLois Curfman McInnes ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES); 58078b31e54SBarry Smith CHKERRQ(ierr); 58195373324SBarry Smith row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 58295373324SBarry Smith } 58378b31e54SBarry Smith PETSCFREE(ct); 58478b31e54SBarry Smith ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 58578b31e54SBarry Smith ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 58695373324SBarry Smith if (!mytid) { 58778b31e54SBarry Smith ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 58895373324SBarry Smith } 58978b31e54SBarry Smith ierr = MatDestroy(A); CHKERRQ(ierr); 59095373324SBarry Smith } 59195373324SBarry Smith } 5921eb62cbbSBarry Smith return 0; 5931eb62cbbSBarry Smith } 5941eb62cbbSBarry Smith 595d3c9fed9SLois Curfman McInnes extern int MatMarkDiag_AIJ(Mat_AIJ *); 5961eb62cbbSBarry Smith /* 5971eb62cbbSBarry Smith This has to provide several versions. 5981eb62cbbSBarry Smith 5991eb62cbbSBarry Smith 1) per sequential 6001eb62cbbSBarry Smith 2) a) use only local smoothing updating outer values only once. 6011eb62cbbSBarry Smith b) local smoothing updating outer values each inner iteration 602d6dfbf8fSBarry Smith 3) color updating out values betwen colors. 6031eb62cbbSBarry Smith */ 604fc76ce87SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 605fc76ce87SLois Curfman McInnes double shift,int its,Vec xx) 6068a729477SBarry Smith { 60744a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 608d6dfbf8fSBarry Smith Mat AA = mat->A, BB = mat->B; 60944a69424SLois Curfman McInnes Mat_AIJ *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data; 6106abc6512SBarry Smith Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 6116abc6512SBarry Smith int ierr,*idx, *diag; 6126abc6512SBarry Smith int n = mat->n, m = mat->m, i; 613da3a660dSBarry Smith Vec tt; 6148a729477SBarry Smith 61578b31e54SBarry Smith if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix first"); 6161eb62cbbSBarry Smith 617d6dfbf8fSBarry Smith VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 618d6dfbf8fSBarry Smith xs = x -1; /* shift by one for index start of 1 */ 619da3a660dSBarry Smith ls--; 620d3c9fed9SLois Curfman McInnes if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;} 621d6dfbf8fSBarry Smith diag = A->diag; 622acb40c82SBarry Smith if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 623*bbb6d6a8SBarry Smith SETERRQ(1,"MatRelax_MPIAIJ:option not yet support"); 624acb40c82SBarry Smith } 625da3a660dSBarry Smith if (flag & SOR_EISENSTAT) { 626da3a660dSBarry Smith /* Let A = L + U + D; where L is lower trianglar, 627da3a660dSBarry Smith U is upper triangular, E is diagonal; This routine applies 628da3a660dSBarry Smith 629da3a660dSBarry Smith (L + E)^{-1} A (U + E)^{-1} 630da3a660dSBarry Smith 631da3a660dSBarry Smith to a vector efficiently using Eisenstat's trick. This is for 632da3a660dSBarry Smith the case of SSOR preconditioner, so E is D/omega where omega 633da3a660dSBarry Smith is the relaxation factor. 634da3a660dSBarry Smith */ 63578b31e54SBarry Smith ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 636da3a660dSBarry Smith VecGetArray(tt,&t); 637da3a660dSBarry Smith scale = (2.0/omega) - 1.0; 638da3a660dSBarry Smith /* x = (E + U)^{-1} b */ 639da3a660dSBarry Smith VecSet(&zero,mat->lvec); 64006be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 64178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 642da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 643da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 644da3a660dSBarry Smith idx = A->j + diag[i]; 645da3a660dSBarry Smith v = A->a + diag[i]; 646da3a660dSBarry Smith sum = b[i]; 647da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 648da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 649da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 650da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 651da3a660dSBarry Smith v = B->a + B->i[i] - 1; 652da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 653da3a660dSBarry Smith x[i] = omega*(sum/d); 654da3a660dSBarry Smith } 655edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 65678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 657da3a660dSBarry Smith 658da3a660dSBarry Smith /* t = b - (2*E - D)x */ 659da3a660dSBarry Smith v = A->a; 660da3a660dSBarry Smith for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; } 661da3a660dSBarry Smith 662da3a660dSBarry Smith /* t = (E + L)^{-1}t */ 663da3a660dSBarry Smith ts = t - 1; /* shifted by one for index start of a or mat->j*/ 664da3a660dSBarry Smith diag = A->diag; 665da3a660dSBarry Smith VecSet(&zero,mat->lvec); 66606be10caSBarry Smith ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN, 66778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 668da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 669da3a660dSBarry Smith n = diag[i] - A->i[i]; 670da3a660dSBarry Smith idx = A->j + A->i[i] - 1; 671da3a660dSBarry Smith v = A->a + A->i[i] - 1; 672da3a660dSBarry Smith sum = t[i]; 673da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ts,v,idx,n); 674da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 675da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 676da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 677da3a660dSBarry Smith v = B->a + B->i[i] - 1; 678da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 679da3a660dSBarry Smith t[i] = omega*(sum/d); 680da3a660dSBarry Smith } 681edd2f0e1SBarry Smith ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN, 68278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 683da3a660dSBarry Smith /* x = x + t */ 684da3a660dSBarry Smith for ( i=0; i<m; i++ ) { x[i] += t[i]; } 685da3a660dSBarry Smith VecDestroy(tt); 686da3a660dSBarry Smith return 0; 687da3a660dSBarry Smith } 688da3a660dSBarry Smith 6891eb62cbbSBarry Smith 690d6dfbf8fSBarry Smith if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 691da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 692da3a660dSBarry Smith VecSet(&zero,mat->lvec); VecSet(&zero,xx); 693da3a660dSBarry Smith } 694da3a660dSBarry Smith else { 69506be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 69678b31e54SBarry Smith CHKERRQ(ierr); 69706be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 69878b31e54SBarry Smith CHKERRQ(ierr); 699da3a660dSBarry Smith } 700d6dfbf8fSBarry Smith while (its--) { 701d6dfbf8fSBarry Smith /* go down through the rows */ 70206be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 70378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 704d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 705d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 706d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 707d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 708d6dfbf8fSBarry Smith sum = b[i]; 709d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 710d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 711d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 712d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 713d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 714d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 715d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 716d6dfbf8fSBarry Smith } 717edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 71878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 719d6dfbf8fSBarry Smith /* come up through the rows */ 72006be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 72178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 722d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 723d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 724d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 725d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 726d6dfbf8fSBarry Smith sum = b[i]; 727d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 728d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 729d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 730d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 731d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 732d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 733d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 734d6dfbf8fSBarry Smith } 735edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 73678b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 737d6dfbf8fSBarry Smith } 738d6dfbf8fSBarry Smith } 739d6dfbf8fSBarry Smith else if (flag & SOR_FORWARD_SWEEP){ 740da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 741da3a660dSBarry Smith VecSet(&zero,mat->lvec); 74206be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 74378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 744da3a660dSBarry Smith for ( i=0; i<m; i++ ) { 745da3a660dSBarry Smith n = diag[i] - A->i[i]; 746da3a660dSBarry Smith idx = A->j + A->i[i] - 1; 747da3a660dSBarry Smith v = A->a + A->i[i] - 1; 748da3a660dSBarry Smith sum = b[i]; 749da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 750da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 751da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 752da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 753da3a660dSBarry Smith v = B->a + B->i[i] - 1; 754da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 755da3a660dSBarry Smith x[i] = omega*(sum/d); 756da3a660dSBarry Smith } 757edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 75878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 759da3a660dSBarry Smith its--; 760da3a660dSBarry Smith } 761d6dfbf8fSBarry Smith while (its--) { 76206be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 76378b31e54SBarry Smith CHKERRQ(ierr); 76406be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx); 76578b31e54SBarry Smith CHKERRQ(ierr); 76606be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 76778b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 768d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 769d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 770d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 771d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 772d6dfbf8fSBarry Smith sum = b[i]; 773d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 774d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 775d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 776d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 777d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 778d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 779d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 780d6dfbf8fSBarry Smith } 781edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN, 78278b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 783d6dfbf8fSBarry Smith } 784d6dfbf8fSBarry Smith } 785d6dfbf8fSBarry Smith else if (flag & SOR_BACKWARD_SWEEP){ 786da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 787da3a660dSBarry Smith VecSet(&zero,mat->lvec); 78806be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 78978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 790da3a660dSBarry Smith for ( i=m-1; i>-1; i-- ) { 791da3a660dSBarry Smith n = A->i[i+1] - diag[i] - 1; 792da3a660dSBarry Smith idx = A->j + diag[i]; 793da3a660dSBarry Smith v = A->a + diag[i]; 794da3a660dSBarry Smith sum = b[i]; 795da3a660dSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 796da3a660dSBarry Smith d = shift + A->a[diag[i]-1]; 797da3a660dSBarry Smith n = B->i[i+1] - B->i[i]; 798da3a660dSBarry Smith idx = B->j + B->i[i] - 1; 799da3a660dSBarry Smith v = B->a + B->i[i] - 1; 800da3a660dSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 801da3a660dSBarry Smith x[i] = omega*(sum/d); 802da3a660dSBarry Smith } 803edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 80478b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 805da3a660dSBarry Smith its--; 806da3a660dSBarry Smith } 807d6dfbf8fSBarry Smith while (its--) { 80806be10caSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN, 80978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 81006be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN, 81178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 81206be10caSBarry Smith ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 81378b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 814d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 815d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 816d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 817d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 818d6dfbf8fSBarry Smith sum = b[i]; 819d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 820d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 821d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 822d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 823d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 824d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 825d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 826d6dfbf8fSBarry Smith } 827edd2f0e1SBarry Smith ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP, 82878b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 829d6dfbf8fSBarry Smith } 830d6dfbf8fSBarry Smith } 831d6dfbf8fSBarry Smith else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 832da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 833da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 834da3a660dSBarry Smith } 83506be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 83678b31e54SBarry Smith CHKERRQ(ierr); 83706be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 83878b31e54SBarry Smith CHKERRQ(ierr); 839d6dfbf8fSBarry Smith while (its--) { 840d6dfbf8fSBarry Smith /* go down through the rows */ 841d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 842d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 843d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 844d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 845d6dfbf8fSBarry Smith sum = b[i]; 846d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 847d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 848d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 849d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 850d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 851d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 852d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 853d6dfbf8fSBarry Smith } 854d6dfbf8fSBarry Smith /* come up through the rows */ 855d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 856d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 857d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 858d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 859d6dfbf8fSBarry Smith sum = b[i]; 860d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 861d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 862d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 863d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 864d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 865d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 866d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 867d6dfbf8fSBarry Smith } 868d6dfbf8fSBarry Smith } 869d6dfbf8fSBarry Smith } 870d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 871da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 872da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 873da3a660dSBarry Smith } 87406be10caSBarry Smith ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 87578b31e54SBarry Smith CHKERRQ(ierr); 87606be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx); 87778b31e54SBarry Smith CHKERRQ(ierr); 878d6dfbf8fSBarry Smith while (its--) { 879d6dfbf8fSBarry Smith for ( i=0; i<m; i++ ) { 880d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 881d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 882d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 883d6dfbf8fSBarry Smith sum = b[i]; 884d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 885d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 886d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 887d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 888d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 889d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 890d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 891d6dfbf8fSBarry Smith } 892d6dfbf8fSBarry Smith } 893d6dfbf8fSBarry Smith } 894d6dfbf8fSBarry Smith else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 895da3a660dSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 896da3a660dSBarry Smith return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 897da3a660dSBarry Smith } 89806be10caSBarry Smith ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL, 89978b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 90006be10caSBarry Smith ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL, 90178b31e54SBarry Smith mat->Mvctx); CHKERRQ(ierr); 902d6dfbf8fSBarry Smith while (its--) { 903d6dfbf8fSBarry Smith for ( i=m-1; i>-1; i-- ) { 904d6dfbf8fSBarry Smith n = A->i[i+1] - A->i[i]; 905d6dfbf8fSBarry Smith idx = A->j + A->i[i] - 1; 906d6dfbf8fSBarry Smith v = A->a + A->i[i] - 1; 907d6dfbf8fSBarry Smith sum = b[i]; 908d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,xs,v,idx,n); 909d6dfbf8fSBarry Smith d = shift + A->a[diag[i]-1]; 910d6dfbf8fSBarry Smith n = B->i[i+1] - B->i[i]; 911d6dfbf8fSBarry Smith idx = B->j + B->i[i] - 1; 912d6dfbf8fSBarry Smith v = B->a + B->i[i] - 1; 913d6dfbf8fSBarry Smith SPARSEDENSEMDOT(sum,ls,v,idx,n); 914d6dfbf8fSBarry Smith x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 915d6dfbf8fSBarry Smith } 916d6dfbf8fSBarry Smith } 917d6dfbf8fSBarry Smith } 9188a729477SBarry Smith return 0; 9198a729477SBarry Smith } 920a66be287SLois Curfman McInnes 921fc76ce87SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 922fc76ce87SLois Curfman McInnes int *nzalloc,int *mem) 923a66be287SLois Curfman McInnes { 924a66be287SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 925a66be287SLois Curfman McInnes Mat A = mat->A, B = mat->B; 926a66be287SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 927a66be287SLois Curfman McInnes 92878b31e54SBarry Smith ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 92978b31e54SBarry Smith ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 930a66be287SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 931a66be287SLois Curfman McInnes if (flag == MAT_LOCAL) { 932a66be287SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 933a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 934a66be287SLois Curfman McInnes MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm); 935a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 936a66be287SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 937a66be287SLois Curfman McInnes MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm); 938a66be287SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 939a66be287SLois Curfman McInnes } 940a66be287SLois Curfman McInnes return 0; 941a66be287SLois Curfman McInnes } 942a66be287SLois Curfman McInnes 943299609e3SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 944299609e3SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 945299609e3SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 946299609e3SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 947299609e3SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 948299609e3SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 949299609e3SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 950299609e3SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 951299609e3SLois Curfman McInnes 952fc76ce87SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat aijin,MatOption op) 953c74985f6SBarry Smith { 95444a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 955c74985f6SBarry Smith 956c74985f6SBarry Smith if (op == NO_NEW_NONZERO_LOCATIONS) { 957c74985f6SBarry Smith MatSetOption(aij->A,op); 958c74985f6SBarry Smith MatSetOption(aij->B,op); 959c74985f6SBarry Smith } 960c74985f6SBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) { 961c74985f6SBarry Smith MatSetOption(aij->A,op); 962c74985f6SBarry Smith MatSetOption(aij->B,op); 963c74985f6SBarry Smith } 964*bbb6d6a8SBarry Smith else if (op == COLUMN_ORIENTED) 965*bbb6d6a8SBarry Smith SETERRQ(1,"MatSetOption_MPIAIJ:Column oriented not supported"); 966c74985f6SBarry Smith return 0; 967c74985f6SBarry Smith } 968c74985f6SBarry Smith 969d1710a03SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 970c74985f6SBarry Smith { 97144a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 972c74985f6SBarry Smith *m = mat->M; *n = mat->N; 973c74985f6SBarry Smith return 0; 974c74985f6SBarry Smith } 975c74985f6SBarry Smith 976d1710a03SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 977c74985f6SBarry Smith { 97844a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 979b7c46309SBarry Smith *m = mat->m; *n = mat->N; 980c74985f6SBarry Smith return 0; 981c74985f6SBarry Smith } 982c74985f6SBarry Smith 983d1710a03SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 984c74985f6SBarry Smith { 98544a69424SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 986c74985f6SBarry Smith *m = mat->rstart; *n = mat->rend; 987c74985f6SBarry Smith return 0; 988c74985f6SBarry Smith } 989c74985f6SBarry Smith 99039e00950SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 99139e00950SLois Curfman McInnes { 992154123eaSLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 993154123eaSLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 994154123eaSLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 995154123eaSLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 99639e00950SLois Curfman McInnes 99739e00950SLois Curfman McInnes if (row < rstart || row >= rend) 998*bbb6d6a8SBarry Smith SETERRQ(1,"MatGetRow_MPIAIJ:Currently you can get only local rows") 999abc0e9e4SLois Curfman McInnes lrow = row - rstart; 100039e00950SLois Curfman McInnes 1001154123eaSLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1002154123eaSLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1003154123eaSLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 100478b31e54SBarry Smith ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 100578b31e54SBarry Smith ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1006154123eaSLois Curfman McInnes nztot = nzA + nzB; 1007154123eaSLois Curfman McInnes 1008154123eaSLois Curfman McInnes if (v || idx) { 1009154123eaSLois Curfman McInnes if (nztot) { 1010154123eaSLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1011299609e3SLois Curfman McInnes int imark; 101248b35521SBarry Smith if (mat->assembled) { 1013154123eaSLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 101448b35521SBarry Smith } 1015154123eaSLois Curfman McInnes if (v) { 101678b31e54SBarry Smith *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 101739e00950SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1018154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1019154123eaSLois Curfman McInnes else break; 1020154123eaSLois Curfman McInnes } 1021154123eaSLois Curfman McInnes imark = i; 1022154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1023299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1024154123eaSLois Curfman McInnes } 1025154123eaSLois Curfman McInnes if (idx) { 102678b31e54SBarry Smith *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1027154123eaSLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1028154123eaSLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1029154123eaSLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1030154123eaSLois Curfman McInnes else break; 1031154123eaSLois Curfman McInnes } 1032154123eaSLois Curfman McInnes imark = i; 1033154123eaSLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1034299609e3SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 103539e00950SLois Curfman McInnes } 103639e00950SLois Curfman McInnes } 103739e00950SLois Curfman McInnes else {*idx = 0; *v=0;} 1038154123eaSLois Curfman McInnes } 103939e00950SLois Curfman McInnes *nz = nztot; 104078b31e54SBarry Smith ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 104178b31e54SBarry Smith ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 104239e00950SLois Curfman McInnes return 0; 104339e00950SLois Curfman McInnes } 104439e00950SLois Curfman McInnes 104539e00950SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 104639e00950SLois Curfman McInnes { 104778b31e54SBarry Smith if (idx) PETSCFREE(*idx); 104878b31e54SBarry Smith if (v) PETSCFREE(*v); 104939e00950SLois Curfman McInnes return 0; 105039e00950SLois Curfman McInnes } 105139e00950SLois Curfman McInnes 1052855ac2c5SLois Curfman McInnes static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1053855ac2c5SLois Curfman McInnes { 1054855ac2c5SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1055855ac2c5SLois Curfman McInnes int ierr; 1056855ac2c5SLois Curfman McInnes if (aij->numtids == 1) { 105714183eadSLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1058855ac2c5SLois Curfman McInnes } else 1059*bbb6d6a8SBarry Smith SETERRQ(1,"MatNorm_MPIAIJ:not yet supported in parallel"); 1060855ac2c5SLois Curfman McInnes return 0; 1061855ac2c5SLois Curfman McInnes } 1062855ac2c5SLois Curfman McInnes 1063b7c46309SBarry Smith static int MatTranspose_MPIAIJ(Mat A,Mat *Bin) 1064b7c46309SBarry Smith { 1065b7c46309SBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1066b7c46309SBarry Smith int ierr; 1067b7c46309SBarry Smith Mat B; 1068b7c46309SBarry Smith Mat_AIJ *Aloc; 1069b7c46309SBarry Smith int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1070b7c46309SBarry Smith Scalar *array; 1071b7c46309SBarry Smith 1072b7c46309SBarry Smith ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1073b7c46309SBarry Smith CHKERRQ(ierr); 1074b7c46309SBarry Smith 1075b7c46309SBarry Smith /* copy over the A part */ 1076b7c46309SBarry Smith Aloc = (Mat_AIJ*) a->A->data; 1077b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1078b7c46309SBarry Smith row = a->rstart; 1079b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;} 1080b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1081b7c46309SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES); 1082b7c46309SBarry Smith CHKERRQ(ierr); 1083b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1084b7c46309SBarry Smith } 1085b7c46309SBarry Smith aj = Aloc->j; 1086b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;} 1087b7c46309SBarry Smith 1088b7c46309SBarry Smith /* copy over the B part */ 1089b7c46309SBarry Smith Aloc = (Mat_AIJ*) a->B->data; 1090b7c46309SBarry Smith m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1091b7c46309SBarry Smith row = a->rstart; 1092b7c46309SBarry Smith ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 1093b7c46309SBarry Smith for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];} 1094b7c46309SBarry Smith for ( i=0; i<m; i++ ) { 1095b7c46309SBarry Smith ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES); 1096b7c46309SBarry Smith CHKERRQ(ierr); 1097b7c46309SBarry Smith row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1098b7c46309SBarry Smith } 1099b7c46309SBarry Smith PETSCFREE(ct); 1100b7c46309SBarry Smith ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1101b7c46309SBarry Smith ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1102b7c46309SBarry Smith *Bin = B; 1103b7c46309SBarry Smith return 0; 1104b7c46309SBarry Smith } 1105b7c46309SBarry Smith 1106fc76ce87SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1107ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat,Mat *); 1108d6dfbf8fSBarry Smith 11098a729477SBarry Smith /* -------------------------------------------------------------------*/ 11102ee70a88SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 111139e00950SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 111244a69424SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 111344a69424SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1114299609e3SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1115299609e3SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1116299609e3SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 111744a69424SLois Curfman McInnes MatRelax_MPIAIJ, 1118b7c46309SBarry Smith MatTranspose_MPIAIJ, 1119a66be287SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1120855ac2c5SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1121ee50ffe9SBarry Smith MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 11221eb62cbbSBarry Smith 0, 1123299609e3SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1124299609e3SLois Curfman McInnes MatGetReordering_MPIAIJ, 1125299609e3SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1126d1710a03SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1127299609e3SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 1128ff7509f2SBarry Smith 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 11298a729477SBarry Smith 11308a729477SBarry Smith /*@ 1131ff756334SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1132ff756334SLois Curfman McInnes (the default parallel PETSc format). 11338a729477SBarry Smith 11348a729477SBarry Smith Input Parameters: 11351eb62cbbSBarry Smith . comm - MPI communicator 11367d3e4905SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 11377d3e4905SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 11387d3e4905SLois Curfman McInnes if N is given) 11397d3e4905SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 11407d3e4905SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 11417d3e4905SLois Curfman McInnes if n is given) 1142ab693e5aSLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1143ff756334SLois Curfman McInnes (same for all local rows) 1144ab693e5aSLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1145ab693e5aSLois Curfman McInnes or null (possibly different for each row). You must leave room 1146ab693e5aSLois Curfman McInnes for the diagonal entry even if it is zero. 1147ab693e5aSLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1148ab693e5aSLois Curfman McInnes submatrix (same for all local rows). 1149ab693e5aSLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1150ab693e5aSLois Curfman McInnes submatrix or null (possibly different for each row). 11518a729477SBarry Smith 1152ff756334SLois Curfman McInnes Output Parameter: 11538a729477SBarry Smith . newmat - the matrix 11548a729477SBarry Smith 1155ff756334SLois Curfman McInnes Notes: 1156ff756334SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1157ff756334SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 1158ff756334SLois Curfman McInnes storage. That is, the stored row and column indices begin at 1159ab693e5aSLois Curfman McInnes one, not zero. See the users manual for further details. 1160ff756334SLois Curfman McInnes 1161ff756334SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1162ff756334SLois Curfman McInnes (possibly both). 1163ff756334SLois Curfman McInnes 1164e0245417SLois Curfman McInnes Storage Information: 1165e0245417SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1166e0245417SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1167e0245417SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1168e0245417SLois Curfman McInnes local matrix (a rectangular submatrix). 1169e0245417SLois Curfman McInnes 1170e0245417SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 1171e0245417SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set both 1172e0245417SLois Curfman McInnes d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1173e0245417SLois Curfman McInnes Likewise, specify preallocated storage for the off-diagonal part of 1174e0245417SLois Curfman McInnes the local submatrix with o_nz or o_nnz (not both). 1175ff756334SLois Curfman McInnes 1176dbd7a890SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1177ff756334SLois Curfman McInnes 1178dbd7a890SLois Curfman McInnes .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues() 11798a729477SBarry Smith @*/ 11801eb62cbbSBarry Smith int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 11811eb62cbbSBarry Smith int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 11828a729477SBarry Smith { 11838a729477SBarry Smith Mat mat; 118444a69424SLois Curfman McInnes Mat_MPIAIJ *aij; 11856abc6512SBarry Smith int ierr, i,sum[2],work[2]; 11868a729477SBarry Smith *newmat = 0; 118744a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1188a5a9c739SBarry Smith PLogObjectCreate(mat); 118978b31e54SBarry Smith mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 11908a729477SBarry Smith mat->ops = &MatOps; 119144a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 119244a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 11938a729477SBarry Smith mat->factor = 0; 1194d6dfbf8fSBarry Smith 119507a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 11961eb62cbbSBarry Smith MPI_Comm_rank(comm,&aij->mytid); 11971eb62cbbSBarry Smith MPI_Comm_size(comm,&aij->numtids); 11981eb62cbbSBarry Smith 1199dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 12001eb62cbbSBarry Smith work[0] = m; work[1] = n; 1201d6dfbf8fSBarry Smith MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 1202dbd7a890SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1203dbd7a890SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 12041eb62cbbSBarry Smith } 1205dbd7a890SLois Curfman McInnes if (m == PETSC_DECIDE) 1206dbd7a890SLois Curfman McInnes {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 1207dbd7a890SLois Curfman McInnes if (n == PETSC_DECIDE) 1208dbd7a890SLois Curfman McInnes {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 12098a729477SBarry Smith aij->m = m; 12108a729477SBarry Smith aij->n = n; 12111eb62cbbSBarry Smith aij->N = N; 12121eb62cbbSBarry Smith aij->M = M; 12131eb62cbbSBarry Smith 12141eb62cbbSBarry Smith /* build local table of row and column ownerships */ 121578b31e54SBarry Smith aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int)); 121678b31e54SBarry Smith CHKPTRQ(aij->rowners); 12171eb62cbbSBarry Smith aij->cowners = aij->rowners + aij->numtids + 1; 12181eb62cbbSBarry Smith MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 12191eb62cbbSBarry Smith aij->rowners[0] = 0; 12201eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 12211eb62cbbSBarry Smith aij->rowners[i] += aij->rowners[i-1]; 12228a729477SBarry Smith } 12231eb62cbbSBarry Smith aij->rstart = aij->rowners[aij->mytid]; 12241eb62cbbSBarry Smith aij->rend = aij->rowners[aij->mytid+1]; 12251eb62cbbSBarry Smith MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 12261eb62cbbSBarry Smith aij->cowners[0] = 0; 12271eb62cbbSBarry Smith for ( i=2; i<=aij->numtids; i++ ) { 12281eb62cbbSBarry Smith aij->cowners[i] += aij->cowners[i-1]; 12298a729477SBarry Smith } 12301eb62cbbSBarry Smith aij->cstart = aij->cowners[aij->mytid]; 12311eb62cbbSBarry Smith aij->cend = aij->cowners[aij->mytid+1]; 12328a729477SBarry Smith 12338a729477SBarry Smith 12346b5873e3SBarry Smith ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A); 123578b31e54SBarry Smith CHKERRQ(ierr); 1236a5a9c739SBarry Smith PLogObjectParent(mat,aij->A); 12376b5873e3SBarry Smith ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B); 123878b31e54SBarry Smith CHKERRQ(ierr); 1239a5a9c739SBarry Smith PLogObjectParent(mat,aij->B); 12408a729477SBarry Smith 12411eb62cbbSBarry Smith /* build cache for off array entries formed */ 124278b31e54SBarry Smith ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr); 12439e25ed09SBarry Smith aij->colmap = 0; 12449e25ed09SBarry Smith aij->garray = 0; 12458a729477SBarry Smith 12461eb62cbbSBarry Smith /* stuff used for matrix vector multiply */ 12471eb62cbbSBarry Smith aij->lvec = 0; 12481eb62cbbSBarry Smith aij->Mvctx = 0; 1249d6dfbf8fSBarry Smith aij->assembled = 0; 12508a729477SBarry Smith 1251d6dfbf8fSBarry Smith *newmat = mat; 1252d6dfbf8fSBarry Smith return 0; 1253d6dfbf8fSBarry Smith } 1254c74985f6SBarry Smith 1255ff7509f2SBarry Smith static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1256d6dfbf8fSBarry Smith { 1257d6dfbf8fSBarry Smith Mat mat; 125844a69424SLois Curfman McInnes Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data; 12592ee70a88SLois Curfman McInnes int ierr, len; 1260d6dfbf8fSBarry Smith *newmat = 0; 1261d6dfbf8fSBarry Smith 1262*bbb6d6a8SBarry Smith if (!oldmat->assembled) 1263*bbb6d6a8SBarry Smith SETERRQ(1,"MatCopyPrivate_MPIAIJ:Cannot copy unassembled matrix"); 126444a69424SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1265a5a9c739SBarry Smith PLogObjectCreate(mat); 126678b31e54SBarry Smith mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1267d6dfbf8fSBarry Smith mat->ops = &MatOps; 126844a69424SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 126944a69424SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1270d6dfbf8fSBarry Smith mat->factor = matin->factor; 1271d6dfbf8fSBarry Smith 1272d6dfbf8fSBarry Smith aij->m = oldmat->m; 1273d6dfbf8fSBarry Smith aij->n = oldmat->n; 1274d6dfbf8fSBarry Smith aij->M = oldmat->M; 1275d6dfbf8fSBarry Smith aij->N = oldmat->N; 1276d6dfbf8fSBarry Smith 1277d6dfbf8fSBarry Smith aij->assembled = 1; 1278d6dfbf8fSBarry Smith aij->rstart = oldmat->rstart; 1279d6dfbf8fSBarry Smith aij->rend = oldmat->rend; 1280d6dfbf8fSBarry Smith aij->cstart = oldmat->cstart; 1281d6dfbf8fSBarry Smith aij->cend = oldmat->cend; 1282d6dfbf8fSBarry Smith aij->numtids = oldmat->numtids; 1283d6dfbf8fSBarry Smith aij->mytid = oldmat->mytid; 128407a0e7adSLois Curfman McInnes aij->insertmode = NOTSETVALUES; 1285d6dfbf8fSBarry Smith 128678b31e54SBarry Smith aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) ); 128778b31e54SBarry Smith CHKPTRQ(aij->rowners); 128878b31e54SBarry Smith PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 128978b31e54SBarry Smith ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr); 12902ee70a88SLois Curfman McInnes if (oldmat->colmap) { 129178b31e54SBarry Smith aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) ); 129278b31e54SBarry Smith CHKPTRQ(aij->colmap); 129378b31e54SBarry Smith PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int)); 12942ee70a88SLois Curfman McInnes } else aij->colmap = 0; 12952ee70a88SLois Curfman McInnes if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) { 129678b31e54SBarry Smith aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray); 129778b31e54SBarry Smith PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int)); 12982ee70a88SLois Curfman McInnes } else aij->garray = 0; 1299d6dfbf8fSBarry Smith 130078b31e54SBarry Smith ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr); 1301a5a9c739SBarry Smith PLogObjectParent(mat,aij->lvec); 130278b31e54SBarry Smith ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr); 1303a5a9c739SBarry Smith PLogObjectParent(mat,aij->Mvctx); 130478b31e54SBarry Smith ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr); 1305a5a9c739SBarry Smith PLogObjectParent(mat,aij->A); 130678b31e54SBarry Smith ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr); 1307a5a9c739SBarry Smith PLogObjectParent(mat,aij->B); 13088a729477SBarry Smith *newmat = mat; 13098a729477SBarry Smith return 0; 13108a729477SBarry Smith } 1311